From d8a0967ec00e0ea6eb1d8f8db0905ba1c4ee9b90 Mon Sep 17 00:00:00 2001 From: Ben Thorner Date: Wed, 8 Sep 2021 13:21:23 +0100 Subject: [PATCH] Add validation to prevent blank area names Now that these are used for display on gov.uk/alerts we need to make sure the data is being set properly. We've already found an example where it wasn't [1]. We validate external broadcasts in two stages: with the official CAP XML schema [2] and then again with our own, more specific schema for the converted JSON. Since this validation is a custom requirement I've made it part of the JSON schema. Note that jsonschema recommends avoiding metachars like "\w" since they're not supported by all implementations [3]. I've tested the new validation manually and it works as expected by disallowing e.g. " " but still alowing "foo" and "foo bar". [1]: https://www.notifications.service.gov.uk/services/120107d0-d99a-4c42-8b70-f37d2f28879b/rejected-alerts/d6e0c70e-60f6-4422-8589-2a2d159c63f2 [2]: https://github.com/alphagov/notifications-api/blob/81a25ff1effa821f0096fcc5edea79b0a5b17f5e/app/xml_schemas/CAP-v1.2.xsd [3]: http://json-schema.org/understanding-json-schema/reference/regular_expressions.html --- app/v2/broadcast/broadcast_schemas.py | 1 + .../v2/broadcast/sample_cap_xml_documents.py | 3 +++ tests/app/v2/broadcast/test_post_broadcast.py | 21 +++++++++++++++++++ 3 files changed, 25 insertions(+) diff --git a/app/v2/broadcast/broadcast_schemas.py b/app/v2/broadcast/broadcast_schemas.py index 67886507e..99dd572d7 100644 --- a/app/v2/broadcast/broadcast_schemas.py +++ b/app/v2/broadcast/broadcast_schemas.py @@ -76,6 +76,7 @@ post_broadcast_schema = { "properties": { "name": { "type": "string", + "pattern": "([a-zA-Z1-9]+ )*[a-zA-Z1-9]+", }, "polygons": { "type": "array", diff --git a/tests/app/v2/broadcast/sample_cap_xml_documents.py b/tests/app/v2/broadcast/sample_cap_xml_documents.py index a8a0b8cab..a17691e63 100644 --- a/tests/app/v2/broadcast/sample_cap_xml_documents.py +++ b/tests/app/v2/broadcast/sample_cap_xml_documents.py @@ -1,3 +1,5 @@ +import re + WAINFLEET = """ 50385fcb0ab7aa447bbd46d848ce8466E @@ -235,3 +237,4 @@ WITH_PLACEHOLDER_FOR_CONTENT = """ LONG_GSM7 = WITH_PLACEHOLDER_FOR_CONTENT.format('a' * 1396) LONG_UCS2 = WITH_PLACEHOLDER_FOR_CONTENT.format('ลต' * 616) +MISSING_AREA_NAMES = re.sub(".*", " ", WAINFLEET) diff --git a/tests/app/v2/broadcast/test_post_broadcast.py b/tests/app/v2/broadcast/test_post_broadcast.py index fbfe9ae7e..aa70ca2c7 100644 --- a/tests/app/v2/broadcast/test_post_broadcast.py +++ b/tests/app/v2/broadcast/test_post_broadcast.py @@ -229,3 +229,24 @@ def test_content_too_long_returns_400( }], 'status_code': 400, } + + +def test_invalid_areas_returns_400( + client, + sample_broadcast_service +): + auth_header = create_service_authorization_header(service_id=sample_broadcast_service.id) + response = client.post( + path='/v2/broadcast', + data=sample_cap_xml_documents.MISSING_AREA_NAMES, + headers=[('Content-Type', 'application/cap+xml'), auth_header], + ) + + assert json.loads(response.get_data(as_text=True)) == { + 'errors': [{ + 'error': 'ValidationError', + # the blank spaces represent the blank areaDesc in the XML + 'message': 'areas does not match ([a-zA-Z1-9]+ )*[a-zA-Z1-9]+', + }], + 'status_code': 400, + }