From dec16a98f6762c02a1d0a9ddc3ddbc698c3ffed9 Mon Sep 17 00:00:00 2001 From: Chris Hill-Scott Date: Mon, 8 Feb 2021 08:50:51 +0000 Subject: [PATCH] Handle XML files that have a declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `lxml` wants its input in bytes: > XML is explicitly defined as a stream of bytes. It's not Unicode text. > […] rule number one: do not decode your XML data yourself. – https://lxml.de/FAQ.html#why-can-t-lxml-parse-my-xml-from-unicode-strings It will accept strings, unless the document contains a declaration[1] with an `encoding` attribute. Then it will refuse to parse the document and raise a `ValueError`[2]. We can fix this by passing `lxml` the bytes from the request, rather than the decoded text. 1. > XML documents may begin with an XML declaration that describes some > information about themselves. An example is > `<?xml version="1.0" encoding="UTF-8"?>`. – https://en.wikipedia.org/wiki/XML#XML_declaration 2. See an example of this exception being raised in production here: https://kibana.logit.io/s/9423a789-282c-4113-908d-0be3b1bc9d1d/app/kibana#/doc/logstash-*/logstash-2021.02.05/syslog?id=AXdzfZVz5ZSa5DKpJiYd&_g=() --- app/v2/broadcast/post_broadcast.py | 2 +- tests/app/v2/broadcast/test_post_broadcast.py | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/app/v2/broadcast/post_broadcast.py b/app/v2/broadcast/post_broadcast.py index 79b32ae4a..6f9e56142 100644 --- a/app/v2/broadcast/post_broadcast.py +++ b/app/v2/broadcast/post_broadcast.py @@ -27,7 +27,7 @@ def create_broadcast(): status_code=415, ) - cap_xml = request.get_data(as_text=True) + cap_xml = request.get_data() if not validate_xml(cap_xml, 'CAP-v1.2.xsd'): raise BadRequestError( diff --git a/tests/app/v2/broadcast/test_post_broadcast.py b/tests/app/v2/broadcast/test_post_broadcast.py index 0cc2839df..a364e3eaa 100644 --- a/tests/app/v2/broadcast/test_post_broadcast.py +++ b/tests/app/v2/broadcast/test_post_broadcast.py @@ 
-1,3 +1,5 @@ +import pytest + from flask import json from freezegun import freeze_time from tests import create_authorization_header @@ -109,15 +111,20 @@ def test_valid_post_cap_xml_broadcast_returns_201( assert response_json['updated_at'] is None +@pytest.mark.parametrize('xml_document', ( + 'Oh no', + '', +)) def test_invalid_post_cap_xml_broadcast_returns_400( client, sample_broadcast_service, + xml_document, ): auth_header = create_authorization_header(service_id=sample_broadcast_service.id) response = client.post( path='/v2/broadcast', - data="Oh no", + data=xml_document, headers=[('Content-Type', 'application/cap+xml'), auth_header], )