From 4761d852e9fa785a1f049efcd0777c7ae5a2ef75 Mon Sep 17 00:00:00 2001
From: Chris Hill-Scott
Date: Wed, 8 Nov 2017 13:32:30 +0000
Subject: [PATCH] Fix escaping in inbound text messages from MMG
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

One of our providers gives us messages with special characters escaped,
i.e. a newline comes through as `\n`, not a literal newline. We shouldn’t
be showing these backslashes to any of our users.

We also have examples of real inbound messages containing `👍` and `’`,
so we should continue to display these properly.

It’s a bit tricky, because the strings we get from this provider are a
mixture of escape sequences (e.g. `\n`) and unicode characters (e.g. `😨`).
So we have to first convert the unicode character `😨` into an escape
sequence, `\U0001f628` in this example.

We do this by encoding with the `raw_unicode_escape` codec:
> Latin-1 encoding with \uXXXX and \UXXXXXXXX for other code points.
> Existing backslashes are not escaped in any way. It is used in the
> Python pickle protocol.

– https://docs.python.org/3/library/codecs.html#text-encodings

Then we turn this back into a string using the `unicode_escape` codec,
which transforms all escape sequences into their literal representations
(e.g. `\U0001f628` becomes `😨` and `\n` becomes a newline).
--- app/notifications/receive_notifications.py | 6 +++- .../test_receive_notification.py | 31 +++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/app/notifications/receive_notifications.py b/app/notifications/receive_notifications.py index ac116b28a..fa9d11146 100644 --- a/app/notifications/receive_notifications.py +++ b/app/notifications/receive_notifications.py @@ -82,7 +82,11 @@ def receive_firetext_sms(): def format_mmg_message(message): - return unquote(message.replace('+', ' ')) + return unescape_string(unquote(message.replace('+', ' '))) + + +def unescape_string(string): + return string.encode('raw_unicode_escape').decode('unicode_escape') def format_mmg_datetime(date): diff --git a/tests/app/notifications/test_receive_notification.py b/tests/app/notifications/test_receive_notification.py index 26a628bb7..d5d504099 100644 --- a/tests/app/notifications/test_receive_notification.py +++ b/tests/app/notifications/test_receive_notification.py @@ -12,6 +12,7 @@ from app.notifications.receive_notifications import ( create_inbound_sms_object, strip_leading_forty_four, has_inbound_sms_permissions, + unescape_string, ) from app.models import InboundSms, EMAIL_TYPE, SMS_TYPE, INBOUND_SMS_TYPE @@ -166,6 +167,36 @@ def test_format_mmg_message(message, expected_output): assert format_mmg_message(message) == expected_output +@pytest.mark.parametrize('raw, expected', [ + ( + '😬', + '😬', + ), + ( + '1\\n2', + '1\n2', + ), + ( + '\\\'"\\\'', + '\'"\'', + ), + ( + """ + + """, + """ + + """, + ), + ( + '\x79 \\x79 \\\\x79', # we should never see the middle one + 'y y \\x79', + ), +]) +def test_unescape_string(raw, expected): + assert unescape_string(raw) == expected + + @pytest.mark.parametrize('provider_date, expected_output', [ ('2017-01-21+11%3A56%3A11', datetime(2017, 1, 21, 11, 56, 11)), ('2017-05-21+11%3A56%3A11', datetime(2017, 5, 21, 10, 56, 11))