mirror of
https://github.com/GSA/notifications-api.git
synced 2026-02-01 15:46:07 -05:00
Fix escaping in inbound text messages from MMG
One of our providers gives us messages with special characters escaped, ie a newline comes through as `\n`, not a literal newline. We shouldn’t be showing these backslashes to any of our users.

We also have examples of real inbound messages containing `👍` and `’`, so we should continue to display these properly.

It’s a bit tricky, because the strings we get from this provider are a mixture of escape sequences (eg `\n`) and unicode characters (eg `😨`). So we have to first convert the unicode character `😨` into an escape sequence, `\U0001f628` in this example. We do this by encoding with the `raw_unicode_escape` codec:

> Latin-1 encoding with \uXXXX and \UXXXXXXXX for other code points.
> Existing backslashes are not escaped in any way. It is used in the
> Python pickle protocol.

– https://docs.python.org/3/library/codecs.html#text-encodings

Then we turn this back into a string using the `unicode_escape` codec, which transforms all escape sequences into their literal representations (eg `\U0001f628` becomes `😨` and `\n` becomes a newline).
This commit is contained in:
@@ -82,7 +82,11 @@ def receive_firetext_sms():
|
|||||||
|
|
||||||
|
|
||||||
def format_mmg_message(message):
|
def format_mmg_message(message):
|
||||||
return unquote(message.replace('+', ' '))
|
return unescape_string(unquote(message.replace('+', ' ')))
|
||||||
|
|
||||||
|
|
||||||
|
def unescape_string(string):
    """Turn backslash escape sequences in ``string`` into literal characters.

    The input may mix escape sequences (e.g. a backslash followed by ``n``)
    with real non-ASCII characters (e.g. emoji).  Encoding with
    ``raw_unicode_escape`` first rewrites the non-Latin-1 characters as
    ``\\uXXXX`` / ``\\UXXXXXXXX`` escapes while leaving existing backslashes
    untouched; decoding with ``unicode_escape`` then expands every escape
    sequence, restoring those characters and unescaping the rest.

    :param string: text that may contain escaped special characters.
    :returns: the text with escape sequences replaced by the characters they
        represent, or ``string`` unchanged if it contains a malformed escape
        (for example a trailing backslash or a truncated ``\\x`` sequence).
    """
    try:
        return string.encode('raw_unicode_escape').decode('unicode_escape')
    except UnicodeDecodeError:
        # A dangling or truncated escape cannot be decoded.  Showing the text
        # exactly as received is better than failing on the whole message.
        return string
|
||||||
|
|
||||||
|
|
||||||
def format_mmg_datetime(date):
|
def format_mmg_datetime(date):
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ from app.notifications.receive_notifications import (
|
|||||||
create_inbound_sms_object,
|
create_inbound_sms_object,
|
||||||
strip_leading_forty_four,
|
strip_leading_forty_four,
|
||||||
has_inbound_sms_permissions,
|
has_inbound_sms_permissions,
|
||||||
|
unescape_string,
|
||||||
)
|
)
|
||||||
|
|
||||||
from app.models import InboundSms, EMAIL_TYPE, SMS_TYPE, INBOUND_SMS_TYPE
|
from app.models import InboundSms, EMAIL_TYPE, SMS_TYPE, INBOUND_SMS_TYPE
|
||||||
@@ -166,6 +167,36 @@ def test_format_mmg_message(message, expected_output):
|
|||||||
assert format_mmg_message(message) == expected_output
|
assert format_mmg_message(message) == expected_output
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('raw, expected', [
|
||||||
|
(
|
||||||
|
'😬',
|
||||||
|
'😬',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'1\\n2',
|
||||||
|
'1\n2',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'\\\'"\\\'',
|
||||||
|
'\'"\'',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"""
|
||||||
|
|
||||||
|
""",
|
||||||
|
"""
|
||||||
|
|
||||||
|
""",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'\x79 \\x79 \\\\x79', # we should never see the middle one
|
||||||
|
'y y \\x79',
|
||||||
|
),
|
||||||
|
])
|
||||||
|
def test_unescape_string(raw, expected):
|
||||||
|
assert unescape_string(raw) == expected
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('provider_date, expected_output', [
|
@pytest.mark.parametrize('provider_date, expected_output', [
|
||||||
('2017-01-21+11%3A56%3A11', datetime(2017, 1, 21, 11, 56, 11)),
|
('2017-01-21+11%3A56%3A11', datetime(2017, 1, 21, 11, 56, 11)),
|
||||||
('2017-05-21+11%3A56%3A11', datetime(2017, 5, 21, 10, 56, 11))
|
('2017-05-21+11%3A56%3A11', datetime(2017, 5, 21, 10, 56, 11))
|
||||||
|
|||||||
Reference in New Issue
Block a user