mirror of
https://github.com/GSA/notifications-api.git
synced 2025-12-13 08:42:21 -05:00
578 lines
18 KiB
Python
578 lines
18 KiB
Python
import pytest
|
||
from markupsafe import Markup
|
||
|
||
from notifications_utils.formatters import (
|
||
autolink_urls,
|
||
escape_html,
|
||
formatted_list,
|
||
make_quotes_smart,
|
||
normalise_whitespace,
|
||
remove_smart_quotes_from_email_addresses,
|
||
remove_whitespace_before_punctuation,
|
||
replace_hyphens_with_en_dashes,
|
||
sms_encode,
|
||
strip_all_whitespace,
|
||
strip_and_remove_obscure_whitespace,
|
||
strip_unsupported_characters,
|
||
unlink_govuk_escaped,
|
||
)
|
||
from notifications_utils.template import (
|
||
HTMLEmailTemplate,
|
||
PlainTextEmailTemplate,
|
||
SMSMessageTemplate,
|
||
SMSPreviewTemplate,
|
||
)
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("url", "expected_html"),
    [
        (
            """https://example.com/"onclick="alert('hi')""",
            """<a class="govuk-link govuk-link--no-visited-state" href="https://example.com/%22onclick=%22alert%28%27hi%27%29">https://example.com/"onclick="alert('hi')</a>""",  # noqa
        ),
        (
            """https://example.com/"style='text-decoration:blink'""",
            """<a class="govuk-link govuk-link--no-visited-state" href="https://example.com/%22style=%27text-decoration:blink%27">https://example.com/"style='text-decoration:blink'</a>""",  # noqa
        ),
    ],
)
def test_URLs_get_escaped_in_sms(url, expected_html):
    """Quotes and brackets in a URL are percent-encoded in the SMS preview link's href."""
    sms_preview = SMSPreviewTemplate({"content": url, "template_type": "sms"})
    rendered = str(sms_preview)
    assert expected_html in rendered
|
||
|
||
|
||
def test_HTML_template_has_URLs_replaced_with_links():
    """A bare URL in the email body is rendered as a styled anchor in the HTML email."""
    invite_url = "https://service.example.com/accept_invite/a1b2c3d4"
    expected_link = (
        '<a style="word-wrap: break-word; color: #1D70B8;" '
        f'href="{invite_url}">{invite_url}</a>'
    )
    email = HTMLEmailTemplate(
        {
            "content": (
                "You’ve been invited to a service. Click this link:\n"
                f"{invite_url}\n"
                "\n"
                "Thanks\n"
            ),
            "subject": "",
            "template_type": "email",
        }
    )
    assert expected_link in str(email)
|
||
|
||
|
||
def test_escaping_govuk_in_email_templates():
    """The zero-width-space form of "GOV.UK" appears in both plain text and HTML emails."""
    raw = "GOV.UK"
    escaped = "GOV.\u200BUK"
    assert unlink_govuk_escaped(raw) == escaped

    email_json = {"content": raw, "subject": "", "template_type": "email"}
    # Same order as before: plain text first, then HTML.
    for template_class in (PlainTextEmailTemplate, HTMLEmailTemplate):
        assert escaped in str(template_class(email_json))
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("template_content", "expected"),
    [
        # Cases where a zero-width space (U+200B) is inserted after "GOV."
        ("GOV.UK", "GOV.\u200BUK"),
        ("gov.uk", "gov.\u200Buk"),
        ("content with space infront GOV.UK", "content with space infront GOV.\u200BUK"),
        ("content with tab infront\tGOV.UK", "content with tab infront\tGOV.\u200BUK"),
        ("content with newline infront\nGOV.UK", "content with newline infront\nGOV.\u200BUK"),
        ("*GOV.UK", "*GOV.\u200BUK"),
        ("#GOV.UK", "#GOV.\u200BUK"),
        ("^GOV.UK", "^GOV.\u200BUK"),
        (" #GOV.UK", " #GOV.\u200BUK"),
        ("GOV.UK with CONTENT after", "GOV.\u200BUK with CONTENT after"),
        ("#GOV.UK with CONTENT after", "#GOV.\u200BUK with CONTENT after"),
        # Cases where the content must be left untouched
        ("https://gov.uk", "https://gov.uk"),
        ("https://www.gov.uk", "https://www.gov.uk"),
        ("www.gov.uk", "www.gov.uk"),
        ("WWW.GOV.UK", "WWW.GOV.UK"),
        ("WWW.GOV.UK.", "WWW.GOV.UK."),
        ("https://www.gov.uk/?utm_source=gov.uk", "https://www.gov.uk/?utm_source=gov.uk"),
        ("mygov.uk", "mygov.uk"),
        ("www.this-site-is-not-gov.uk", "www.this-site-is-not-gov.uk"),
        (
            "www.gov.uk?websites=bbc.co.uk;gov.uk;nsh.scot",
            "www.gov.uk?websites=bbc.co.uk;gov.uk;nsh.scot",
        ),
        ("reply to: xxxx@xxx.gov.uk", "reply to: xxxx@xxx.gov.uk"),
        ("southwark.gov.uk", "southwark.gov.uk"),
        ("data.gov.uk", "data.gov.uk"),
        ("gov.uk/foo", "gov.uk/foo"),
        ("*GOV.UK/foo", "*GOV.UK/foo"),
        ("#GOV.UK/foo", "#GOV.UK/foo"),
        ("^GOV.UK/foo", "^GOV.UK/foo"),
        ("gov.uk#departments-and-policy", "gov.uk#departments-and-policy"),
        # Known gaps: the regex currently inserts the extra character in
        # these inputs even though it should not. We accept that the regex
        # is imperfect because users are very unlikely to put a URL like
        # this in their content; the edge cases are documented here as
        # expected failures.
        pytest.param("gov.uk.com", "gov.uk.com", marks=pytest.mark.xfail),
        pytest.param("gov.ukandi.com", "gov.ukandi.com", marks=pytest.mark.xfail),
        pytest.param("gov.uks", "gov.uks", marks=pytest.mark.xfail),
    ],
)
def test_unlink_govuk_escaped(template_content, expected):
    """Check which occurrences of "gov.uk" are broken up with U+200B and which are left alone."""
    actual = unlink_govuk_escaped(template_content)
    assert actual == expected
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("prefix", "body", "expected"),
    [
        ("a", "b", "a: b"),
        (None, "b", "b"),
    ],
)
def test_sms_message_adds_prefix(prefix, body, expected):
    """A set prefix is rendered as "<prefix>: <body>"; with no prefix only the body is rendered."""
    sms = SMSMessageTemplate({"content": body, "template_type": "sms"})
    sms.prefix, sms.sender = prefix, None
    rendered = str(sms)
    assert rendered == expected
|
||
|
||
|
||
def test_sms_preview_adds_newlines():
    """Multi-line SMS content is rendered with <br> tags in the preview."""
    multiline_content = "\nthe\nquick\n\nbrown fox\n"
    preview = SMSPreviewTemplate(
        {"content": multiline_content, "template_type": "sms"}
    )
    preview.prefix, preview.sender = None, None
    rendered = str(preview)
    assert "<br>" in rendered
|
||
|
||
|
||
def test_sms_encode(mocker):
    """sms_encode passes its input to SanitiseSMS.encode and returns that call's result."""
    sanitiser = mocker.patch("notifications_utils.formatters.SanitiseSMS")
    encoded = sms_encode("foo")
    assert encoded == sanitiser.encode.return_value
    sanitiser.encode.assert_called_once_with("foo")
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("items", "kwargs", "expected_output"),
    [
        ([1], {}, "‘1’"),
        ([1, 2], {}, "‘1’ and ‘2’"),
        ([1, 2, 3], {}, "‘1’, ‘2’ and ‘3’"),
        ([1, 2, 3], {"prefix": "foo", "prefix_plural": "bar"}, "bar ‘1’, ‘2’ and ‘3’"),
        ([1], {"prefix": "foo", "prefix_plural": "bar"}, "foo ‘1’"),
        ([1, 2, 3], {"before_each": "a", "after_each": "b"}, "a1b, a2b and a3b"),
        ([1, 2, 3], {"conjunction": "foo"}, "‘1’, ‘2’ foo ‘3’"),
        # The item is escaped, while the before/after wrappers are emitted
        # as given, so the ampersand must come back as an entity.
        (["&"], {"before_each": "<i>", "after_each": "</i>"}, "<i>&amp;</i>"),
        (
            [1, 2, 3],
            {"before_each": "<i>", "after_each": "</i>"},
            "<i>1</i>, <i>2</i> and <i>3</i>",
        ),
    ],
)
def test_formatted_list(items, kwargs, expected_output):
    """Check joining, prefixes, wrappers and conjunctions produced by formatted_list.

    NOTE(review): the "&" case expects the escaped form "&amp;"; confirm
    against formatted_list's implementation.
    """
    rendered = formatted_list(items, **kwargs)
    assert rendered == expected_output
|
||
|
||
|
||
def test_formatted_list_returns_markup():
    """formatted_list returns a Markup instance."""
    result = formatted_list([0])
    assert isinstance(result, Markup)
|
||
|
||
|
||
def test_bleach_doesnt_try_to_make_valid_html_before_cleaning():
    """Tag-like text is escaped as written, not repaired into a well-formed element first.

    The angle brackets must come back as entities, and no closing tag or
    other markup may be added around the original text.
    """
    tag_like_text = "<to cancel daily cat facts reply 'cancel'>"
    escaped = escape_html(tag_like_text)
    assert escaped == "&lt;to cancel daily cat facts reply 'cancel'&gt;"
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("content", "expected_escaped"),
    [
        ("&?a;", "&amp;?a;"),
        ("&>a;", "&amp;&gt;a;"),
        ("&*a;", "&amp;*a;"),
        ("&a?;", "&amp;a?;"),
        ("&x?xa;", "&amp;x?xa;"),
        # We need to be careful that query arguments don’t get turned into entities
        ("&timestamp=&times;", "&amp;timestamp=×"),
        ("&times=1,2,3", "&amp;times=1,2,3"),
        # &minus; should have a trailing semicolon according to the HTML5
        # spec but &micro doesn’t need one
        ("2&minus;1", "2−1"),
        ("200&micro;g", "200µg"),
        # …we ignore it when it’s ambiguous
        ("2&minus1", "2&amp;minus1"),
        ("200&microg", "200&amp;microg"),
        # …we still ignore when there’s a space afterwards
        ("2 &minus 1", "2 &amp;minus 1"),
        ("200&micro g", "200&amp;micro g"),
        # Things which aren’t real entities are ignored, not removed
        ("This &isnotarealentity;", "This &amp;isnotarealentity;"),
        # We let users use &nbsp; for backwards compatibility
        ("Before&nbsp;after", "Before&nbsp;after"),
        # We let users use &amp; because it’s often pasted in URLs
        ("?a=1&amp;b=2", "?a=1&amp;b=2"),
        # We let users use &lpar; and &rpar; because otherwise it’s
        # impossible to put brackets in the body of conditional placeholders
        ("((var??&lpar;in brackets&rpar;))", "((var??&lpar;in brackets&rpar;))"),
    ],
)
def test_escaping_html_entities(content, expected_escaped):
    """Check which ampersand sequences escape_html escapes, decodes, or leaves as written.

    Entity names are spelled out literally (``&amp;``, ``&times;``, ...)
    so that each case pins a real transformation, not an identity
    comparison.
    """
    escaped = escape_html(content)
    assert escaped == expected_escaped
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("dirty", "clean"),
    [
        ("Hello ((name)) ,\n\nThis is a message", "Hello ((name)),\n\nThis is a message"),
        ("Hello Jo ,\n\nThis is a message", "Hello Jo,\n\nThis is a message"),
        ("\n \t , word", "\n, word"),
    ],
)
def test_removing_whitespace_before_commas(dirty, clean):
    """Spaces and tabs directly before a comma are removed; newlines are kept."""
    tidied = remove_whitespace_before_punctuation(dirty)
    assert tidied == clean
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("dirty", "clean"),
    [
        ("Hello ((name)) .\n\nThis is a message", "Hello ((name)).\n\nThis is a message"),
        ("Hello Jo .\n\nThis is a message", "Hello Jo.\n\nThis is a message"),
        ("\n \t . word", "\n. word"),
    ],
)
def test_removing_whitespace_before_full_stops(dirty, clean):
    """Spaces and tabs directly before a full stop are removed; newlines are kept."""
    tidied = remove_whitespace_before_punctuation(dirty)
    assert tidied == clean
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("dumb", "smart"),
    [
        (
            """And I said, "what about breakfast at Tiffany's"?""",
            """And I said, “what about breakfast at Tiffany’s”?""",
        ),
        # Quotes inside an HTML tag and inside a URL are left as they are.
        (
            """\n<a href="http://example.com?q='foo'">http://example.com?q='foo'</a>\n""",
            """\n<a href="http://example.com?q='foo'">http://example.com?q='foo'</a>\n""",
        ),
    ],
)
def test_smart_quotes(dumb, smart):
    """Straight quotes in running text become curly quotes."""
    converted = make_quotes_smart(dumb)
    assert converted == smart
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("nasty", "nice"),
    [
        (
            (
                "The en dash - always with spaces in running text when, as "
                "discussed in this section, indicating a parenthesis or "
                "pause - and the spaced em dash both have a certain "
                "technical advantage over the unspaced em dash. "
            ),
            (
                "The en dash \u2013 always with spaces in running text when, as "
                "discussed in this section, indicating a parenthesis or "
                "pause \u2013 and the spaced em dash both have a certain "
                "technical advantage over the unspaced em dash. "
            ),
        ),
        ("double -- dash", "double \u2013 dash"),
        ("triple --- dash", "triple \u2013 dash"),
        ("quadruple ---- dash", "quadruple ---- dash"),
        ("em — dash", "em – dash"),
        # \u0020 is a normal space character
        ("already\u0020–\u0020correct", "already\u0020–\u0020correct"),
        # no replacement
        ("2004-2008", "2004-2008"),
    ],
)
def test_en_dashes(nasty, nice):
    """Spaced runs of one to three hyphens and spaced em dashes become spaced en dashes.

    Runs of four hyphens, unspaced hyphens and existing en dashes are left
    unchanged.
    """
    replaced = replace_hyphens_with_en_dashes(nasty)
    assert replaced == nice
|
||
|
||
|
||
def test_unicode_dash_lookup():
    """Pin the code points of the "space + en dash" replacement sequence.

    The sequence must be a normal space (U+0020) followed by an en dash
    (U+2013), and must contain neither a non-breaking space nor a plain
    hyphen.
    """
    en_dash_replacement_sequence = "\u0020\u2013"
    hyphen = "-"
    en_dash = "–"
    space = " "
    # Written as an escape: a literal U+00A0 is invisible in source and is
    # easily turned into a normal space by editors and copy/paste, which
    # would break the "not in" assertion below.
    non_breaking_space = "\u00A0"
    assert en_dash_replacement_sequence == space + en_dash
    assert non_breaking_space not in en_dash_replacement_sequence
    assert hyphen not in en_dash_replacement_sequence
|
||
|
||
|
||
@pytest.mark.parametrize(
    "value",
    [
        "bar",
        " bar ",
        "\n\t bar\n",
        " \u180E\u200B \u200C bar \u200D \u2060\uFEFF ",
    ],
)
def test_strip_all_whitespace(value):
    """Ordinary and zero-width whitespace around the word is stripped, leaving "bar"."""
    stripped = strip_all_whitespace(value)
    assert stripped == "bar"
|
||
|
||
|
||
@pytest.mark.parametrize(
    "value",
    [
        "notifications-email",
        " \tnotifications-email \x0c ",
        "\rn\u200Coti\u200Dfi\u200Bcati\u2060ons-\u180Eemai\uFEFFl\uFEFF",
    ],
)
def test_strip_and_remove_obscure_whitespace(value):
    """Zero-width characters are removed anywhere in the value and ordinary whitespace at its ends."""
    cleaned = strip_and_remove_obscure_whitespace(value)
    assert cleaned == "notifications-email"
|
||
|
||
|
||
def test_strip_and_remove_obscure_whitespace_only_removes_normal_whitespace_from_ends():
    """Spaces, newlines and tabs inside the text are kept; only the ends are trimmed."""
    sentence = " words \n over multiple lines with \ttabs\t "
    trimmed = strip_and_remove_obscure_whitespace(sentence)
    assert trimmed == "words \n over multiple lines with \ttabs"
|
||
|
||
|
||
def test_remove_smart_quotes_from_email_addresses():
    """Only the curly apostrophe inside the email address is straightened."""
    with_smart_quotes = (
        "\n"
        "line one’s quote\n"
        "first.o’last@example.com is someone’s email address\n"
        "line ‘three’\n"
    )
    with_plain_address = (
        "\n"
        "line one’s quote\n"
        "first.o'last@example.com is someone’s email address\n"
        "line ‘three’\n"
    )
    assert remove_smart_quotes_from_email_addresses(with_smart_quotes) == with_plain_address
|
||
|
||
|
||
def test_strip_unsupported_characters():
    """The U+2028 line separator is removed without leaving anything in its place."""
    stripped = strip_unsupported_characters("line one\u2028line two")
    assert stripped == "line oneline two"
|
||
|
||
|
||
@pytest.mark.parametrize(
    "value",
    [
        "\u200C Your tax is\ndue\n\n",
        " Your tax is due ",
        # Non breaking spaces replaced by single spaces
        "\u00A0Your\u00A0tax\u00A0 is\u00A0\u00A0due\u00A0",
        # zero width spaces are removed
        "\u180EYour \u200Btax\u200C is \u200D\u2060due \uFEFF",
        # tabs are replaced by single spaces
        "\tYour tax\tis due ",
    ],
)
def test_normalise_whitespace(value):
    """Every variant normalises to the same single-spaced, trimmed sentence."""
    normalised = normalise_whitespace(value)
    assert normalised == "Your tax is due"
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("content", "expected_html"),
    [
        # Explicit schemes are kept
        ("http://example.com", '<a href="http://example.com">http://example.com</a>'),
        ("https://example.com", '<a href="https://example.com">https://example.com</a>'),
        # Scheme-less domains get an http:// href
        ("example.com", '<a href="http://example.com">example.com</a>'),
        (
            "www.foo.bar.example.com",
            '<a href="http://www.foo.bar.example.com">www.foo.bar.example.com</a>',
        ),
        ("example.com/", '<a href="http://example.com/">example.com/</a>'),
        (
            "www.foo.bar.example.com/",
            '<a href="http://www.foo.bar.example.com/">www.foo.bar.example.com/</a>',
        ),
        ("example.com/foo", '<a href="http://example.com/foo">example.com/foo</a>'),
        ("example.com?foo", '<a href="http://example.com?foo">example.com?foo</a>'),
        ("example.com#foo", '<a href="http://example.com#foo">example.com#foo</a>'),
        # Surrounding punctuation and brackets
        (
            "Go to gov.uk/example.",
            'Go to <a href="http://gov.uk/example">gov.uk/example</a>.',
        ),
        (
            "Go to gov.uk/example:",
            'Go to <a href="http://gov.uk/example">gov.uk/example</a>:',
        ),
        (
            "Go to gov.uk/example;",
            'Go to <a href="http://gov.uk/example;">gov.uk/example;</a>',
        ),
        (
            "(gov.uk/example)",
            '(<a href="http://gov.uk/example">gov.uk/example</a>)',
        ),
        (
            "(gov.uk/example)...",
            '(<a href="http://gov.uk/example">gov.uk/example</a>)...',
        ),
        (
            "(gov.uk/example.)",
            '(<a href="http://gov.uk/example">gov.uk/example</a>.)',
        ),
        (
            "(see example.com/foo_(bar))",
            '(see <a href="http://example.com/foo_%28bar%29">example.com/foo_(bar)</a>)',
        ),
        (
            "example.com/foo(((((((bar",
            '<a href="http://example.com/foo%28%28%28%28%28%28%28bar">example.com/foo(((((((bar</a>',
        ),
        (
            "government website (gov.uk). Other websites…",
            'government website (<a href="http://gov.uk">gov.uk</a>). Other websites…',
        ),
        (
            "[gov.uk/example]",
            '[<a href="http://gov.uk/example">gov.uk/example</a>]',
        ),
        (
            "gov.uk/foo, gov.uk/bar",
            '<a href="http://gov.uk/foo">gov.uk/foo</a>, '
            '<a href="http://gov.uk/bar">gov.uk/bar</a>',
        ),
        (
            "<p>gov.uk/foo</p>",
            '<p><a href="http://gov.uk/foo">gov.uk/foo</a></p>',
        ),
        (
            "gov.uk?foo&",
            '<a href="http://gov.uk?foo&">gov.uk?foo&</a>',
        ),
        # Not linked: a bare domain suffix
        ("a .service.gov.uk domain", "a .service.gov.uk domain"),
        (
            'http://foo.com/"bar"?x=1#2',
            '<a href="http://foo.com/%22bar%22?x=1#2">http://foo.com/"bar"?x=1#2</a>',
        ),
        # Not linked: email addresses
        ("firstname.lastname@example.com", "firstname.lastname@example.com"),
        ("with-subdomain@test.example.com", "with-subdomain@test.example.com"),
    ],
)
def test_autolink_urls_matches_correctly(content, expected_html):
    """Check where autolink_urls starts and ends each link, and what it leaves unlinked."""
    linked = autolink_urls(content)
    assert linked == expected_html
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("extra_kwargs", "expected_html"),
    [
        ({}, '<a href="http://example.com">http://example.com</a>'),
        (
            {"classes": "govuk-link"},
            '<a class="govuk-link" href="http://example.com">http://example.com</a>',
        ),
    ],
)
def test_autolink_urls_applies_correct_attributes(extra_kwargs, expected_html):
    """The ``classes`` keyword adds a class attribute; without it only href is emitted."""
    linked = autolink_urls("http://example.com", **extra_kwargs)
    assert linked == expected_html
|
||
|
||
|
||
@pytest.mark.parametrize(
    "content",
    [
        "without link",
        "with link to https://example.com",
    ],
)
def test_autolink_urls_returns_markup(content):
    """autolink_urls returns Markup whether or not the content contains a URL."""
    result = autolink_urls(content)
    assert isinstance(result, Markup)
|