mirror of
https://github.com/GSA/notifications-admin.git
synced 2026-02-24 12:21:22 -05:00
This changeset pulls in all of the notification_utils code directly into the admin and removes it as an external dependency. We are doing this to cut down on operational maintenance of the project and will begin removing parts of it no longer needed for the admin. Signed-off-by: Carlo Costino <carlo.costino@gsa.gov>
578 lines
18 KiB
Python
578 lines
18 KiB
Python
import pytest
|
||
from markupsafe import Markup
|
||
|
||
from notifications_utils.formatters import (
|
||
autolink_urls,
|
||
escape_html,
|
||
formatted_list,
|
||
make_quotes_smart,
|
||
normalise_whitespace,
|
||
remove_smart_quotes_from_email_addresses,
|
||
remove_whitespace_before_punctuation,
|
||
replace_hyphens_with_en_dashes,
|
||
sms_encode,
|
||
strip_all_whitespace,
|
||
strip_and_remove_obscure_whitespace,
|
||
strip_unsupported_characters,
|
||
unlink_govuk_escaped,
|
||
)
|
||
from notifications_utils.template import (
|
||
HTMLEmailTemplate,
|
||
PlainTextEmailTemplate,
|
||
SMSMessageTemplate,
|
||
SMSPreviewTemplate,
|
||
)
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("url", "expected_html"),
    (
        # Double quotes followed by an event-handler attribute
        (
            """https://example.com/"onclick="alert('hi')""",
            """<a class="govuk-link govuk-link--no-visited-state" href="https://example.com/%22onclick=%22alert%28%27hi%27%29">https://example.com/"onclick="alert('hi')</a>""",  # noqa
        ),
        # Double quotes followed by a single-quoted style attribute
        (
            """https://example.com/"style='text-decoration:blink'""",
            """<a class="govuk-link govuk-link--no-visited-state" href="https://example.com/%22style=%27text-decoration:blink%27">https://example.com/"style='text-decoration:blink'</a>""",  # noqa
        ),
    ),
)
def test_URLs_get_escaped_in_sms(url, expected_html):
    """Quotes and brackets in a URL are percent-encoded inside the href of the SMS preview."""
    preview = SMSPreviewTemplate({"content": url, "template_type": "sms"})
    rendered = str(preview)
    assert expected_html in rendered
|
||
|
||
|
||
def test_HTML_template_has_URLs_replaced_with_links():
    """A bare URL in an email body is rendered as a styled anchor in the HTML email."""
    invite_url = "https://service.example.com/accept_invite/a1b2c3d4"
    expected_link = (
        '<a style="word-wrap: break-word; color: #1D70B8;" '
        f'href="{invite_url}">{invite_url}</a>'
    )
    email = HTMLEmailTemplate(
        {
            "content": (
                "You’ve been invited to a service. Click this link:\n"
                f"{invite_url}\n"
                "\n"
                "Thanks\n"
            ),
            "subject": "",
            "template_type": "email",
        }
    )
    assert expected_link in str(email)
|
||
|
||
|
||
def test_escaping_govuk_in_email_templates():
    """Both email renderers emit "GOV.UK" with a zero-width space (U+200B) after the dot."""
    raw = "GOV.UK"
    escaped = "GOV.\u200BUK"

    # The formatter on its own…
    assert unlink_govuk_escaped(raw) == escaped

    # …and then each email template class rendering the same template data
    email_json = {"content": raw, "subject": "", "template_type": "email"}
    for template_class in (PlainTextEmailTemplate, HTMLEmailTemplate):
        assert escaped in str(template_class(email_json))
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("template_content", "expected"),
    [
        # A zero-width space (U+200B) is inserted after the dot
        ("GOV.UK", "GOV.\u200BUK"),
        ("gov.uk", "gov.\u200Buk"),
        ("content with space infront GOV.UK", "content with space infront GOV.\u200BUK"),
        ("content with tab infront\tGOV.UK", "content with tab infront\tGOV.\u200BUK"),
        ("content with newline infront\nGOV.UK", "content with newline infront\nGOV.\u200BUK"),
        ("*GOV.UK", "*GOV.\u200BUK"),
        ("#GOV.UK", "#GOV.\u200BUK"),
        ("^GOV.UK", "^GOV.\u200BUK"),
        (" #GOV.UK", " #GOV.\u200BUK"),
        ("GOV.UK with CONTENT after", "GOV.\u200BUK with CONTENT after"),
        ("#GOV.UK with CONTENT after", "#GOV.\u200BUK with CONTENT after"),
        # Content that must come back exactly as it went in
        *(
            (untouched, untouched)
            for untouched in (
                "https://gov.uk",
                "https://www.gov.uk",
                "www.gov.uk",
                "WWW.GOV.UK",
                "WWW.GOV.UK.",
                "https://www.gov.uk/?utm_source=gov.uk",
                "mygov.uk",
                "www.this-site-is-not-gov.uk",
                "www.gov.uk?websites=bbc.co.uk;gov.uk;nsh.scot",
                "reply to: xxxx@xxx.gov.uk",
                "southwark.gov.uk",
                "data.gov.uk",
                "gov.uk/foo",
                "*GOV.UK/foo",
                "#GOV.UK/foo",
                "^GOV.UK/foo",
                "gov.uk#departments-and-policy",
            )
        ),
        # Known gaps: these should also come back unchanged, but the current
        # regex is expected to alter them. The imperfection is accepted
        # because such URLs are very unlikely to appear in real content, so
        # the cases are recorded here as expected failures.
        *(
            pytest.param(known_gap, known_gap, marks=pytest.mark.xfail)
            for known_gap in ("gov.uk.com", "gov.ukandi.com", "gov.uks")
        ),
    ],
)
def test_unlink_govuk_escaped(template_content, expected):
    """Only a standalone "GOV.UK" is altered; URLs, subdomains and email addresses are not."""
    result = unlink_govuk_escaped(template_content)
    assert result == expected
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("prefix", "body", "expected"),
    (
        ("a", "b", "a: b"),
        (None, "b", "b"),
    ),
)
def test_sms_message_adds_prefix(prefix, body, expected):
    """A set prefix is rendered as "<prefix>: <body>"; a prefix of None leaves the body alone."""
    sms = SMSMessageTemplate({"content": body, "template_type": "sms"})
    sms.prefix = prefix
    sms.sender = None
    rendered = str(sms)
    assert rendered == expected
|
||
|
||
|
||
def test_sms_preview_adds_newlines():
    """Line breaks in SMS content show up as <br> tags in the rendered preview."""
    multiline_content = "\nthe\nquick\n\nbrown fox\n"
    preview = SMSPreviewTemplate(
        {"content": multiline_content, "template_type": "sms"}
    )
    preview.prefix = None
    preview.sender = None
    rendered = str(preview)
    assert "<br>" in rendered
|
||
|
||
|
||
def test_sms_encode(mocker):
    """sms_encode hands its argument to SanitiseSMS.encode and returns that call's result."""
    sanitiser = mocker.patch("notifications_utils.formatters.SanitiseSMS")
    encoded = sms_encode("foo")
    assert encoded == sanitiser.encode.return_value
    sanitiser.encode.assert_called_once_with("foo")
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("items", "kwargs", "expected_output"),
    (
        # Default wrapping and conjunction
        ([1], {}, "‘1’"),
        ([1, 2], {}, "‘1’ and ‘2’"),
        ([1, 2, 3], {}, "‘1’, ‘2’ and ‘3’"),
        # Singular vs plural prefix
        ([1, 2, 3], {"prefix": "foo", "prefix_plural": "bar"}, "bar ‘1’, ‘2’ and ‘3’"),
        ([1], {"prefix": "foo", "prefix_plural": "bar"}, "foo ‘1’"),
        # Custom wrappers and conjunction
        ([1, 2, 3], {"before_each": "a", "after_each": "b"}, "a1b, a2b and a3b"),
        ([1, 2, 3], {"conjunction": "foo"}, "‘1’, ‘2’ foo ‘3’"),
        # NOTE(review): this case looks like it exercises escaping of the item;
        # the expected value may need to be "<i>&amp;</i>" — confirm against
        # formatted_list's behaviour.
        (["&"], {"before_each": "<i>", "after_each": "</i>"}, "<i>&</i>"),
        (
            [1, 2, 3],
            {"before_each": "<i>", "after_each": "</i>"},
            "<i>1</i>, <i>2</i> and <i>3</i>",
        ),
    ),
)
def test_formatted_list(items, kwargs, expected_output):
    """formatted_list joins items using the given wrappers, prefix and conjunction."""
    rendered = formatted_list(items, **kwargs)
    assert rendered == expected_output
|
||
|
||
|
||
def test_formatted_list_returns_markup():
    """The value returned by formatted_list is a markupsafe Markup instance."""
    result = formatted_list([0])
    assert isinstance(result, Markup)
|
||
|
||
|
||
def test_bleach_doesnt_try_to_make_valid_html_before_cleaning():
    """Angle brackets around arbitrary text are escaped in place.

    The input looks like an (invalid) HTML tag. The expected output keeps
    the text exactly as written, with only ``<`` and ``>`` replaced by their
    entity references — nothing is dropped or rewritten into a "valid" tag.
    """
    raw = "<to cancel daily cat facts reply 'cancel'>"
    expected = "&lt;to cancel daily cat facts reply 'cancel'&gt;"
    assert escape_html(raw) == expected
|
||
|
||
|
||
@pytest.mark.parametrize(
    "content, expected_escaped",
    (
        # An ampersand that does not start a real entity is escaped
        ("&?a;", "&amp;?a;"),
        ("&>a;", "&amp;&gt;a;"),
        ("&*a;", "&amp;*a;"),
        ("&a?;", "&amp;a?;"),
        ("&x?xa;", "&amp;x?xa;"),
        # We need to be careful that query arguments don’t get turned into entities
        ("&timestamp=&times;", "&amp;timestamp=×"),
        ("&times=1,2,3", "&amp;times=1,2,3"),
        # &minus; should have a trailing semicolon according to the HTML5
        # spec but &micro doesn’t need one
        ("2&minus;1", "2−1"),
        ("200&micro;g", "200µg"),
        # …we ignore it when it’s ambiguous
        ("2&minus1", "2&amp;minus1"),
        ("200&microg", "200&amp;microg"),
        # …we still ignore when there’s a space afterwards
        ("2 &minus 1", "2 &amp;minus 1"),
        ("200&micro g", "200&amp;micro g"),
        # Things which aren’t real entities are ignored, not removed
        ("This &isnotarealentity;", "This &amp;isnotarealentity;"),
        # We let users use &nbsp; for backwards compatibility
        ("Before&nbsp;after", "Before&nbsp;after"),
        # We let users use &amp; because it’s often pasted in URLs
        ("?a=1&amp;b=2", "?a=1&amp;b=2"),
        # We let users use &lpar; and &rpar; because otherwise it’s
        # impossible to put brackets in the body of conditional placeholders
        ("((var??&lpar;in brackets&rpar;))", "((var??&lpar;in brackets&rpar;))"),
    ),
)
def test_escaping_html_entities(
    content,
    expected_escaped,
):
    """Check how escape_html treats ampersands and entity references.

    Entity references are spelled out in full (``&amp;``, ``&times;``,
    ``&nbsp;`` …) in both the inputs and the expected values, so each case
    states exactly which sequences are escaped, which are decoded and which
    pass through untouched.
    """
    assert escape_html(content) == expected_escaped
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("dirty", "clean"),
    (
        # After a placeholder
        ("Hello ((name)) ,\n\nThis is a message", "Hello ((name)),\n\nThis is a message"),
        # After a plain word
        ("Hello Jo ,\n\nThis is a message", "Hello Jo,\n\nThis is a message"),
        # Spaces and a tab at the start of a line; the preceding newline is kept
        ("\n \t , word", "\n, word"),
    ),
)
def test_removing_whitespace_before_commas(dirty, clean):
    """Spaces and tabs directly before a comma are removed."""
    tidied = remove_whitespace_before_punctuation(dirty)
    assert tidied == clean
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("dirty", "clean"),
    (
        # After a placeholder
        ("Hello ((name)) .\n\nThis is a message", "Hello ((name)).\n\nThis is a message"),
        # After a plain word
        ("Hello Jo .\n\nThis is a message", "Hello Jo.\n\nThis is a message"),
        # Spaces and a tab at the start of a line; the preceding newline is kept
        ("\n \t . word", "\n. word"),
    ),
)
def test_removing_whitespace_before_full_stops(dirty, clean):
    """Spaces and tabs directly before a full stop are removed."""
    tidied = remove_whitespace_before_punctuation(dirty)
    assert tidied == clean
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("dumb", "smart"),
    (
        # Straight quotes in running text become curly quotes
        (
            """And I said, "what about breakfast at Tiffany's"?""",
            """And I said, “what about breakfast at Tiffany’s”?""",
        ),
        # Quotes inside an anchor tag and its link text are left untouched
        (
            """\n<a href="http://example.com?q='foo'">http://example.com?q='foo'</a>\n""",
            """\n<a href="http://example.com?q='foo'">http://example.com?q='foo'</a>\n""",
        ),
    ),
)
def test_smart_quotes(dumb, smart):
    """make_quotes_smart curls quotes in prose and leaves the link markup as it was."""
    converted = make_quotes_smart(dumb)
    assert converted == smart
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("nasty", "nice"),
    (
        # Spaced hyphens in running text
        (
            "The en dash - always with spaces in running text when, as "
            "discussed in this section, indicating a parenthesis or "
            "pause - and the spaced em dash both have a certain "
            "technical advantage over the unspaced em dash. ",
            "The en dash \u2013 always with spaces in running text when, as "
            "discussed in this section, indicating a parenthesis or "
            "pause \u2013 and the spaced em dash both have a certain "
            "technical advantage over the unspaced em dash. ",
        ),
        # Runs of two or three hyphens collapse to a single en dash…
        ("double -- dash", "double \u2013 dash"),
        ("triple --- dash", "triple \u2013 dash"),
        # …but a run of four is left alone
        ("quadruple ---- dash", "quadruple ---- dash"),
        # A spaced em dash becomes an en dash
        ("em — dash", "em – dash"),
        # \u0020 is a normal space character
        ("already\u0020–\u0020correct", "already\u0020–\u0020correct"),
        # An unspaced hyphen is not replaced
        ("2004-2008", "2004-2008"),
    ),
)
def test_en_dashes(nasty, nice):
    """Spaced hyphens and dashes are normalised to en dashes; unspaced hyphens are kept."""
    replaced = replace_hyphens_with_en_dashes(nasty)
    assert replaced == nice
|
||
|
||
|
||
def test_unicode_dash_lookup():
    """Pin down which characters make up the en dash replacement sequence.

    The sequence must be an ordinary space (U+0020) followed by an en dash
    (U+2013); it must contain neither a no-break space nor a hyphen.
    """
    en_dash_replacement_sequence = "\u0020\u2013"
    hyphen = "-"
    en_dash = "–"
    space = " "
    # Written as an escape: a literal no-break space cannot be told apart from
    # an ordinary space on screen and is easily converted to one by editors
    # and tooling, which would make the "not in" assertion below fail.
    non_breaking_space = "\u00A0"
    assert en_dash_replacement_sequence == space + en_dash
    assert non_breaking_space not in en_dash_replacement_sequence
    assert hyphen not in en_dash_replacement_sequence
|
||
|
||
|
||
@pytest.mark.parametrize(
    "value",
    (
        "bar",
        " bar ",
        # Newlines, a tab and a space around the word
        "\n\t bar\n",
        # The same idea, spelled out with Unicode escapes
        " \u180E\u200B \u200C bar \u200D \u2060\uFEFF ",
    ),
)
def test_strip_all_whitespace(value):
    """Every variant is reduced to the bare word."""
    stripped = strip_all_whitespace(value)
    assert stripped == "bar"
|
||
|
||
|
||
@pytest.mark.parametrize(
    "value",
    (
        "notifications-email",
        # Ordinary whitespace (spaces, tab, form feed) at both ends
        " \tnotifications-email \x0c ",
        # A leading carriage return plus escaped characters scattered through the word
        "\rn\u200Coti\u200Dfi\u200Bcati\u2060ons-\u180Eemai\uFEFFl\uFEFF",
    ),
)
def test_strip_and_remove_obscure_whitespace(value):
    """Every variant comes back as the plain string "notifications-email"."""
    cleaned = strip_and_remove_obscure_whitespace(value)
    assert cleaned == "notifications-email"
|
||
|
||
|
||
def test_strip_and_remove_obscure_whitespace_only_removes_normal_whitespace_from_ends():
    """Interior spaces, newlines and tabs survive; only the ends are trimmed."""
    sentence = " words \n over multiple lines with \ttabs\t "
    expected = "words \n over multiple lines with \ttabs"
    cleaned = strip_and_remove_obscure_whitespace(sentence)
    assert cleaned == expected
|
||
|
||
|
||
def test_remove_smart_quotes_from_email_addresses():
    """Only the curly apostrophe inside the email address is straightened.

    Curly quotes elsewhere in the text (possessives, quoted words) are
    expected to come back unchanged.
    """
    # Input and expected output differ in exactly one character: the
    # apostrophe inside the email address.
    text = (
        "\n"
        "line one’s quote\n"
        "first.o{apostrophe}last@example.com is someone’s email address\n"
        "line ‘three’\n"
    )
    with_smart_quote = text.format(apostrophe="’")
    with_straight_quote = text.format(apostrophe="'")

    assert remove_smart_quotes_from_email_addresses(with_smart_quote) == with_straight_quote
|
||
|
||
|
||
def test_strip_unsupported_characters():
    """The U+2028 character is removed outright, with nothing put in its place."""
    with_line_separator = "line one\u2028line two"
    stripped = strip_unsupported_characters(with_line_separator)
    assert stripped == "line oneline two"
|
||
|
||
|
||
@pytest.mark.parametrize(
    "value",
    (
        "\u200C Your tax is\ndue\n\n",
        " Your tax is due ",
        # U+00A0 (no-break space) in place of ordinary spaces
        "\u00A0Your\u00A0tax\u00A0 is\u00A0\u00A0due\u00A0",
        # Zero-width characters mixed into the text
        "\u180EYour \u200Btax\u200C is \u200D\u2060due \uFEFF",
        # Tabs in place of spaces
        "\tYour tax\tis due ",
    ),
)
def test_normalise_whitespace(value):
    """Every variant normalises to the same single-spaced sentence."""
    normalised = normalise_whitespace(value)
    assert normalised == "Your tax is due"
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("content", "expected_html"),
    [
        # With and without an explicit scheme
        ("http://example.com", '<a href="http://example.com">http://example.com</a>'),
        ("https://example.com", '<a href="https://example.com">https://example.com</a>'),
        ("example.com", '<a href="http://example.com">example.com</a>'),
        (
            "www.foo.bar.example.com",
            '<a href="http://www.foo.bar.example.com">www.foo.bar.example.com</a>',
        ),
        # Paths, query strings and fragments
        ("example.com/", '<a href="http://example.com/">example.com/</a>'),
        (
            "www.foo.bar.example.com/",
            '<a href="http://www.foo.bar.example.com/">www.foo.bar.example.com/</a>',
        ),
        ("example.com/foo", '<a href="http://example.com/foo">example.com/foo</a>'),
        ("example.com?foo", '<a href="http://example.com?foo">example.com?foo</a>'),
        ("example.com#foo", '<a href="http://example.com#foo">example.com#foo</a>'),
        # Trailing punctuation: "." and ":" stay outside the link, ";" stays inside
        (
            "Go to gov.uk/example.",
            'Go to <a href="http://gov.uk/example">gov.uk/example</a>.',
        ),
        (
            "Go to gov.uk/example:",
            'Go to <a href="http://gov.uk/example">gov.uk/example</a>:',
        ),
        (
            "Go to gov.uk/example;",
            'Go to <a href="http://gov.uk/example;">gov.uk/example;</a>',
        ),
        # Brackets around and inside URLs
        (
            "(gov.uk/example)",
            '(<a href="http://gov.uk/example">gov.uk/example</a>)',
        ),
        (
            "(gov.uk/example)...",
            '(<a href="http://gov.uk/example">gov.uk/example</a>)...',
        ),
        (
            "(gov.uk/example.)",
            '(<a href="http://gov.uk/example">gov.uk/example</a>.)',
        ),
        (
            "(see example.com/foo_(bar))",
            '(see <a href="http://example.com/foo_%28bar%29">example.com/foo_(bar)</a>)',
        ),
        (
            "example.com/foo(((((((bar",
            '<a href="http://example.com/foo%28%28%28%28%28%28%28bar">example.com/foo(((((((bar</a>',
        ),
        (
            "government website (gov.uk). Other websites…",
            'government website (<a href="http://gov.uk">gov.uk</a>). Other websites…',
        ),
        (
            "[gov.uk/example]",
            '[<a href="http://gov.uk/example">gov.uk/example</a>]',
        ),
        # Several URLs, and URLs inside existing markup
        (
            "gov.uk/foo, gov.uk/bar",
            '<a href="http://gov.uk/foo">gov.uk/foo</a>, <a href="http://gov.uk/bar">gov.uk/bar</a>',
        ),
        (
            "<p>gov.uk/foo</p>",
            '<p><a href="http://gov.uk/foo">gov.uk/foo</a></p>',
        ),
        # NOTE(review): this case may originally have used an escaped
        # ampersand ("&amp;") in both input and output — confirm.
        (
            "gov.uk?foo&",
            '<a href="http://gov.uk?foo&">gov.uk?foo&</a>',
        ),
        # A bare domain suffix is not linked
        ("a .service.gov.uk domain", "a .service.gov.uk domain"),
        # Double quotes are percent-encoded in the href only
        (
            'http://foo.com/"bar"?x=1#2',
            '<a href="http://foo.com/%22bar%22?x=1#2">http://foo.com/"bar"?x=1#2</a>',
        ),
        # Email addresses are not linked
        ("firstname.lastname@example.com", "firstname.lastname@example.com"),
        ("with-subdomain@test.example.com", "with-subdomain@test.example.com"),
    ],
)
def test_autolink_urls_matches_correctly(content, expected_html):
    """autolink_urls wraps exactly the URL part of the content in an anchor tag."""
    linked = autolink_urls(content)
    assert linked == expected_html
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("extra_kwargs", "expected_html"),
    [
        # No extra arguments: the anchor carries only an href
        ({}, '<a href="http://example.com">http://example.com</a>'),
        # classes=... becomes a class attribute placed before the href
        (
            {"classes": "govuk-link"},
            '<a class="govuk-link" href="http://example.com">http://example.com</a>',
        ),
    ],
)
def test_autolink_urls_applies_correct_attributes(extra_kwargs, expected_html):
    """Keyword arguments to autolink_urls control the attributes on the generated anchor."""
    linked = autolink_urls("http://example.com", **extra_kwargs)
    assert linked == expected_html
|
||
|
||
|
||
@pytest.mark.parametrize(
    "content",
    ["without link", "with link to https://example.com"],
)
def test_autolink_urls_returns_markup(content):
    """The result is a Markup instance whether or not the content contains a URL."""
    linked = autolink_urls(content)
    assert isinstance(linked, Markup)
|