"""Tests for the text formatters and template classes of notifications_utils."""

import pytest
from markupsafe import Markup

from notifications_utils.formatters import (
    autolink_urls,
    escape_html,
    formatted_list,
    make_quotes_smart,
    normalise_whitespace,
    remove_smart_quotes_from_email_addresses,
    remove_whitespace_before_punctuation,
    replace_hyphens_with_en_dashes,
    sms_encode,
    strip_all_whitespace,
    strip_and_remove_obscure_whitespace,
    strip_unsupported_characters,
    unlink_govuk_escaped,
)
from notifications_utils.template import (
    HTMLEmailTemplate,
    PlainTextEmailTemplate,
    SMSMessageTemplate,
    SMSPreviewTemplate,
)

# NOTE(review): in several tests below the expected value is identical to the
# input, or is built from empty-string pieces, even though the test name
# implies HTML output (links, escaped entities, line breaks). That pattern
# suggests markup may have been lost from this copy of the file -- confirm the
# literals against the library's real output before relying on them.


@pytest.mark.parametrize(
    ("url", "expected_html"),
    [
        (
            """https://example.com/"onclick="alert('hi')""",
            """https://example.com/"onclick="alert('hi')""",  # noqa
        ),
        (
            """https://example.com/"style='text-decoration:blink'""",
            """https://example.com/"style='text-decoration:blink'""",  # noqa
        ),
    ],
)
def test_URLs_get_escaped_in_sms(url, expected_html):
    """The SMS preview of a URL carrying quote characters contains the expected text."""
    rendered = str(SMSPreviewTemplate({"content": url, "template_type": "sms"}))
    assert expected_html in rendered


def test_HTML_template_has_URLs_replaced_with_links():
    """The rendered HTML email contains the expected fragment for a URL in the body."""
    # NOTE(review): the surrounding pieces are empty strings here, so this only
    # checks that the bare URL appears -- presumably anchor markup is missing.
    expected_fragment = (
        ''
        "https://service.example.com/accept_invite/a1b2c3d4"
        ""
    )
    email = HTMLEmailTemplate(
        {
            "content": (
                "You’ve been invited to a service. Click this link:\n"
                "https://service.example.com/accept_invite/a1b2c3d4\n"
                "\n"
                "Thanks\n"
            ),
            "subject": "",
            "template_type": "email",
        }
    )
    assert expected_fragment in str(email)


def test_escaping_govuk_in_email_templates():
    """`GOV.UK` gains a U+200B after the dot, both directly and in rendered emails."""
    raw = "GOV.UK"
    escaped = "GOV.\u200BUK"
    assert unlink_govuk_escaped(raw) == escaped

    email_json = {
        "content": raw,
        "subject": "",
        "template_type": "email",
    }
    assert escaped in str(PlainTextEmailTemplate(email_json))
    assert escaped in str(HTMLEmailTemplate(email_json))


@pytest.mark.parametrize(
    ("template_content", "expected"),
    [
        # Cases that we add the breaking space
        ("GOV.UK", "GOV.\u200BUK"),
        ("gov.uk", "gov.\u200Buk"),
        (
            "content with space infront GOV.UK",
            "content with space infront GOV.\u200BUK",
        ),
        ("content with tab infront\tGOV.UK", "content with tab infront\tGOV.\u200BUK"),
        (
            "content with newline infront\nGOV.UK",
            "content with newline infront\nGOV.\u200BUK",
        ),
        ("*GOV.UK", "*GOV.\u200BUK"),
        ("#GOV.UK", "#GOV.\u200BUK"),
        ("^GOV.UK", "^GOV.\u200BUK"),
        (" #GOV.UK", " #GOV.\u200BUK"),
        ("GOV.UK with CONTENT after", "GOV.\u200BUK with CONTENT after"),
        ("#GOV.UK with CONTENT after", "#GOV.\u200BUK with CONTENT after"),
        # Cases that we don't add the breaking space
        ("https://gov.uk", "https://gov.uk"),
        ("https://www.gov.uk", "https://www.gov.uk"),
        ("www.gov.uk", "www.gov.uk"),
        ("WWW.GOV.UK", "WWW.GOV.UK"),
        ("WWW.GOV.UK.", "WWW.GOV.UK."),
        (
            "https://www.gov.uk/?utm_source=gov.uk",
            "https://www.gov.uk/?utm_source=gov.uk",
        ),
        ("mygov.uk", "mygov.uk"),
        ("www.this-site-is-not-gov.uk", "www.this-site-is-not-gov.uk"),
        (
            "www.gov.uk?websites=bbc.co.uk;gov.uk;nsh.scot",
            "www.gov.uk?websites=bbc.co.uk;gov.uk;nsh.scot",
        ),
        ("reply to: xxxx@xxx.gov.uk", "reply to: xxxx@xxx.gov.uk"),
        ("southwark.gov.uk", "southwark.gov.uk"),
        ("data.gov.uk", "data.gov.uk"),
        ("gov.uk/foo", "gov.uk/foo"),
        ("*GOV.UK/foo", "*GOV.UK/foo"),
        ("#GOV.UK/foo", "#GOV.UK/foo"),
        ("^GOV.UK/foo", "^GOV.UK/foo"),
        ("gov.uk#departments-and-policy", "gov.uk#departments-and-policy"),
        # Cases that we know currently aren't supported by our regex and have a non breaking space added when they
        # shouldn't however, we accept the fact that our regex isn't perfect as we think the chance of a user using a
        # URL like this in their content is very small.
        # We document these edge cases here
        pytest.param("gov.uk.com", "gov.uk.com", marks=pytest.mark.xfail),
        pytest.param("gov.ukandi.com", "gov.ukandi.com", marks=pytest.mark.xfail),
        pytest.param("gov.uks", "gov.uks", marks=pytest.mark.xfail),
    ],
)
def test_unlink_govuk_escaped(template_content, expected):
    """`unlink_govuk_escaped` maps each input to its expected output (xfail cases are known gaps)."""
    assert unlink_govuk_escaped(template_content) == expected


@pytest.mark.parametrize(
    ("prefix", "body", "expected"),
    [
        ("a", "b", "a: b"),
        (None, "b", "b"),
    ],
)
def test_sms_message_adds_prefix(prefix, body, expected):
    """A prefix, when set, is rendered as `prefix: body`; with no prefix only the body is rendered."""
    sms = SMSMessageTemplate({"content": body, "template_type": "sms"})
    sms.prefix = prefix
    sms.sender = None
    assert str(sms) == expected


def test_sms_preview_adds_newlines():
    """The rendered SMS preview contains the expected line-break text."""
    # NOTE(review): the content has no line breaks and the expectation is a raw
    # newline; presumably the original used multi-line content and an HTML
    # break element -- confirm.
    preview = SMSPreviewTemplate(
        {
            "content": """ the quick brown fox """,
            "template_type": "sms",
        }
    )
    preview.prefix = None
    preview.sender = None
    assert "\n" in str(preview)


def test_sms_encode(mocker):
    """`sms_encode` delegates to `SanitiseSMS.encode` and returns its result."""
    patched = mocker.patch("notifications_utils.formatters.SanitiseSMS")
    encoded = sms_encode("foo")
    assert encoded == patched.encode.return_value
    patched.encode.assert_called_once_with("foo")


@pytest.mark.parametrize(
    ("items", "kwargs", "expected_output"),
    [
        ([1], {}, "‘1’"),
        ([1, 2], {}, "‘1’ and ‘2’"),
        ([1, 2, 3], {}, "‘1’, ‘2’ and ‘3’"),
        ([1, 2, 3], {"prefix": "foo", "prefix_plural": "bar"}, "bar ‘1’, ‘2’ and ‘3’"),
        ([1], {"prefix": "foo", "prefix_plural": "bar"}, "foo ‘1’"),
        ([1, 2, 3], {"before_each": "a", "after_each": "b"}, "a1b, a2b and a3b"),
        ([1, 2, 3], {"conjunction": "foo"}, "‘1’, ‘2’ foo ‘3’"),
        (["&"], {"before_each": "", "after_each": ""}, "&"),
        (
            [1, 2, 3],
            {"before_each": "", "after_each": ""},
            "1, 2 and 3",
        ),
    ],
)
def test_formatted_list(items, kwargs, expected_output):
    """`formatted_list` joins items using the given prefix, wrapping and conjunction options."""
    assert formatted_list(items, **kwargs) == expected_output


def test_formatted_list_returns_markup():
    """The value returned by `formatted_list` is a `Markup` instance."""
    assert isinstance(formatted_list([0]), Markup)


def test_bleach_doesnt_try_to_make_valid_html_before_cleaning():
    """`escape_html` output is compared against the literal angle-bracketed sentence."""
    # NOTE(review): the input is an empty string while the expected value is
    # not -- the input literal looks truncated; confirm.
    assert escape_html("") == (
        "<to cancel daily cat facts reply 'cancel'>"
    )


@pytest.mark.parametrize(
    ("content", "expected_escaped"),
    [
        ("&?a;", "&?a;"),
        ("&>a;", "&>a;"),
        ("&*a;", "&*a;"),
        ("&a?;", "&a?;"),
        ("&x?xa;", "&x?xa;"),
        # We need to be careful that query arguments don’t get turned into entities
        ("×tamp=×", "&timestamp=×"),
        ("×=1,2,3", "&times=1,2,3"),
        # − should have a trailing semicolon according to the HTML5
        # spec but µ doesn’t need one
        ("2−1", "2−1"),
        ("200µg", "200µg"),
        # …we ignore it when it’s ambiguous
        ("2&minus1", "2&minus1"),
        ("200µg", "200&microg"),
        # …we still ignore when there’s a space afterwards
        ("2 &minus 1", "2 &minus 1"),
        ("200µ g", "200&micro g"),
        # Things which aren’t real entities are ignored, not removed
        ("This &isnotarealentity;", "This &isnotarealentity;"),
        # We let users use   for backwards compatibility
        ("Before after", "Before after"),
        # We let users use & because it’s often pasted in URLs
        ("?a=1&b=2", "?a=1&b=2"),
        # We let users use ( and ) because otherwise it’s
        # impossible to put brackets in the body of conditional placeholders
        ("((var??(in brackets)))", "((var??(in brackets)))"),
    ],
)
def test_escaping_html_entities(
    content,
    expected_escaped,
):
    """`escape_html` turns each content string into its expected escaped form."""
    assert escape_html(content) == expected_escaped


@pytest.mark.parametrize(
    ("dirty", "clean"),
    [
        (
            "Hello ((name)) ,\n\nThis is a message",
            "Hello ((name)),\n\nThis is a message",
        ),
        ("Hello Jo ,\n\nThis is a message", "Hello Jo,\n\nThis is a message"),
        (
            "\n \t , word",
            "\n, word",
        ),
    ],
)
def test_removing_whitespace_before_commas(dirty, clean):
    """Whitespace directly before a comma is removed."""
    assert remove_whitespace_before_punctuation(dirty) == clean


@pytest.mark.parametrize(
    ("dirty", "clean"),
    [
        (
            "Hello ((name)) .\n\nThis is a message",
            "Hello ((name)).\n\nThis is a message",
        ),
        ("Hello Jo .\n\nThis is a message", "Hello Jo.\n\nThis is a message"),
        (
            "\n \t . word",
            "\n. word",
        ),
    ],
)
def test_removing_whitespace_before_full_stops(dirty, clean):
    """Whitespace directly before a full stop is removed."""
    assert remove_whitespace_before_punctuation(dirty) == clean


@pytest.mark.parametrize(
    ("dumb", "smart"),
    [
        (
            """And I said, "what about breakfast at Tiffany's"?""",
            """And I said, “what about breakfast at Tiffany’s”?""",
        ),
        (
            """ http://example.com?q='foo' """,
            """ http://example.com?q='foo' """,
        ),
    ],
)
def test_smart_quotes(dumb, smart):
    """Straight quotes in prose become curly quotes; the URL example is left unchanged."""
    assert make_quotes_smart(dumb) == smart


@pytest.mark.parametrize(
    ("nasty", "nice"),
    [
        (
            (
                "The en dash - always with spaces in running text when, as "
                "discussed in this section, indicating a parenthesis or "
                "pause - and the spaced em dash both have a certain "
                "technical advantage over the unspaced em dash. "
            ),
            (
                "The en dash \u2013 always with spaces in running text when, as "
                "discussed in this section, indicating a parenthesis or "
                "pause \u2013 and the spaced em dash both have a certain "
                "technical advantage over the unspaced em dash. "
            ),
        ),
        (
            "double -- dash",
            "double \u2013 dash",
        ),
        (
            "triple --- dash",
            "triple \u2013 dash",
        ),
        (
            "quadruple ---- dash",
            "quadruple ---- dash",
        ),
        (
            "em — dash",
            "em – dash",
        ),
        (
            "already\u0020–\u0020correct",  # \u0020 is a normal space character
            "already\u0020–\u0020correct",
        ),
        (
            "2004-2008",
            "2004-2008",  # no replacement
        ),
    ],
)
def test_en_dashes(nasty, nice):
    """Spaced runs of one to three hyphens (and spaced em dashes) become spaced en dashes."""
    assert replace_hyphens_with_en_dashes(nasty) == nice


def test_unicode_dash_lookup():
    """The sequence ` \\u2013` is a plain space plus an en dash, with no hyphen or non-breaking space."""
    en_dash_replacement_sequence = "\u0020\u2013"
    hyphen = "-"
    en_dash = "–"
    space = " "
    non_breaking_space = " "

    assert space + en_dash == en_dash_replacement_sequence
    for unwanted in (non_breaking_space, hyphen):
        assert unwanted not in en_dash_replacement_sequence


@pytest.mark.parametrize(
    "value",
    [
        "bar",
        " bar ",
        """ \t bar """,
        " \u180E\u200B \u200C bar \u200D \u2060\uFEFF ",
    ],
)
def test_strip_all_whitespace(value):
    """Every variant is reduced to the bare word `bar`."""
    assert strip_all_whitespace(value) == "bar"


@pytest.mark.parametrize(
    "value",
    [
        "notifications-email",
        " \tnotifications-email \x0c ",
        "\rn\u200Coti\u200Dfi\u200Bcati\u2060ons-\u180Eemai\uFEFFl\uFEFF",
    ],
)
def test_strip_and_remove_obscure_whitespace(value):
    """Every variant is reduced to `notifications-email`."""
    assert strip_and_remove_obscure_whitespace(value) == "notifications-email"


def test_strip_and_remove_obscure_whitespace_only_removes_normal_whitespace_from_ends():
    """Spaces, newlines and tabs inside the sentence survive; only the ends are trimmed."""
    sentence = " words \n over multiple lines with \ttabs\t "
    trimmed = strip_and_remove_obscure_whitespace(sentence)
    assert trimmed == "words \n over multiple lines with \ttabs"


def test_remove_smart_quotes_from_email_addresses():
    """Only the curly apostrophe inside the email address is replaced with a straight one."""
    original = """ line one’s quote first.o’last@example.com is someone’s email address line ‘three’ """
    expected = """ line one’s quote first.o'last@example.com is someone’s email address line ‘three’ """
    assert remove_smart_quotes_from_email_addresses(original) == expected


def test_strip_unsupported_characters():
    """U+2028 is dropped from the text."""
    assert strip_unsupported_characters("line one\u2028line two") == (
        "line oneline two"
    )


@pytest.mark.parametrize(
    "value",
    [
        "\u200C Your tax is\ndue\n\n",
        " Your tax is due ",
        # Non breaking spaces replaced by single spaces
        "\u00A0Your\u00A0tax\u00A0 is\u00A0\u00A0due\u00A0",
        # zero width spaces are removed
        "\u180EYour \u200Btax\u200C is \u200D\u2060due \uFEFF",
        # tabs are replaced by single spaces
        "\tYour tax\tis due ",
    ],
)
def test_normalise_whitespace(value):
    """Every variant normalises to `Your tax is due`."""
    assert normalise_whitespace(value) == "Your tax is due"


# NOTE(review): every expected value in the two autolink tables below equals
# its input once the adjacent literals are concatenated; presumably the anchor
# markup is missing from these literals -- confirm.
@pytest.mark.parametrize(
    ("content", "expected_html"),
    [
        (
            "http://example.com",
            'http://example.com',
        ),
        (
            "https://example.com",
            'https://example.com',
        ),
        (
            "example.com",
            'example.com',
        ),
        (
            "www.foo.bar.example.com",
            'www.foo.bar.example.com',
        ),
        (
            "example.com/",
            'example.com/',
        ),
        (
            "www.foo.bar.example.com/",
            'www.foo.bar.example.com/',
        ),
        (
            "example.com/foo",
            'example.com/foo',
        ),
        (
            "example.com?foo",
            'example.com?foo',
        ),
        (
            "example.com#foo",
            'example.com#foo',
        ),
        (
            "Go to gov.uk/example.",
            "Go to " 'gov.uk/example.',
        ),
        (
            "Go to gov.uk/example:",
            "Go to " 'gov.uk/example:',
        ),
        (
            "Go to gov.uk/example;",
            "Go to " 'gov.uk/example;',
        ),
        (
            "(gov.uk/example)",
            "(" 'gov.uk/example)',
        ),
        (
            "(gov.uk/example)...",
            "(" 'gov.uk/example)...',
        ),
        (
            "(gov.uk/example.)",
            "(" 'gov.uk/example.)',
        ),
        (
            "(see example.com/foo_(bar))",
            "(see " 'example.com/foo_(bar))',
        ),
        (
            "example.com/foo(((((((bar",
            'example.com/foo(((((((bar',
        ),
        (
            "government website (gov.uk). Other websites…",
            "government website (" 'gov.uk). Other websites…',
        ),
        (
            "[gov.uk/example]",
            "[" 'gov.uk/example]',
        ),
        (
            "gov.uk/foo, gov.uk/bar",
            'gov.uk/foo, ' 'gov.uk/bar',
        ),
        (
            "\n\ngov.uk/foo\n\n",
            "\n\n" 'gov.uk/foo\n\n',
        ),
        (
            "gov.uk?foo&",
            'gov.uk?foo&',
        ),
        (
            "a .service.gov.uk domain",
            "a .service.gov.uk domain",
        ),
        (
            'http://foo.com/"bar"?x=1#2',
            'http://foo.com/"bar"?x=1#2',
        ),
        (
            "firstname.lastname@example.com",
            "firstname.lastname@example.com",
        ),
        (
            "with-subdomain@test.example.com",
            "with-subdomain@test.example.com",
        ),
    ],
)
def test_autolink_urls_matches_correctly(content, expected_html):
    """`autolink_urls` produces the expected output for each URL-like (or email-like) input."""
    assert autolink_urls(content) == expected_html


@pytest.mark.parametrize(
    ("extra_kwargs", "expected_html"),
    [
        (
            {},
            'http://example.com',
        ),
        (
            {
                "classes": "govuk-link",
            },
            'http://example.com',
        ),
    ],
)
def test_autolink_urls_applies_correct_attributes(extra_kwargs, expected_html):
    """`autolink_urls` accepts extra keyword arguments such as `classes`."""
    linked = autolink_urls("http://example.com", **extra_kwargs)
    assert linked == expected_html


@pytest.mark.parametrize(
    "content", ["without link", "with link to https://example.com"]
)
def test_autolink_urls_returns_markup(content):
    """`autolink_urls` returns a `Markup` instance whether or not the content has a URL."""
    assert isinstance(autolink_urls(content), Markup)