This commit is contained in:
Kenneth Kehl
2025-04-15 11:36:09 -07:00
parent d1fab496f4
commit e93e3f3690
22 changed files with 96 additions and 302 deletions

View File

@@ -12,15 +12,15 @@ from notifications_utils.sanitise_text import SanitiseSMS
from . import email_with_smart_quotes_regex
OBSCURE_ZERO_WIDTH_WHITESPACE = (
"\u180E" # Mongolian vowel separator
"\u200B" # zero width space
"\u200C" # zero width non-joiner
"\u200D" # zero width joiner
"\u180e" # Mongolian vowel separator
"\u200b" # zero width space
"\u200c" # zero width non-joiner
"\u200d" # zero width joiner
"\u2060" # word joiner
"\uFEFF" # zero width non-breaking space
"\ufeff" # zero width non-breaking space
)
OBSCURE_FULL_WIDTH_WHITESPACE = "\u00A0" # non breaking space
OBSCURE_FULL_WIDTH_WHITESPACE = "\u00a0" # non breaking space
ALL_WHITESPACE = (
string.whitespace + OBSCURE_ZERO_WIDTH_WHITESPACE + OBSCURE_FULL_WIDTH_WHITESPACE
@@ -61,7 +61,7 @@ more_than_two_newlines_in_a_row = re.compile(r"\n{3,}")
def unlink_govuk_escaped(message):
return re.sub(
govuk_not_a_link,
r"\1\2\3" + ".\u200B" + r"\4", # Unicode zero-width space
r"\1\2\3" + ".\u200b" + r"\4", # Unicode zero-width space
message,
)

View File

@@ -15,13 +15,13 @@ class SanitiseText:
"’": "'", # RIGHT SINGLE QUOTATION MARK (U+2019)
"“": '"', # LEFT DOUBLE QUOTATION MARK (U+201C)
"”": '"', # RIGHT DOUBLE QUOTATION MARK (U+201D)
"\u180E": "", # Mongolian vowel separator
"\u200B": "", # zero width space
"\u200C": "", # zero width non-joiner
"\u200D": "", # zero width joiner
"\u180e": "", # Mongolian vowel separator
"\u200b": "", # zero width space
"\u200c": "", # zero width non-joiner
"\u200d": "", # zero width joiner
"\u2060": "", # word joiner
"\uFEFF": "", # zero width non-breaking space
"\u00A0": " ", # NON BREAKING WHITE SPACE (U+00A0)
"\ufeff": "", # zero width non-breaking space
"\u00a0": " ", # NON BREAKING WHITE SPACE (U+00A0)
"\t": " ", # TAB
}