This commit is contained in:
Kenneth Kehl
2025-03-31 08:45:33 -07:00
parent 5741c572f0
commit 7cd8be22f6
13 changed files with 322 additions and 313 deletions

View File

@@ -15,13 +15,13 @@ class SanitiseText:
"": "'", # RIGHT SINGLE QUOTATION MARK (U+2019)
"": '"', # LEFT DOUBLE QUOTATION MARK (U+201C)
"": '"', # RIGHT DOUBLE QUOTATION MARK (U+201D)
"\u180E": "", # Mongolian vowel separator
"\u200B": "", # zero width space
"\u200C": "", # zero width non-joiner
"\u200D": "", # zero width joiner
"\u180e": "", # Mongolian vowel separator
"\u200b": "", # zero width space
"\u200c": "", # zero width non-joiner
"\u200d": "", # zero width joiner
"\u2060": "", # word joiner
"\uFEFF": "", # zero width non-breaking space
"\u00A0": " ", # NON BREAKING WHITE SPACE (U+200B)
"\ufeff": "", # zero width non-breaking space
"\u00a0": " ", # NON BREAKING WHITE SPACE (U+200B)
"\t": " ", # TAB
}