From 9e78c5f57582f2cfd78920e852519b9749ca1ba2 Mon Sep 17 00:00:00 2001 From: Chris Hill-Scott Date: Wed, 25 Apr 2018 15:48:01 +0100 Subject: [PATCH] Strip obscure whitespace from form submissions We strip most whitespace as of: https://github.com/alphagov/notifications-admin/pull/1701 However we are still getting some bad email addresses through, for example one that had a leading zero-width space character. This means that the user sees a validation error; really we should just deal with the mess for them. So this commit also adds characters without Unicode character property "WSpace=Y" (which includes zero-width space) to those which are stripped from form submissions. List taken from here: https://en.wikipedia.org/wiki/Whitespace_character See issue and discussion here: https://bugs.python.org/issue13391 --- app/main/forms.py | 12 +++++++- tests/app/main/test_strip_whitespace_form.py | 32 ++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 tests/app/main/test_strip_whitespace_form.py diff --git a/app/main/forms.py b/app/main/forms.py index 82791a443..1ee8d25e3 100644 --- a/app/main/forms.py +++ b/app/main/forms.py @@ -1,3 +1,4 @@ +import string import weakref from datetime import datetime, timedelta from itertools import chain @@ -40,6 +41,15 @@ from app.main.validators import ( ValidGovEmail, ) +OBSCURE_WHITESPACE = ( + '\u180E' # Mongolian vowel separator + '\u200B' # zero width space + '\u200C' # zero width non-joiner + '\u200D' # zero width joiner + '\u2060' # word joiner + '\uFEFF' # zero width non-breaking space +) + def get_time_value_and_label(future_time): return ( @@ -110,7 +120,7 @@ def email_address(label='Email address', gov_user=True): def strip_whitespace(value): if value is not None and hasattr(value, 'strip'): - return value.strip() + return value.strip(string.whitespace + OBSCURE_WHITESPACE) return value diff --git a/tests/app/main/test_strip_whitespace_form.py 
b/tests/app/main/test_strip_whitespace_form.py new file mode 100644 index 000000000..496d8e835 --- /dev/null +++ b/tests/app/main/test_strip_whitespace_form.py @@ -0,0 +1,32 @@ +import pytest +from wtforms import Form, StringField + +from app.main.forms import StripWhitespaceForm, StripWhitespaceStringField + + +class ExampleForm(StripWhitespaceForm): + foo = StringField('Foo') + + +class ExampleFormSpecialField(Form): + foo = StripWhitespaceStringField('foo') + + +@pytest.mark.parametrize('submitted_data', [ + 'bar', + ' bar ', + """ + \t bar + """, + ' \u180E\u200B \u200C bar \u200D \u2060\uFEFF ', +]) +@pytest.mark.parametrize('form', [ + ExampleForm, + ExampleFormSpecialField, +]) +def test_form_strips_all_whitespace( + app_, + form, + submitted_data, +): + assert form(foo=submitted_data).foo.data == 'bar'