ensure robustness of email_safe function

* remove leading, trailing, or consecutive periods
* strip unicode accents, umlauts, diacritics etc
This commit is contained in:
Leo Hemsted
2016-10-27 17:31:13 +01:00
parent 847bb8ed57
commit a5d228d837
2 changed files with 25 additions and 9 deletions

View File

@@ -3,8 +3,11 @@ import csv
from io import StringIO
from os import path
from functools import wraps
import unicodedata
from flask import (abort, current_app, session, request, redirect, url_for)
from flask_login import current_user
import pyexcel
import pyexcel.ext.io
import pyexcel.ext.xls
@@ -141,10 +144,14 @@ def generate_previous_next_dict(view, service_id, page, title, url_args):
def email_safe(string, whitespace='.'):
    """Reduce free text to lowercase alphanumerics joined by a separator.

    Steps, in order:

    1. Accented characters are decomposed (NFD) and their combining marks
       dropped, so 'ü' becomes 'u' rather than being kept as-is.
    2. Surrounding whitespace is trimmed and every internal run of
       whitespace is replaced by ``whitespace``.
    3. Characters that are neither alphanumeric nor the separator are
       removed; everything that remains is lowercased.
    4. Consecutive separators are collapsed to one, and leading/trailing
       separators are removed.

    :param string: the text to convert.
    :param whitespace: single character used in place of whitespace
        (defaults to ``'.'``).
    :return: the sanitised string; may be empty.
    """
    # Strip accents, diacritics etc: 'Mn' is the category of the combining
    # marks that NFD normalisation splits off from their base letters.
    string = ''.join(
        character
        for character in unicodedata.normalize('NFD', string)
        if unicodedata.category(character) != 'Mn'
    )
    string = ''.join(
        character.lower()
        for character in re.sub(r'\s+', whitespace, string.strip())
        if character.isalnum() or character == whitespace
    )
    if not whitespace:
        # No separator was inserted, so there is nothing to collapse or trim.
        return string
    # Removing punctuation can leave separators side by side
    # ('a - b' -> 'a..b'), so collapse runs of the *configured* separator
    # instead of assuming it is a period. A callable replacement avoids
    # backslash-escape processing of the separator.
    string = re.sub(
        '(?:{}){{2,}}'.format(re.escape(whitespace)),
        lambda _match: whitespace,
        string,
    )
    return string.strip(whitespace)
class Spreadsheet():

View File

@@ -8,11 +8,20 @@ from freezegun import freeze_time
from app.utils import email_safe, generate_notifications_csv, generate_previous_dict, generate_next_dict, Spreadsheet
@pytest.mark.parametrize('service_name, safe_email', [
    ('name with spaces', 'name.with.spaces'),
    ('singleword', 'singleword'),
    ('UPPER CASE', 'upper.case'),
    ('Service - with dash', 'service.with.dash'),
    ('lots of spaces', 'lots.of.spaces'),
    # a run of mixed whitespace must yield a single separator
    ('runs   of \t whitespace', 'runs.of.whitespace'),
    ('name.with.dots', 'name.with.dots'),
    ('consecutive..dots', 'consecutive.dots'),
    ('name-with-other-delimiters', 'namewithotherdelimiters'),
    ('.leading', 'leading'),
    ('trailing.', 'trailing'),
    ('üńïçödë wördś', 'unicode.words'),
    # Previously lived in a separate test function of the same name, which
    # the parametrized definition shadowed; kept here so it still runs.
    ('SOME service with+stuff+ b123', 'some.service.withstuff.b123'),
])
def test_email_safe_return_dot_separated_email_domain(service_name, safe_email):
    """email_safe maps each service name to its expected dot-separated form."""
    assert email_safe(service_name) == safe_email
@pytest.mark.parametrize(