mirror of
https://github.com/GSA/notifications-admin.git
synced 2026-02-06 11:23:48 -05:00
Make the guessing a bit more sophisticated
Things we’ve noticed from looking at real data that we could handle in a smarter way:
- removing numbers (there might be a tom.smith2@dept.gov.uk if tom.smith is already taken)
- removing middle initials (again, these tend to be used for disambiguation and aren’t included when we ask people for their names)
- ignoring email addresses which only have someone’s initial, not their first name (because we can’t make a decent guess in this case)
This commit is contained in:
35
app/utils.py
35
app/utils.py
@@ -27,6 +27,7 @@ from flask import (
|
||||
from flask_login import current_user
|
||||
from notifications_utils.formatters import make_quotes_smart
|
||||
from notifications_utils.recipients import RecipientCSV
|
||||
from notifications_utils.take import Take
|
||||
from notifications_utils.template import (
|
||||
EmailPreviewTemplate,
|
||||
LetterImageTemplate,
|
||||
@@ -616,11 +617,41 @@ def unicode_truncate(s, length):
|
||||
return encoded.decode('utf-8', 'ignore')
|
||||
|
||||
|
||||
def starts_with_initial(name):
    """Report whether ``name`` begins with one character followed by a dot.

    For example ``"f.last"`` gives ``True`` and ``"first.last"`` gives
    ``False``. The result is always a ``bool``.
    """
    leading_initial = re.match(r'^.\.', name)
    return leading_initial is not None
|
||||
|
||||
|
||||
def remove_middle_initial(name):
    """Remove single-character tokens that sit between whitespace in ``name``.

    Each run of "whitespace, one character, whitespace" is collapsed to a
    single space, so ``"first m last"`` becomes ``"first last"``.

    The substitution is repeated until the string stops changing. A single
    ``re.sub`` pass only replaces non-overlapping matches, so with
    back-to-back initials (``"first a b last"``) the match for ``" a "``
    consumes the space that ``" b "`` would need and ``b`` would be left
    behind. Every successful substitution shortens the string, so the loop
    always terminates.

    :param name: the candidate name, with words separated by whitespace
    :returns: ``name`` with every whitespace-delimited single character
        removed
    """
    previous = None
    while previous != name:
        previous = name
        name = re.sub(r'\s+.\s+', ' ', name)
    return name
|
||||
|
||||
|
||||
def remove_digits(name):
    """Return ``name`` with every character for which ``str.isdigit`` is true dropped.

    All other characters are kept in their original order.
    """
    kept = []
    for character in name:
        if character.isdigit():
            continue
        kept.append(character)
    return ''.join(kept)
|
||||
|
||||
|
||||
def normalize_spaces(name):
    """Collapse every run of whitespace in ``name`` to a single space.

    Leading and trailing whitespace is discarded, because ``str.split()``
    with no arguments drops empty leading and trailing fields.
    """
    words = name.split()
    return ' '.join(words)
|
||||
|
||||
|
||||
def guess_name_from_email_address(email_address):
    """Guess a person's display name from the local part of an email address.

    The text before the first ``@`` or ``+`` is taken as the candidate.
    An empty string is returned when no reasonable guess is possible:

    - the candidate contains no ``.`` (nothing separates first and last name)
    - the candidate starts with a single character and a dot, i.e. only an
      initial is available rather than a first name

    Otherwise the candidate is passed through a pipeline: dots become spaces,
    digits are removed, middle initials are removed, the text is title-cased,
    quotes are made "smart", and whitespace is normalised last so that gaps
    left by the earlier removals are collapsed.

    :param email_address: the email address to derive a name from
    :returns: ``''`` when no guess can be made, otherwise the result of the
        ``Take`` pipeline (NOTE(review): presumably a ``str`` subclass from
        ``notifications_utils.take`` -- confirm against that library)
    """
    possible_name = re.split(r'[\@\+]', email_address)[0]

    if '.' not in possible_name or starts_with_initial(possible_name):
        return ''

    return Take(
        possible_name
    ).then(
        str.replace, '.', ' '
    ).then(
        remove_digits
    ).then(
        remove_middle_initial
    ).then(
        str.title
    ).then(
        make_quotes_smart
    ).then(
        normalize_spaces
    )
|
||||
|
||||
@@ -171,13 +171,23 @@ def test_register_with_existing_email_sends_emails(
|
||||
|
||||
|
||||
@pytest.mark.parametrize('email_address, expected_value', [
|
||||
("example123@example.com", ""),
|
||||
("first.last@example.com", "First Last"),
|
||||
("first.middle.last@example.com", "First Middle Last"),
|
||||
("first.m.last@example.com", "First Last"),
|
||||
("first.last-last@example.com", "First Last-Last"),
|
||||
("first.o'last@example.com", "First O’Last"),
|
||||
("first.last+testing@example.com", "First Last"),
|
||||
("first.last+testing+testing@example.com", "First Last"),
|
||||
("first.last6@example.com", "First Last"),
|
||||
("first.last.212@example.com", "First Last"),
|
||||
("first.2.last@example.com", "First Last"),
|
||||
("first.2b.last@example.com", "First Last"),
|
||||
("first.1.2.3.last@example.com", "First Last"),
|
||||
("first.last.1.2.3@example.com", "First Last"),
|
||||
# Instances where we can’t make a good-enough guess:
|
||||
("example123@example.com", ""),
|
||||
("f.last@example.com", ""),
|
||||
("f.m.last@example.com", ""),
|
||||
])
|
||||
def test_shows_registration_page_from_invite(
|
||||
client_request,
|
||||
|
||||
Reference in New Issue
Block a user