mirror of
https://github.com/GSA/notifications-admin.git
synced 2026-05-02 23:20:56 -04:00
Make the guessing a bit more sophisticated
Things we’ve noticed from looking at real data that we could handle in a smarter way:
- removing numbers (there might be a tom.smith2@dept.gov.uk if tom.smith is already taken)
- removing middle initials (again, these tend to be used for disambiguation and aren’t included when we ask people for their names)
- ignoring email addresses which only have someone’s initial, not their first name (because we can’t make a decent guess in this case)
This commit is contained in:
35
app/utils.py
35
app/utils.py
@@ -27,6 +27,7 @@ from flask import (
|
||||
from flask_login import current_user
|
||||
from notifications_utils.formatters import make_quotes_smart
|
||||
from notifications_utils.recipients import RecipientCSV
|
||||
from notifications_utils.take import Take
|
||||
from notifications_utils.template import (
|
||||
EmailPreviewTemplate,
|
||||
LetterImageTemplate,
|
||||
@@ -616,11 +617,41 @@ def unicode_truncate(s, length):
|
||||
return encoded.decode('utf-8', 'ignore')
|
||||
|
||||
|
||||
def starts_with_initial(name):
    """Report whether ``name`` opens with one character followed by a dot.

    Returns ``True`` for strings such as ``'t.smith'`` and ``False`` for
    strings such as ``'tom.smith'`` or the empty string. The leading
    character may be anything except a newline.
    """
    leading_initial = re.match(r'^.\.', name)
    return leading_initial is not None
|
||||
|
||||
|
||||
def remove_middle_initial(name):
    """Replace a lone character sitting between whitespace with one space.

    Every non-overlapping occurrence of "whitespace, one character,
    whitespace" is collapsed to a single space, so ``'Tom A Smith'``
    becomes ``'Tom Smith'``. Strings without such an occurrence are
    returned unchanged.

    Because matches cannot overlap, two adjacent initials share the
    whitespace between them and only the first is removed
    (``'Tom A B Smith'`` becomes ``'Tom B Smith'``).
    """
    lone_character = re.compile(r'\s+.\s+')
    return lone_character.sub(' ', name)
|
||||
|
||||
|
||||
def remove_digits(name):
    """Return ``name`` with every digit character stripped out.

    A character counts as a digit when ``str.isdigit`` says so; all other
    characters are kept in their original order.
    """
    kept = []
    for character in name:
        if character.isdigit():
            continue
        kept.append(character)
    return ''.join(kept)
|
||||
|
||||
|
||||
def normalize_spaces(name):
    """Collapse runs of whitespace in ``name`` to single spaces.

    Leading and trailing whitespace is dropped, and any internal run of
    whitespace (spaces, tabs, newlines) becomes exactly one space.
    """
    words = name.split()
    return ' '.join(words)
|
||||
|
||||
|
||||
def guess_name_from_email_address(email_address):
    """Guess a person's display name from the local part of an email address.

    The text before the first ``@`` or ``+`` is taken as the candidate name.
    An empty string is returned when that candidate contains no dot, or when
    it starts with a single-character initial (for example ``t.smith``),
    because no reasonable guess can be made in those cases.

    Otherwise the candidate is passed through these steps, in order: dots
    become spaces, digits are removed, a middle initial is removed, the
    text is title-cased, quotes are made smart, and whitespace is
    normalised.

    NOTE(review): ``Take.then`` is presumed to call each function with the
    current value as its first argument and wrap the result - confirm
    against ``notifications_utils.take``.
    """
    local_part = re.split(r'[\@\+]', email_address)[0]

    # Without a dot there is no first-name/last-name separator to work with.
    if '.' not in local_part:
        return ''

    # A leading initial does not reveal the person's first name.
    if starts_with_initial(local_part):
        return ''

    return Take(
        local_part
    ).then(
        str.replace, '.', ' '
    ).then(
        remove_digits
    ).then(
        remove_middle_initial
    ).then(
        str.title
    ).then(
        make_quotes_smart
    ).then(
        normalize_spaces
    )
|
||||
|
||||
Reference in New Issue
Block a user