Pull main

This commit is contained in:
Andrew Shumway
2024-06-25 09:05:52 -06:00
5 changed files with 55 additions and 40 deletions

View File

@@ -51,7 +51,6 @@ from app.utils.templates import get_template
from app.utils.user import user_has_permissions
from notifications_utils import SMS_CHAR_COUNT_LIMIT
from notifications_utils.insensitive_dict import InsensitiveDict
from notifications_utils.logging import scrub
from notifications_utils.recipients import RecipientCSV, first_column_headings
from notifications_utils.sanitise_text import SanitiseASCII
@@ -953,9 +952,6 @@ def send_notification(service_id, template_id):
)
)
current_app.logger.info(
hilite(scrub(f"Recipient for the one-off will be {recipient}"))
)
keys = []
values = []
for k, v in session["placeholders"].items():
@@ -971,6 +967,12 @@ def send_notification(service_id, template_id):
)
my_data = {"filename": filename, "template_id": template_id, "data": data}
upload_id = s3upload(service_id, my_data)
# To debug messages that the user reports have not been sent, we log
# the csv filename and the job id. The user will give us the file name,
# so we can search on that to obtain the job id, which we can use elsewhere
# on the API side to find out what happens to the message.
current_app.logger.info(hilite(f"One-off file: {filename} job_id: {upload_id}"))
form = CsvUploadForm()
form.file.data = my_data
form.file.name = filename
@@ -989,19 +991,6 @@ def send_notification(service_id, template_id):
valid="True",
)
# Here we are attempting to cleverly link the job id to the one-off recipient
# If we know the partial phone number of the recipient, we can search
# on that initially and find this, which will give us the job_id
# And once we know the job_id, we can search on that and it might tell us something
# about report generation.
current_app.logger.info(
hilite(
scrub(
f"Created job to send one-off, recipient is {recipient}, job_id is {upload_id}"
)
)
)
session.pop("recipient")
session.pop("placeholders")
@@ -1033,7 +1022,12 @@ def send_notification(service_id, template_id):
job_id=upload_id,
)
)
total = notifications["total"]
current_app.logger.info(
hilite(
f"job_id: {upload_id} has notifications: {total} and attempts: {attempts}"
)
)
return redirect(
url_for(
".view_job",

View File

@@ -7,7 +7,6 @@ from flask_login import current_user
from app.models.spreadsheet import Spreadsheet
from app.utils import hilite
from app.utils.templates import get_sample_template
from notifications_utils.logging import scrub
from notifications_utils.recipients import RecipientCSV
@@ -74,12 +73,11 @@ def generate_notifications_csv(**kwargs):
# This generates the "batch" csv report
if kwargs.get("job_id"):
# The kwargs contain the job id, which is linked to the recipient's partial phone number in other debug
# Some unit tests are mocking the kwargs and turning them into a function instead of dict,
# hence the try/except.
try:
current_app.logger.info(
hilite(f"Setting up report with kwargs {scrub(json.dumps(kwargs))}")
hilite(f"Setting up report with kwargs {json.dumps(kwargs)}")
)
except TypeError:
pass
@@ -89,7 +87,7 @@ def generate_notifications_csv(**kwargs):
# we display to 999 characters, because we don't want to show the contents for reports with thousands of rows.
current_app.logger.info(
hilite(
f"Original csv for job_id {kwargs['job_id']}: {scrub(original_file_contents[0:999])}"
f"Original csv for job_id {kwargs['job_id']}: {original_file_contents[0:999]}"
)
)
original_upload = RecipientCSV(

View File

@@ -1,6 +1,6 @@
env: production
instances: 2
memory: 1G
memory: 2G
public_admin_route: beta.notify.gov
cloud_dot_gov_route: notify.app.cloud.gov
redis_enabled: 1

View File

@@ -70,6 +70,7 @@ def configure_handler(handler, app, formatter):
handler.addFilter(AppNameFilter(app.config["NOTIFY_APP_NAME"]))
handler.addFilter(RequestIdFilter())
handler.addFilter(ServiceIdFilter())
handler.addFilter(PIIFilter())
return handler
@@ -134,13 +135,30 @@ class JSONFormatter(BaseJSONFormatter):
return log_record
def scrub(msg):
# Eventually we want to scrub all messages in all logs for phone numbers
# and email addresses, masking them. Ultimately this will probably get
# refactored into a 'SafeLogger' subclass or something, but let's start here
# with phones.
phones = re.findall("(?:\\+ *)?\\d[\\d\\- ]{7,}\\d", msg)
phones = [phone.replace("-", "").replace(" ", "") for phone in phones]
for phone in phones:
msg = msg.replace(phone, f"1XXXXX{phone[-5:]}")
return msg
class PIIFilter(logging.Filter):
def scrub(self, msg):
# Eventually we want to scrub all messages in all logs for phone numbers
# and email addresses, masking them. Ultimately this will probably get
# refactored into a 'SafeLogger' subclass or something, but let's start here
# with phones.
# Sometimes just an exception object is passed in for the message, skip those.
if not isinstance(msg, str):
return msg
phones = re.findall("(?:\\+ *)?\\d[\\d\\- ]{7,}\\d", msg)
phones = [phone.replace("-", "").replace(" ", "") for phone in phones]
for phone in phones:
msg = msg.replace(phone, "1XXXXXXXXXX")
emails = re.findall(
r"[\w\.-]+@[\w\.-]+", msg
) # ['alice@google.com', 'bob@abc.com']
for email in emails:
# do something with each found email string
masked_email = "XXXXX@XXXXXXX"
msg = msg.replace(email, masked_email)
return msg
def filter(self, record):
record.msg = self.scrub(record.msg)
return record

View File

@@ -51,11 +51,16 @@ def test_base_json_formatter_contains_service_id():
assert service_id_filter.filter(record).service_id == "no-service-id"
def test_scrub():
result = logging.scrub(
"This is a message with 17775554324, and also 18884449323 and also 17775554324"
)
assert (
result
== "This is a message with 1XXXXX54324, and also 1XXXXX49323 and also 1XXXXX54324"
def test_pii_filter():
record = builtin_logging.LogRecord(
name="log thing",
level="info",
pathname="path",
lineno=123,
msg="phone1: 1555555555, phone2: 1555555554, email1: fake@fake.gov, email2: fake@fake2.fake.gov",
exc_info=None,
args=None,
)
pii_filter = logging.PIIFilter()
clean_msg = "phone1: 1XXXXXXXXXX, phone2: 1XXXXXXXXXX, email1: XXXXX@XXXXXXX, email2: XXXXX@XXXXXXX"
assert pii_filter.filter(record).msg == clean_msg