diff --git a/app/main/views/send.py b/app/main/views/send.py index 8bb6ce24c..bb63f10a9 100644 --- a/app/main/views/send.py +++ b/app/main/views/send.py @@ -51,7 +51,6 @@ from app.utils.templates import get_template from app.utils.user import user_has_permissions from notifications_utils import SMS_CHAR_COUNT_LIMIT from notifications_utils.insensitive_dict import InsensitiveDict -from notifications_utils.logging import scrub from notifications_utils.recipients import RecipientCSV, first_column_headings from notifications_utils.sanitise_text import SanitiseASCII @@ -953,9 +952,6 @@ def send_notification(service_id, template_id): ) ) - current_app.logger.info( - hilite(scrub(f"Recipient for the one-off will be {recipient}")) - ) keys = [] values = [] for k, v in session["placeholders"].items(): @@ -971,6 +967,12 @@ def send_notification(service_id, template_id): ) my_data = {"filename": filename, "template_id": template_id, "data": data} upload_id = s3upload(service_id, my_data) + + # To debug messages that the user reports have not been sent, we log + # the csv filename and the job id. The user will give us the file name, + # so we can search on that to obtain the job id, which we can use elsewhere + # on the API side to find out what happens to the message. + current_app.logger.info(hilite(f"One-off file: {filename} job_id: {upload_id}")) form = CsvUploadForm() form.file.data = my_data form.file.name = filename @@ -989,19 +991,6 @@ def send_notification(service_id, template_id): valid="True", ) - # Here we are attempting to cleverly link the job id to the one-off recipient - # If we know the partial phone number of the recipient, we can search - # on that initially and find this, which will give us the job_id - # And once we know the job_id, we can search on that and it might tell us something - # about report generation. - current_app.logger.info( - hilite( - scrub( - f"Created job to send one-off, recipient is {recipient}, job_id is {upload_id}" - ) - ) - ) - session.pop("recipient") session.pop("placeholders") @@ -1033,7 +1022,12 @@ def send_notification(service_id, template_id): job_id=upload_id, ) ) - + total = notifications["total"] + current_app.logger.info( + hilite( + f"job_id: {upload_id} has notifications: {total} and attempts: {attempts}" + ) + ) return redirect( url_for( ".view_job", diff --git a/app/utils/csv.py b/app/utils/csv.py index 5c5b794de..4ed6d16b5 100644 --- a/app/utils/csv.py +++ b/app/utils/csv.py @@ -7,7 +7,6 @@ from flask_login import current_user from app.models.spreadsheet import Spreadsheet from app.utils import hilite from app.utils.templates import get_sample_template -from notifications_utils.logging import scrub from notifications_utils.recipients import RecipientCSV @@ -74,12 +73,11 @@ def generate_notifications_csv(**kwargs): # This generates the "batch" csv report if kwargs.get("job_id"): - # The kwargs contain the job id, which is linked to the recipient's partial phone number in other debug # Some unit tests are mocking the kwargs and turning them into a function instead of dict, # hence the try/except. try: current_app.logger.info( - hilite(f"Setting up report with kwargs {scrub(json.dumps(kwargs))}") + hilite(f"Setting up report with kwargs {json.dumps(kwargs)}") ) except TypeError: pass @@ -89,7 +87,7 @@ def generate_notifications_csv(**kwargs): # we display to 999 characters, because we don't want to show the contents for reports with thousands of rows. current_app.logger.info( hilite( - f"Original csv for job_id {kwargs['job_id']}: {scrub(original_file_contents[0:999])}" + f"Original csv for job_id {kwargs['job_id']}: {original_file_contents[0:999]}" ) ) original_upload = RecipientCSV( diff --git a/deploy-config/production.yml b/deploy-config/production.yml index 18b5cfdcc..b38094f28 100644 --- a/deploy-config/production.yml +++ b/deploy-config/production.yml @@ -1,6 +1,6 @@ env: production instances: 2 -memory: 1G +memory: 2G public_admin_route: beta.notify.gov cloud_dot_gov_route: notify.app.cloud.gov redis_enabled: 1 diff --git a/notifications_utils/logging.py b/notifications_utils/logging.py index 7c56a00ad..4abe5bcf7 100644 --- a/notifications_utils/logging.py +++ b/notifications_utils/logging.py @@ -70,6 +70,7 @@ def configure_handler(handler, app, formatter): handler.addFilter(AppNameFilter(app.config["NOTIFY_APP_NAME"])) handler.addFilter(RequestIdFilter()) handler.addFilter(ServiceIdFilter()) + handler.addFilter(PIIFilter()) return handler @@ -134,13 +135,30 @@ class JSONFormatter(BaseJSONFormatter): return log_record -def scrub(msg): - # Eventually we want to scrub all messages in all logs for phone numbers - # and email addresses, masking them. Ultimately this will probably get - # refactored into a 'SafeLogger' subclass or something, but let's start here - # with phones. - phones = re.findall("(?:\\+ *)?\\d[\\d\\- ]{7,}\\d", msg) - phones = [phone.replace("-", "").replace(" ", "") for phone in phones] - for phone in phones: - msg = msg.replace(phone, f"1XXXXX{phone[-5:]}") - return msg +class PIIFilter(logging.Filter): + def scrub(self, msg): + # Eventually we want to scrub all messages in all logs for phone numbers + # and email addresses, masking them. Ultimately this will probably get + # refactored into a 'SafeLogger' subclass or something, but let's start here + # with phones. + + # Sometimes just an exception object is passed in for the message, skip those. + if not isinstance(msg, str): + return msg + phones = re.findall("(?:\\+ *)?\\d[\\d\\- ]{7,}\\d", msg) + phones = [phone.replace("-", "").replace(" ", "") for phone in phones] + for phone in phones: + msg = msg.replace(phone, "1XXXXXXXXXX") + + emails = re.findall( + r"[\w\.-]+@[\w\.-]+", msg + ) # ['alice@google.com', 'bob@abc.com'] + for email in emails: + # do something with each found email string + masked_email = "XXXXX@XXXXXXX" + msg = msg.replace(email, masked_email) + return msg + + def filter(self, record): + record.msg = self.scrub(record.msg) + return record diff --git a/tests/notifications_utils/test_logging.py b/tests/notifications_utils/test_logging.py index 858b9352b..767c1f6f8 100644 --- a/tests/notifications_utils/test_logging.py +++ b/tests/notifications_utils/test_logging.py @@ -51,11 +51,16 @@ def test_base_json_formatter_contains_service_id(): assert service_id_filter.filter(record).service_id == "no-service-id" -def test_scrub(): - result = logging.scrub( - "This is a message with 17775554324, and also 18884449323 and also 17775554324" - ) - assert ( - result - == "This is a message with 1XXXXX54324, and also 1XXXXX49323 and also 1XXXXX54324" +def test_pii_filter(): + record = builtin_logging.LogRecord( + name="log thing", + level="info", + pathname="path", + lineno=123, + msg="phone1: 1555555555, phone2: 1555555554, email1: fake@fake.gov, email2: fake@fake2.fake.gov", + exc_info=None, + args=None, ) + pii_filter = logging.PIIFilter() + clean_msg = "phone1: 1XXXXXXXXXX, phone2: 1XXXXXXXXXX, email1: XXXXX@XXXXXXX, email2: XXXXX@XXXXXXX" + assert pii_filter.filter(record).msg == clean_msg