From 096658ab75540b40f8eaf7d4fc0306b19325c6e3 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Tue, 11 Jun 2024 13:24:05 -0700 Subject: [PATCH 1/5] Need magic PII-free debugging method for Admin --- app/main/views/send.py | 9 ++----- app/utils/csv.py | 5 ++-- notifications_utils/logging.py | 33 ++++++++++++++++------- tests/app/main/views/test_dashboard.py | 28 +++++++++---------- tests/notifications_utils/test_logging.py | 19 ++++++++----- 5 files changed, 51 insertions(+), 43 deletions(-) diff --git a/app/main/views/send.py b/app/main/views/send.py index 8bb6ce24c..1dcb8fa59 100644 --- a/app/main/views/send.py +++ b/app/main/views/send.py @@ -51,7 +51,6 @@ from app.utils.templates import get_template from app.utils.user import user_has_permissions from notifications_utils import SMS_CHAR_COUNT_LIMIT from notifications_utils.insensitive_dict import InsensitiveDict -from notifications_utils.logging import scrub from notifications_utils.recipients import RecipientCSV, first_column_headings from notifications_utils.sanitise_text import SanitiseASCII @@ -953,9 +952,7 @@ def send_notification(service_id, template_id): ) ) - current_app.logger.info( - hilite(scrub(f"Recipient for the one-off will be {recipient}")) - ) + current_app.logger.info(hilite(f"Recipient for the one-off will be {recipient}")) keys = [] values = [] for k, v in session["placeholders"].items(): @@ -996,9 +993,7 @@ def send_notification(service_id, template_id): # about report generation. current_app.logger.info( hilite( - scrub( - f"Created job to send one-off, recipient is {recipient}, job_id is {upload_id}" - ) + f"Created job to send one-off, recipient is {recipient}, job_id is {upload_id}" ) ) diff --git a/app/utils/csv.py b/app/utils/csv.py index 5c5b794de..159234786 100644 --- a/app/utils/csv.py +++ b/app/utils/csv.py @@ -7,7 +7,6 @@ from flask_login import current_user from app.models.spreadsheet import Spreadsheet from app.utils import hilite from app.utils.templates import get_sample_template -from notifications_utils.logging import scrub from notifications_utils.recipients import RecipientCSV @@ -79,7 +78,7 @@ def generate_notifications_csv(**kwargs): # hence the try/except. try: current_app.logger.info( - hilite(f"Setting up report with kwargs {scrub(json.dumps(kwargs))}") + hilite(f"Setting up report with kwargs {json.dumps(kwargs)}") ) except TypeError: pass @@ -89,7 +88,7 @@ def generate_notifications_csv(**kwargs): # we display to 999 characters, because we don't want to show the contents for reports with thousands of rows. current_app.logger.info( hilite( - f"Original csv for job_id {kwargs['job_id']}: {scrub(original_file_contents[0:999])}" + f"Original csv for job_id {kwargs['job_id']}: {original_file_contents[0:999]}" ) ) original_upload = RecipientCSV( diff --git a/notifications_utils/logging.py b/notifications_utils/logging.py index 7c56a00ad..285aeee58 100644 --- a/notifications_utils/logging.py +++ b/notifications_utils/logging.py @@ -70,6 +70,7 @@ def configure_handler(handler, app, formatter): handler.addFilter(AppNameFilter(app.config["NOTIFY_APP_NAME"])) handler.addFilter(RequestIdFilter()) handler.addFilter(ServiceIdFilter()) + handler.addFilter(PIIFilter()) return handler @@ -134,13 +135,25 @@ class JSONFormatter(BaseJSONFormatter): return log_record -def scrub(msg): - # Eventually we want to scrub all messages in all logs for phone numbers - # and email addresses, masking them. Ultimately this will probably get - # refactored into a 'SafeLogger' subclass or something, but let's start here - # with phones. - phones = re.findall("(?:\\+ *)?\\d[\\d\\- ]{7,}\\d", msg) - phones = [phone.replace("-", "").replace(" ", "") for phone in phones] - for phone in phones: - msg = msg.replace(phone, f"1XXXXX{phone[-5:]}") - return msg +class PIIFilter(logging.Filter): + def scrub(self, msg): + # Eventually we want to scrub all messages in all logs for phone numbers + # and email addresses, masking them. Ultimately this will probably get + # refactored into a 'SafeLogger' subclass or something, but let's start here + # with phones. + phones = re.findall("(?:\\+ *)?\\d[\\d\\- ]{7,}\\d", msg) + phones = [phone.replace("-", "").replace(" ", "") for phone in phones] + for phone in phones: + msg = msg.replace(phone, f"1XXXXX{phone[-5:]}") + + emails = re.findall( + r"[\w\.-]+@[\w\.-]+", msg + ) # ['alice@google.com', 'bob@abc.com'] + for email in emails: + # do something with each found email string + msg = msg.replace(email, f"XXXXX{email[-10:]}") + return msg + + def filter(self, record): + record.msg = self.scrub(record.msg) + return record diff --git a/tests/app/main/views/test_dashboard.py b/tests/app/main/views/test_dashboard.py index 285444b1b..e83dacbae 100644 --- a/tests/app/main/views/test_dashboard.py +++ b/tests/app/main/views/test_dashboard.py @@ -1893,26 +1893,22 @@ def app_with_socketio(): ( SERVICE_ONE_ID, {"start_date": "2024-01-01", "days": 7}, - {"service_id": SERVICE_ONE_ID, "start_date": "2024-01-01", "days": 7} + {"service_id": SERVICE_ONE_ID, "start_date": "2024-01-01", "days": 7}, ), ( SERVICE_TWO_ID, {"start_date": "2023-06-01", "days": 7}, - {"service_id": SERVICE_TWO_ID, "start_date": "2023-06-01", "days": 7} + {"service_id": SERVICE_TWO_ID, "start_date": "2023-06-01", "days": 7}, ), - ] + ], ) def test_fetch_daily_stats( - app_with_socketio, mocker, - service_id, - date_range, - expected_call_args + app_with_socketio, mocker, service_id, date_range, expected_call_args ): app, socketio = app_with_socketio mocker.patch( - "app.main.views.dashboard.get_stats_date_range", - return_value=date_range + "app.main.views.dashboard.get_stats_date_range", return_value=date_range ) mock_service_api = mocker.patch( @@ -1920,9 +1916,9 @@ def test_fetch_daily_stats( return_value={ date_range["start_date"]: { "email": {"delivered": 0, "failure": 0, "requested": 0}, - "sms": {"delivered": 0, "failure": 1, "requested": 1} + "sms": {"delivered": 0, "failure": 1, "requested": 1}, }, - } + }, ) client = SocketIOTestClient(app, socketio) @@ -1930,22 +1926,22 @@ def test_fetch_daily_stats( connected = client.is_connected() assert connected, "Client should be connected" - client.emit('fetch_daily_stats', service_id) + client.emit("fetch_daily_stats", service_id) received = client.get_received() assert received, "Should receive a response message" - assert received[0]['name'] == 'daily_stats_update' - assert received[0]['args'][0] == { + assert received[0]["name"] == "daily_stats_update" + assert received[0]["args"][0] == { date_range["start_date"]: { "email": {"delivered": 0, "failure": 0, "requested": 0}, - "sms": {"delivered": 0, "failure": 1, "requested": 1} + "sms": {"delivered": 0, "failure": 1, "requested": 1}, }, } mock_service_api.assert_called_once_with( service_id, start_date=expected_call_args["start_date"], - days=expected_call_args["days"] + days=expected_call_args["days"], ) finally: client.disconnect() diff --git a/tests/notifications_utils/test_logging.py b/tests/notifications_utils/test_logging.py index 858b9352b..89b220324 100644 --- a/tests/notifications_utils/test_logging.py +++ b/tests/notifications_utils/test_logging.py @@ -51,11 +51,16 @@ def test_base_json_formatter_contains_service_id(): assert service_id_filter.filter(record).service_id == "no-service-id" -def test_scrub(): - result = logging.scrub( - "This is a message with 17775554324, and also 18884449323 and also 17775554324" - ) - assert ( - result - == "This is a message with 1XXXXX54324, and also 1XXXXX49323 and also 1XXXXX54324" +def test_pii_filter(): + record = builtin_logging.LogRecord( + name="log thing", + level="info", + pathname="path", + lineno=123, + msg="phone1: 1555555555, phone2: 1555555554, email1: fake@fake.gov, email2: fake@fake2.fake.gov", + exc_info=None, + args=None, ) + pii_filter = logging.PIIFilter() + clean_msg = "phone1: 1XXXXX55555, phone2: 1XXXXX55554, email1: XXXXXe@fake.gov, email2: XXXXX2.fake.gov" + assert pii_filter.filter(record).msg == clean_msg From f6f99b612460d0bdc70ced1d8acb2a54d79e8112 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Wed, 12 Jun 2024 10:20:06 -0700 Subject: [PATCH 2/5] fix bug where only exception is passed to logger --- notifications_utils/logging.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/notifications_utils/logging.py b/notifications_utils/logging.py index 285aeee58..71c82aacb 100644 --- a/notifications_utils/logging.py +++ b/notifications_utils/logging.py @@ -141,6 +141,10 @@ class PIIFilter(logging.Filter): # and email addresses, masking them. Ultimately this will probably get # refactored into a 'SafeLogger' subclass or something, but let's start here # with phones. + + # Sometimes just an exception object is passed in for the message, skip those. + if not isinstance(msg, str): + return msg phones = re.findall("(?:\\+ *)?\\d[\\d\\- ]{7,}\\d", msg) phones = [phone.replace("-", "").replace(" ", "") for phone in phones] for phone in phones: From 4e8e1acb5b17bd9b6da9506623945c0b0e61a2a1 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 13 Jun 2024 10:17:47 -0700 Subject: [PATCH 3/5] fix email masking algorithm --- notifications_utils/logging.py | 4 +++- tests/notifications_utils/test_logging.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/notifications_utils/logging.py b/notifications_utils/logging.py index 71c82aacb..ba5c862fe 100644 --- a/notifications_utils/logging.py +++ b/notifications_utils/logging.py @@ -155,7 +155,9 @@ class PIIFilter(logging.Filter): ) # ['alice@google.com', 'bob@abc.com'] for email in emails: # do something with each found email string - msg = msg.replace(email, f"XXXXX{email[-10:]}") + email_parts = email.split("@") + masked_email = f"{email_parts[0][0:3]}XXX@{email_parts[1][0:7]}XXX" + msg = msg.replace(email, masked_email) return msg def filter(self, record): diff --git a/tests/notifications_utils/test_logging.py b/tests/notifications_utils/test_logging.py index 89b220324..1b9f999c3 100644 --- a/tests/notifications_utils/test_logging.py +++ b/tests/notifications_utils/test_logging.py @@ -62,5 +62,5 @@ def test_pii_filter(): args=None, ) pii_filter = logging.PIIFilter() - clean_msg = "phone1: 1XXXXX55555, phone2: 1XXXXX55554, email1: XXXXXe@fake.gov, email2: XXXXX2.fake.gov" + clean_msg = "phone1: 1XXXXX55555, phone2: 1XXXXX55554, email1: fakXXX@fake.goXXX, email2: fakXXX@fake2.fXXX" assert pii_filter.filter(record).msg == clean_msg From 7e533731ed8c223e3e165c684a265971d4fdd0f5 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Mon, 17 Jun 2024 11:40:13 -0700 Subject: [PATCH 4/5] fix flake 8 --- app/main/views/send.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/app/main/views/send.py b/app/main/views/send.py index 3d8bcd512..bb63f10a9 100644 --- a/app/main/views/send.py +++ b/app/main/views/send.py @@ -991,7 +991,6 @@ def send_notification(service_id, template_id): valid="True", ) - session.pop("recipient") session.pop("placeholders") @@ -1023,8 +1022,12 @@ def send_notification(service_id, template_id): job_id=upload_id, ) ) - total = notifications['total'] - current_app.logger.info(hilite(f"job_id: {upload_id} has notifications: {total} and attempts: {attempts}")) + total = notifications["total"] + current_app.logger.info( + hilite( + f"job_id: {upload_id} has notifications: {total} and attempts: {attempts}" + ) + ) return redirect( url_for( ".view_job", From a887f3f5273ab97c7fe893a708a6bb0824e5d181 Mon Sep 17 00:00:00 2001 From: Carlo Costino Date: Mon, 24 Jun 2024 12:07:28 -0400 Subject: [PATCH 5/5] Increased production memory to 2 GB This changeset increases our memory for the API app and its corresponding workers to 2 GB each. Signed-off-by: Carlo Costino --- deploy-config/production.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy-config/production.yml b/deploy-config/production.yml index 18b5cfdcc..b38094f28 100644 --- a/deploy-config/production.yml +++ b/deploy-config/production.yml @@ -1,6 +1,6 @@ env: production instances: 2 -memory: 1G +memory: 2G public_admin_route: beta.notify.gov cloud_dot_gov_route: notify.app.cloud.gov redis_enabled: 1