diff --git a/app/main/views/send.py b/app/main/views/send.py index 8bb6ce24c..1dcb8fa59 100644 --- a/app/main/views/send.py +++ b/app/main/views/send.py @@ -51,7 +51,6 @@ from app.utils.templates import get_template from app.utils.user import user_has_permissions from notifications_utils import SMS_CHAR_COUNT_LIMIT from notifications_utils.insensitive_dict import InsensitiveDict -from notifications_utils.logging import scrub from notifications_utils.recipients import RecipientCSV, first_column_headings from notifications_utils.sanitise_text import SanitiseASCII @@ -953,9 +952,7 @@ def send_notification(service_id, template_id): ) ) - current_app.logger.info( - hilite(scrub(f"Recipient for the one-off will be {recipient}")) - ) + current_app.logger.info(hilite(f"Recipient for the one-off will be {recipient}")) keys = [] values = [] for k, v in session["placeholders"].items(): @@ -996,9 +993,7 @@ def send_notification(service_id, template_id): # about report generation. current_app.logger.info( hilite( - scrub( - f"Created job to send one-off, recipient is {recipient}, job_id is {upload_id}" - ) + f"Created job to send one-off, recipient is {recipient}, job_id is {upload_id}" ) ) diff --git a/app/utils/csv.py b/app/utils/csv.py index 5c5b794de..159234786 100644 --- a/app/utils/csv.py +++ b/app/utils/csv.py @@ -7,7 +7,6 @@ from flask_login import current_user from app.models.spreadsheet import Spreadsheet from app.utils import hilite from app.utils.templates import get_sample_template -from notifications_utils.logging import scrub from notifications_utils.recipients import RecipientCSV @@ -79,7 +78,7 @@ def generate_notifications_csv(**kwargs): # hence the try/except. try: current_app.logger.info( - hilite(f"Setting up report with kwargs {scrub(json.dumps(kwargs))}") + hilite(f"Setting up report with kwargs {json.dumps(kwargs)}") ) except TypeError: pass @@ -89,7 +88,7 @@ def generate_notifications_csv(**kwargs): # we display to 999 characters, because we don't want to show the contents for reports with thousands of rows. current_app.logger.info( hilite( - f"Original csv for job_id {kwargs['job_id']}: {scrub(original_file_contents[0:999])}" + f"Original csv for job_id {kwargs['job_id']}: {original_file_contents[0:999]}" ) ) original_upload = RecipientCSV( diff --git a/notifications_utils/logging.py b/notifications_utils/logging.py index 7c56a00ad..285aeee58 100644 --- a/notifications_utils/logging.py +++ b/notifications_utils/logging.py @@ -70,6 +70,7 @@ def configure_handler(handler, app, formatter): handler.addFilter(AppNameFilter(app.config["NOTIFY_APP_NAME"])) handler.addFilter(RequestIdFilter()) handler.addFilter(ServiceIdFilter()) + handler.addFilter(PIIFilter()) return handler @@ -134,13 +135,25 @@ class JSONFormatter(BaseJSONFormatter): return log_record -def scrub(msg): - # Eventually we want to scrub all messages in all logs for phone numbers - # and email addresses, masking them. Ultimately this will probably get - # refactored into a 'SafeLogger' subclass or something, but let's start here - # with phones. - phones = re.findall("(?:\\+ *)?\\d[\\d\\- ]{7,}\\d", msg) - phones = [phone.replace("-", "").replace(" ", "") for phone in phones] - for phone in phones: - msg = msg.replace(phone, f"1XXXXX{phone[-5:]}") - return msg +class PIIFilter(logging.Filter): + def scrub(self, msg): + # Eventually we want to scrub all messages in all logs for phone numbers + # and email addresses, masking them. Ultimately this will probably get + # refactored into a 'SafeLogger' subclass or something, but let's start here + # with phones. + phones = re.findall("(?:\\+ *)?\\d[\\d\\- ]{7,}\\d", msg) + phones = [phone.replace("-", "").replace(" ", "") for phone in phones] + for phone in phones: + msg = msg.replace(phone, f"1XXXXX{phone[-5:]}") + + emails = re.findall( + r"[\w\.-]+@[\w\.-]+", msg + ) # ['alice@google.com', 'bob@abc.com'] + for email in emails: + # do something with each found email string + msg = msg.replace(email, f"XXXXX{email[-10:]}") + return msg + + def filter(self, record): + record.msg = self.scrub(record.msg) + return record diff --git a/tests/app/main/views/test_dashboard.py b/tests/app/main/views/test_dashboard.py index 285444b1b..e83dacbae 100644 --- a/tests/app/main/views/test_dashboard.py +++ b/tests/app/main/views/test_dashboard.py @@ -1893,26 +1893,22 @@ def app_with_socketio(): ( SERVICE_ONE_ID, {"start_date": "2024-01-01", "days": 7}, - {"service_id": SERVICE_ONE_ID, "start_date": "2024-01-01", "days": 7} + {"service_id": SERVICE_ONE_ID, "start_date": "2024-01-01", "days": 7}, ), ( SERVICE_TWO_ID, {"start_date": "2023-06-01", "days": 7}, - {"service_id": SERVICE_TWO_ID, "start_date": "2023-06-01", "days": 7} + {"service_id": SERVICE_TWO_ID, "start_date": "2023-06-01", "days": 7}, ), - ] + ], ) def test_fetch_daily_stats( - app_with_socketio, mocker, - service_id, - date_range, - expected_call_args + app_with_socketio, mocker, service_id, date_range, expected_call_args ): app, socketio = app_with_socketio mocker.patch( - "app.main.views.dashboard.get_stats_date_range", - return_value=date_range + "app.main.views.dashboard.get_stats_date_range", return_value=date_range ) mock_service_api = mocker.patch( @@ -1920,9 +1916,9 @@ def test_fetch_daily_stats( return_value={ date_range["start_date"]: { "email": {"delivered": 0, "failure": 0, "requested": 0}, - "sms": {"delivered": 0, "failure": 1, "requested": 1} + "sms": {"delivered": 0, "failure": 1, "requested": 1}, }, - } + }, ) client = SocketIOTestClient(app, socketio) @@ -1930,22 +1926,22 @@ def test_fetch_daily_stats( connected = client.is_connected() assert connected, "Client should be connected" - client.emit('fetch_daily_stats', service_id) + client.emit("fetch_daily_stats", service_id) received = client.get_received() assert received, "Should receive a response message" - assert received[0]['name'] == 'daily_stats_update' - assert received[0]['args'][0] == { + assert received[0]["name"] == "daily_stats_update" + assert received[0]["args"][0] == { date_range["start_date"]: { "email": {"delivered": 0, "failure": 0, "requested": 0}, - "sms": {"delivered": 0, "failure": 1, "requested": 1} + "sms": {"delivered": 0, "failure": 1, "requested": 1}, }, } mock_service_api.assert_called_once_with( service_id, start_date=expected_call_args["start_date"], - days=expected_call_args["days"] + days=expected_call_args["days"], ) finally: client.disconnect() diff --git a/tests/notifications_utils/test_logging.py b/tests/notifications_utils/test_logging.py index 858b9352b..89b220324 100644 --- a/tests/notifications_utils/test_logging.py +++ b/tests/notifications_utils/test_logging.py @@ -51,11 +51,16 @@ def test_base_json_formatter_contains_service_id(): assert service_id_filter.filter(record).service_id == "no-service-id" -def test_scrub(): - result = logging.scrub( - "This is a message with 17775554324, and also 18884449323 and also 17775554324" - ) - assert ( - result - == "This is a message with 1XXXXX54324, and also 1XXXXX49323 and also 1XXXXX54324" +def test_pii_filter(): + record = builtin_logging.LogRecord( + name="log thing", + level="info", + pathname="path", + lineno=123, + msg="phone1: 1555555555, phone2: 1555555554, email1: fake@fake.gov, email2: fake@fake2.fake.gov", + exc_info=None, + args=None, ) + pii_filter = logging.PIIFilter() + clean_msg = "phone1: 1XXXXX55555, phone2: 1XXXXX55554, email1: XXXXXe@fake.gov, email2: XXXXX2.fake.gov" + assert pii_filter.filter(record).msg == clean_msg