diff --git a/notifications_utils/logging.py b/notifications_utils/logging.py index 4abe5bcf7..a78d3dd75 100644 --- a/notifications_utils/logging.py +++ b/notifications_utils/logging.py @@ -3,6 +3,7 @@ import logging.handlers import re import sys from itertools import product +from typing import Any, override from flask import g, request from flask.ctx import has_app_context, has_request_context @@ -18,6 +19,39 @@ TIME_FORMAT = "%Y-%m-%dT%H:%M:%S" logger = logging.getLogger(__name__) +def _scrub(msg: Any) -> Any: + # Sometimes just an exception object is passed in for the message, skip those. + if not isinstance(msg, str): + return msg + phones = re.findall("(?:\\+ *)?\\d[\\d\\- ]{7,}\\d", msg) + phones = [phone.replace("-", "").replace(" ", "") for phone in phones] + for phone in phones: + msg = msg.replace(phone, "1XXXXXXXXXX") + + emails = re.findall( + r"[\w\.-]+@[\w\.-]+", msg + ) # ['alice@google.com', 'bob@abc.com'] + for email in emails: + # do something with each found email string + masked_email = "XXXXX@XXXXXXX" + msg = msg.replace(email, masked_email) + + return msg + + +class PIIFilter(logging.Filter): + @override + def filter(self, record: logging.LogRecord) -> logging.LogRecord: + record.msg = _scrub(record.msg) + return record + + +class PIIFormatter(logging.Formatter): + def format(self, record: logging.LogRecord) -> str: + record.msg = _scrub(record.msg) + return super().format(record) + + def init_app(app): app.config.setdefault("NOTIFY_LOG_LEVEL", "INFO") app.config.setdefault("NOTIFY_APP_NAME", "none") @@ -130,35 +164,7 @@ class JSONFormatter(BaseJSONFormatter): log_record["logType"] = "application" try: log_record["message"] = log_record["message"].format(**log_record) + log_record["message"] = _scrub(log_record["message"]) # PII Scrubbing except (KeyError, IndexError) as e: - logger.exception("failed to format log message: {} not found".format(e)) + logger.exception(f"failed to format log message: {e} not found") return log_record - - -class PIIFilter(logging.Filter): - def scrub(self, msg): - # Eventually we want to scrub all messages in all logs for phone numbers - # and email addresses, masking them. Ultimately this will probably get - # refactored into a 'SafeLogger' subclass or something, but let's start here - # with phones. - - # Sometimes just an exception object is passed in for the message, skip those. - if not isinstance(msg, str): - return msg - phones = re.findall("(?:\\+ *)?\\d[\\d\\- ]{7,}\\d", msg) - phones = [phone.replace("-", "").replace(" ", "") for phone in phones] - for phone in phones: - msg = msg.replace(phone, "1XXXXXXXXXX") - - emails = re.findall( - r"[\w\.-]+@[\w\.-]+", msg - ) # ['alice@google.com', 'bob@abc.com'] - for email in emails: - # do something with each found email string - masked_email = "XXXXX@XXXXXXX" - msg = msg.replace(email, masked_email) - return msg - - def filter(self, record): - record.msg = self.scrub(record.msg) - return record