Scrubbing log records with the formatter.

Signed-off-by: Cliff Hill <Clifford.hill@gsa.gov>
This commit is contained in:
Cliff Hill
2024-10-24 12:36:20 -04:00
parent 2d22a44d05
commit 4311403f67

View File

@@ -3,6 +3,7 @@ import logging.handlers
import re import re
import sys import sys
from itertools import product from itertools import product
from typing import Any, override
from flask import g, request from flask import g, request
from flask.ctx import has_app_context, has_request_context from flask.ctx import has_app_context, has_request_context
@@ -18,6 +19,39 @@ TIME_FORMAT = "%Y-%m-%dT%H:%M:%S"
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def _scrub(msg: Any) -> Any:
# Sometimes just an exception object is passed in for the message, skip those.
if not isinstance(msg, str):
return msg
phones = re.findall("(?:\\+ *)?\\d[\\d\\- ]{7,}\\d", msg)
phones = [phone.replace("-", "").replace(" ", "") for phone in phones]
for phone in phones:
msg = msg.replace(phone, "1XXXXXXXXXX")
emails = re.findall(
r"[\w\.-]+@[\w\.-]+", msg
) # ['alice@google.com', 'bob@abc.com']
for email in emails:
# do something with each found email string
masked_email = "XXXXX@XXXXXXX"
msg = msg.replace(email, masked_email)
return msg
class PIIFilter(logging.Filter):
    """Logging filter that masks PII in each record's message via ``_scrub``."""

    @override
    def filter(self, record: logging.LogRecord) -> logging.LogRecord:
        """Scrub the record's message and return the (mutated) record.

        String messages with ``%``-style arguments are rendered first so that
        PII passed as an argument (``logger.info("to %s", phone)``) is masked
        too; the arguments are then cleared because they are already merged
        into ``record.msg``. Non-string messages are handed to ``_scrub``
        as-is.
        """
        text = record.msg
        if isinstance(text, str) and record.args:
            try:
                text = record.getMessage()
            except (TypeError, ValueError, KeyError):
                # Malformed format string/arguments: keep the template and the
                # args untouched so the handler reports the formatting error
                # the usual way instead of it escaping from the filter.
                pass
            else:
                record.args = None
        record.msg = _scrub(text)
        return record
class PIIFormatter(logging.Formatter):
    """Formatter that masks PII in the record's message before formatting."""

    def format(self, record: logging.LogRecord) -> str:
        """Scrub the record's message, then format it with the base class.

        For string messages the ``%``-style arguments are merged in before
        scrubbing, so PII supplied as a logging argument is masked as well.
        The arguments are cleared afterwards because ``record.msg`` already
        holds the fully rendered text. Non-string messages (e.g. exception
        objects) are left untouched, as ``_scrub`` skips them.
        """
        if isinstance(record.msg, str):
            record.msg = _scrub(record.getMessage())
            record.args = None
        return super().format(record)
def init_app(app): def init_app(app):
app.config.setdefault("NOTIFY_LOG_LEVEL", "INFO") app.config.setdefault("NOTIFY_LOG_LEVEL", "INFO")
app.config.setdefault("NOTIFY_APP_NAME", "none") app.config.setdefault("NOTIFY_APP_NAME", "none")
@@ -50,7 +84,7 @@ def init_app(app):
def get_handlers(app): def get_handlers(app):
handlers = [] handlers = []
standard_formatter = logging.Formatter(LOG_FORMAT, TIME_FORMAT) standard_formatter = PIIFormatter(LOG_FORMAT, TIME_FORMAT)
json_formatter = JSONFormatter(LOG_FORMAT, TIME_FORMAT) json_formatter = JSONFormatter(LOG_FORMAT, TIME_FORMAT)
stream_handler = logging.StreamHandler(sys.stdout) stream_handler = logging.StreamHandler(sys.stdout)
@@ -123,36 +157,6 @@ class ServiceIdFilter(logging.Filter):
return record return record
class PIIFilter(logging.Filter):
    """Logging filter that masks phone numbers and email addresses."""

    # Eventually we want to scrub all messages in all logs for phone numbers
    # and email addresses, masking them. Ultimately this will probably get
    # refactored into a 'SafeLogger' subclass or something.

    # Runs of nine or more digits/dashes/spaces that start and end with a
    # digit, optionally preceded by "+".
    _PHONE_PATTERN = re.compile(r"(?:\+ *)?\d[\d\- ]{7,}\d")
    # Anything shaped like local@domain built from word chars, dots, dashes.
    _EMAIL_PATTERN = re.compile(r"[\w\.-]+@[\w\.-]+")

    def scrub(self, msg):
        """Return ``msg`` with phone and email pattern matches masked.

        Non-string values (for example an exception object passed as the
        message) are returned unchanged.
        """
        # Sometimes just an exception object is passed in for the message, skip those.
        if not isinstance(msg, str):
            return msg

        # Replace the text exactly as matched; normalising a match first
        # (stripping dashes/spaces) would make formatted numbers such as
        # "555-123-4567" impossible to find again in the message.
        msg = self._PHONE_PATTERN.sub("1XXXXXXXXXX", msg)
        return self._EMAIL_PATTERN.sub("XXXXX@XXXXXXX", msg)

    def filter(self, record):
        """Scrub the record's message and return the (mutated) record.

        String messages with ``%``-style arguments are rendered first so PII
        passed as an argument is masked too; the arguments are then cleared
        because they are already merged into ``record.msg``.
        """
        text = record.msg
        if isinstance(text, str) and record.args:
            try:
                text = record.getMessage()
            except (TypeError, ValueError, KeyError):
                # Malformed format string/arguments: keep template and args
                # so the handler reports the formatting error as usual.
                pass
            else:
                record.args = None
        record.msg = self.scrub(text)
        return record
class JSONFormatter(BaseJSONFormatter): class JSONFormatter(BaseJSONFormatter):
def process_log_record(self, log_record): def process_log_record(self, log_record):
rename_map = { rename_map = {
@@ -166,6 +170,7 @@ class JSONFormatter(BaseJSONFormatter):
log_record["logType"] = "application" log_record["logType"] = "application"
try: try:
log_record["message"] = log_record["message"].format(**log_record) log_record["message"] = log_record["message"].format(**log_record)
log_record["message"] = _scrub(log_record["message"]) # PII Scrubbing
except KeyError as e: except KeyError as e:
# We get occasional log messages that are nested dictionaries, # We get occasional log messages that are nested dictionaries,
# for example, delivery receipts, where the formatting fails # for example, delivery receipts, where the formatting fails