Merge pull request #1076 from GSA/notify-api-937

Need magic PII-free debugging method for API
This commit is contained in:
Carlo Costino
2024-06-25 09:50:13 -04:00
committed by GitHub
8 changed files with 75 additions and 37 deletions

View File

@@ -24,7 +24,7 @@ from app.notifications.process_notifications import persist_notification
from app.notifications.validators import check_service_over_total_message_limit from app.notifications.validators import check_service_over_total_message_limit
from app.serialised_models import SerialisedService, SerialisedTemplate from app.serialised_models import SerialisedService, SerialisedTemplate
from app.service.utils import service_allowed_to_send_to from app.service.utils import service_allowed_to_send_to
from app.utils import DATETIME_FORMAT, hilite, scrub, utc_now from app.utils import DATETIME_FORMAT, hilite, utc_now
from notifications_utils.recipients import RecipientCSV from notifications_utils.recipients import RecipientCSV
@@ -191,7 +191,7 @@ def save_sms(self, service_id, notification_id, encrypted_notification, sender_i
if not service_allowed_to_send_to(notification["to"], service, KeyType.NORMAL): if not service_allowed_to_send_to(notification["to"], service, KeyType.NORMAL):
current_app.logger.info( current_app.logger.info(
hilite( hilite(
scrub(f"service not allowed to send to {notification['to']}, aborting") f"service not allowed to send for job_id {notification.get('job', None)}, aborting"
) )
) )
current_app.logger.debug( current_app.logger.debug(
@@ -224,8 +224,11 @@ def save_sms(self, service_id, notification_id, encrypted_notification, sender_i
) )
# Kick off sns process in provider_tasks.py # Kick off sns process in provider_tasks.py
sn = saved_notification
current_app.logger.info( current_app.logger.info(
hilite(scrub(f"Going to deliver sms for recipient: {notification['to']}")) hilite(
f"Deliver sms for job_id: {sn.job_id} row_number: {sn.job_row_number}"
)
) )
provider_tasks.deliver_sms.apply_async( provider_tasks.deliver_sms.apply_async(
[str(saved_notification.id)], queue=QueueNames.SEND_SMS [str(saved_notification.id)], queue=QueueNames.SEND_SMS

View File

@@ -9,7 +9,7 @@ from flask import current_app
from app.clients import AWS_CLIENT_CONFIG, Client from app.clients import AWS_CLIENT_CONFIG, Client
from app.cloudfoundry_config import cloud_config from app.cloudfoundry_config import cloud_config
from app.exceptions import NotificationTechnicalFailureException from app.exceptions import NotificationTechnicalFailureException
from app.utils import hilite, scrub, utc_now from app.utils import hilite, utc_now
class AwsCloudwatchClient(Client): class AwsCloudwatchClient(Client):
@@ -124,7 +124,14 @@ class AwsCloudwatchClient(Client):
self.warn_if_dev_is_opted_out( self.warn_if_dev_is_opted_out(
message["delivery"]["providerResponse"], notification_id message["delivery"]["providerResponse"], notification_id
) )
current_app.logger.info(hilite(scrub(f"DELIVERED: {message}"))) # Here we map the answer from aws to the message_id.
# Previously, in send_to_providers, we mapped the job_id and row number
# to the message id. And on the admin side we mapped the csv filename
# to the job_id. So by tracing through all the logs we can go:
# filename->job_id->message_id->what really happened
current_app.logger.info(
hilite(f"DELIVERED: {message} for message_id {message_id}")
)
return ( return (
"success", "success",
message["delivery"]["providerResponse"], message["delivery"]["providerResponse"],
@@ -142,7 +149,9 @@ class AwsCloudwatchClient(Client):
message["delivery"]["providerResponse"], notification_id message["delivery"]["providerResponse"], notification_id
) )
current_app.logger.info(hilite(scrub(f"FAILED: {message}"))) current_app.logger.info(
hilite(f"FAILED: {message} for message_id {message_id}")
)
return ( return (
"failure", "failure",
message["delivery"]["providerResponse"], message["delivery"]["providerResponse"],

View File

@@ -13,7 +13,7 @@ from app.dao.provider_details_dao import get_provider_details_by_notification_ty
from app.enums import BrandType, KeyType, NotificationStatus, NotificationType from app.enums import BrandType, KeyType, NotificationStatus, NotificationType
from app.exceptions import NotificationTechnicalFailureException from app.exceptions import NotificationTechnicalFailureException
from app.serialised_models import SerialisedService, SerialisedTemplate from app.serialised_models import SerialisedService, SerialisedTemplate
from app.utils import hilite, scrub, utc_now from app.utils import hilite, utc_now
from notifications_utils.template import ( from notifications_utils.template import (
HTMLEmailTemplate, HTMLEmailTemplate,
PlainTextEmailTemplate, PlainTextEmailTemplate,
@@ -113,15 +113,18 @@ def send_sms_to_provider(notification):
message_id = provider.send_sms(**send_sms_kwargs) message_id = provider.send_sms(**send_sms_kwargs)
current_app.logger.info(f"got message_id {message_id}") current_app.logger.info(f"got message_id {message_id}")
except Exception as e: except Exception as e:
msg = f"FAILED sending message for this recipient: {recipient} to sms" n = notification
current_app.logger.error(hilite(scrub(f"{msg} {e}"))) msg = f"FAILED send to sms, job_id: {n.job_id} row_number {n.job_row_number} message_id {message_id}"
current_app.logger.error(hilite(f"{msg} {e}"))
notification.billable_units = template.fragment_count notification.billable_units = template.fragment_count
dao_update_notification(notification) dao_update_notification(notification)
raise e raise e
else: else:
msg = f"Sending message for this recipient: {recipient} to sms" # Here we map the job_id and row number to the aws message_id
current_app.logger.info(hilite(scrub(msg))) n = notification
msg = f"Send to aws for job_id {n.job_id} row_number {n.job_row_number} message_id {message_id}"
current_app.logger.info(hilite(msg))
notification.billable_units = template.fragment_count notification.billable_units = template.fragment_count
update_notification_to_sending(notification, provider) update_notification_to_sending(notification, provider)
return message_id return message_id

View File

@@ -12,7 +12,7 @@ from app.dao.notifications_dao import (
from app.enums import KeyType, NotificationStatus, NotificationType from app.enums import KeyType, NotificationStatus, NotificationType
from app.errors import BadRequestError from app.errors import BadRequestError
from app.models import Notification from app.models import Notification
from app.utils import hilite, scrub, utc_now from app.utils import hilite, utc_now
from notifications_utils.recipients import ( from notifications_utils.recipients import (
format_email_address, format_email_address,
get_international_phone_info, get_international_phone_info,
@@ -112,7 +112,7 @@ def persist_notification(
) )
current_app.logger.info( current_app.logger.info(
hilite( hilite(
scrub(f"Persisting notification with recipient {formatted_recipient}") f"Persisting notification with job_id: {job_id} row_number: {job_row_number}"
) )
) )
recipient_info = get_international_phone_info(formatted_recipient) recipient_info = get_international_phone_info(formatted_recipient)

View File

@@ -1,4 +1,3 @@
import re
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
from flask import url_for from flask import url_for
@@ -145,15 +144,3 @@ def naive_utcnow():
def utc_now(): def utc_now():
return naive_utcnow() return naive_utcnow()
def scrub(msg):
# Eventually we want to scrub all messages in all logs for phone numbers
# and email addresses, masking them. Ultimately this will probably get
# refactored into a 'SafeLogger' subclass or something, but let's start here
# with phones.
phones = re.findall("(?:\\+ *)?\\d[\\d\\- ]{7,}\\d", msg)
phones = [phone.replace("-", "").replace(" ", "") for phone in phones]
for phone in phones:
msg = msg.replace(phone, f"1XXXXX{phone[-5:]}")
return msg

View File

@@ -1,5 +1,6 @@
import logging import logging
import logging.handlers import logging.handlers
import re
import sys import sys
from itertools import product from itertools import product
@@ -69,6 +70,7 @@ def configure_handler(handler, app, formatter):
handler.addFilter(AppNameFilter(app.config["NOTIFY_APP_NAME"])) handler.addFilter(AppNameFilter(app.config["NOTIFY_APP_NAME"]))
handler.addFilter(RequestIdFilter()) handler.addFilter(RequestIdFilter())
handler.addFilter(ServiceIdFilter()) handler.addFilter(ServiceIdFilter())
handler.addFilter(PIIFilter())
return handler return handler
@@ -115,6 +117,36 @@ class ServiceIdFilter(logging.Filter):
return record return record
class PIIFilter(logging.Filter):
def scrub(self, msg):
# Eventually we want to scrub all messages in all logs for phone numbers
# and email addresses, masking them. Ultimately this will probably get
# refactored into a 'SafeLogger' subclass or something, but let's start here
# with phones.
# Sometimes just an exception object is passed in for the message, skip those.
if not isinstance(msg, str):
return msg
phones = re.findall("(?:\\+ *)?\\d[\\d\\- ]{7,}\\d", msg)
phones = [phone.replace("-", "").replace(" ", "") for phone in phones]
for phone in phones:
msg = msg.replace(phone, "1XXXXXXXXXX")
emails = re.findall(
r"[\w\.-]+@[\w\.-]+", msg
) # ['alice@google.com', 'bob@abc.com']
for email in emails:
# do something with each found email string
masked_email = "XXXXX@XXXXXXX"
msg = msg.replace(email, masked_email)
return msg
def filter(self, record):
record.msg = self.scrub(record.msg)
return record
class JSONFormatter(BaseJSONFormatter): class JSONFormatter(BaseJSONFormatter):
def process_log_record(self, log_record): def process_log_record(self, log_record):
rename_map = { rename_map = {

View File

@@ -13,7 +13,6 @@ from app.utils import (
get_reference_from_personalisation, get_reference_from_personalisation,
get_uuid_string_or_none, get_uuid_string_or_none,
midnight_n_days_ago, midnight_n_days_ago,
scrub,
) )
@@ -95,16 +94,6 @@ def test_get_public_notify_type_text():
) )
def test_scrub():
result = scrub(
"This is a message with 17775554324, and also 18884449323 and also 17775554324"
)
assert (
result
== "This is a message with 1XXXXX54324, and also 1XXXXX49323 and also 1XXXXX54324"
)
# This method is used for simulating bulk sends. We use localstack and run on a developer's machine to do the # This method is used for simulating bulk sends. We use localstack and run on a developer's machine to do the
# simulation. Please see docs->bulk_testing.md for instructions. # simulation. Please see docs->bulk_testing.md for instructions.
# def test_generate_csv_for_bulk_testing(): # def test_generate_csv_for_bulk_testing():

View File

@@ -49,3 +49,18 @@ def test_base_json_formatter_contains_service_id():
== "message to log" == "message to log"
) )
assert service_id_filter.filter(record).service_id == "notify-admin" assert service_id_filter.filter(record).service_id == "notify-admin"
def test_pii_filter():
record = builtin_logging.LogRecord(
name="log thing",
level="info",
pathname="path",
lineno=123,
msg="phone1: 1555555555, phone2: 1555555554, email1: fake@fake.gov, email2: fake@fake2.fake.gov",
exc_info=None,
args=None,
)
pii_filter = logging.PIIFilter()
clean_msg = "phone1: 1XXXXXXXXXX, phone2: 1XXXXXXXXXX, email1: XXXXX@XXXXXXX, email2: XXXXX@XXXXXXX"
assert pii_filter.filter(record).msg == clean_msg