Need magic PII-free debugging method for API

This commit is contained in:
Kenneth Kehl
2024-06-11 10:34:57 -07:00
parent bb6b0936d3
commit 76c34ffae6
9 changed files with 65 additions and 38 deletions

View File

@@ -23,7 +23,7 @@ from app.notifications.process_notifications import persist_notification
from app.notifications.validators import check_service_over_total_message_limit
from app.serialised_models import SerialisedService, SerialisedTemplate
from app.service.utils import service_allowed_to_send_to
from app.utils import DATETIME_FORMAT, hilite, scrub, utc_now
from app.utils import DATETIME_FORMAT, hilite, utc_now
from app.v2.errors import TotalRequestsError
from notifications_utils.recipients import RecipientCSV
@@ -189,7 +189,9 @@ def save_sms(self, service_id, notification_id, encrypted_notification, sender_i
# Return False when trial mode services try sending notifications
# to non-team and non-simulated recipients.
if not service_allowed_to_send_to(notification["to"], service, KeyType.NORMAL):
current_app.logger.info(hilite(scrub(f"service not allowed to send to {notification['to']}, aborting")))
current_app.logger.info(
hilite(f"service not allowed to send to {notification['to']}, aborting")
)
current_app.logger.debug(
"SMS {} failed as restricted service".format(notification_id)
)
@@ -220,7 +222,9 @@ def save_sms(self, service_id, notification_id, encrypted_notification, sender_i
)
# Kick off sns process in provider_tasks.py
current_app.logger.info(hilite(scrub(f"Going to deliver sms for recipient: {notification['to']}")))
current_app.logger.info(
hilite(f"Going to deliver sms for recipient: {notification['to']}")
)
provider_tasks.deliver_sms.apply_async(
[str(saved_notification.id)], queue=QueueNames.SEND_SMS
)

View File

@@ -9,7 +9,7 @@ from flask import current_app
from app.clients import AWS_CLIENT_CONFIG, Client
from app.cloudfoundry_config import cloud_config
from app.exceptions import NotificationTechnicalFailureException
from app.utils import hilite, scrub, utc_now
from app.utils import hilite, utc_now
class AwsCloudwatchClient(Client):
@@ -124,7 +124,7 @@ class AwsCloudwatchClient(Client):
self.warn_if_dev_is_opted_out(
message["delivery"]["providerResponse"], notification_id
)
current_app.logger.info(hilite(scrub(f"DELIVERED: {message}")))
current_app.logger.info(hilite(f"DELIVERED: {message}"))
return (
"success",
message["delivery"]["providerResponse"],
@@ -142,7 +142,7 @@ class AwsCloudwatchClient(Client):
message["delivery"]["providerResponse"], notification_id
)
current_app.logger.info(hilite(scrub(f"FAILED: {message}")))
current_app.logger.info(hilite(f"FAILED: {message}"))
return (
"failure",
message["delivery"]["providerResponse"],

View File

@@ -13,7 +13,7 @@ from app.dao.provider_details_dao import get_provider_details_by_notification_ty
from app.enums import BrandType, KeyType, NotificationStatus, NotificationType
from app.exceptions import NotificationTechnicalFailureException
from app.serialised_models import SerialisedService, SerialisedTemplate
from app.utils import hilite, scrub, utc_now
from app.utils import hilite, utc_now
from notifications_utils.template import (
HTMLEmailTemplate,
PlainTextEmailTemplate,
@@ -114,14 +114,14 @@ def send_sms_to_provider(notification):
current_app.logger.info(f"got message_id {message_id}")
except Exception as e:
msg = f"FAILED sending message for this recipient: {recipient} to sms"
current_app.logger.error(hilite(scrub(f"{msg} {e}")))
current_app.logger.error(hilite(f"{msg} {e}"))
notification.billable_units = template.fragment_count
dao_update_notification(notification)
raise e
else:
msg = f"Sending message for this recipient: {recipient} to sms"
current_app.logger.info(hilite(scrub(msg)))
current_app.logger.info(hilite(msg))
notification.billable_units = template.fragment_count
update_notification_to_sending(notification, provider)
return message_id

View File

@@ -11,7 +11,7 @@ from app.dao.notifications_dao import (
)
from app.enums import KeyType, NotificationStatus, NotificationType
from app.models import Notification
from app.utils import hilite, scrub, utc_now
from app.utils import hilite, utc_now
from app.v2.errors import BadRequestError
from notifications_utils.recipients import (
format_email_address,
@@ -110,7 +110,9 @@ def persist_notification(
formatted_recipient = validate_and_format_phone_number(
recipient, international=True
)
current_app.logger.info(hilite(scrub(f"Persisting notification with recipient {formatted_recipient}")))
current_app.logger.info(
hilite(f"Persisting notification with recipient {formatted_recipient}")
)
recipient_info = get_international_phone_info(formatted_recipient)
notification.normalised_to = formatted_recipient
notification.international = recipient_info.international

View File

@@ -1,4 +1,3 @@
import re
from datetime import datetime, timedelta, timezone
from flask import url_for
@@ -145,15 +144,3 @@ def naive_utcnow():
def utc_now():
return naive_utcnow()
def scrub(msg):
# Eventually we want to scrub all messages in all logs for phone numbers
# and email addresses, masking them. Ultimately this will probably get
# refactored into a 'SafeLogger' subclass or something, but let's start here
# with phones.
phones = re.findall("(?:\\+ *)?\\d[\\d\\- ]{7,}\\d", msg)
phones = [phone.replace("-", "").replace(" ", "") for phone in phones]
for phone in phones:
msg = msg.replace(phone, f"1XXXXX{phone[-5:]}")
return msg

View File

@@ -1,5 +1,6 @@
import logging
import logging.handlers
import re
import sys
from itertools import product
@@ -131,3 +132,31 @@ class JSONFormatter(BaseJSONFormatter):
except (KeyError, IndexError) as e:
logger.exception("failed to format log message: {} not found".format(e))
return log_record
class PIIFormatter(BaseJSONFormatter):
def scrub(self, msg):
# Eventually we want to scrub all messages in all logs for phone numbers
# and email addresses, masking them. Ultimately this will probably get
# refactored into a 'SafeLogger' subclass or something, but let's start here
# with phones.
phones = re.findall("(?:\\+ *)?\\d[\\d\\- ]{7,}\\d", msg)
phones = [phone.replace("-", "").replace(" ", "") for phone in phones]
for phone in phones:
msg = msg.replace(phone, f"1XXXXX{phone[-5:]}")
emails = re.findall(
r"[\w\.-]+@[\w\.-]+", msg
) # ['alice@google.com', 'bob@abc.com']
for email in emails:
# do something with each found email string
msg = msg.replace(email, f"XXXXX{email[-10:]}")
return msg
def process_log_record(self, log_record):
try:
log_record["message"] = self.scrub(log_record["message"])
except (KeyError, IndexError, ValueError) as e:
logger.exception(f"failed to scrub log message: {e}")
return log_record

8
poetry.lock generated
View File

@@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
[[package]]
name = "aiohttp"
@@ -2098,13 +2098,9 @@ files = [
{file = "lxml-5.2.2-cp36-cp36m-win_amd64.whl", hash = "sha256:edcfa83e03370032a489430215c1e7783128808fd3e2e0a3225deee278585196"},
{file = "lxml-5.2.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:28bf95177400066596cdbcfc933312493799382879da504633d16cf60bba735b"},
{file = "lxml-5.2.2-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3a745cc98d504d5bd2c19b10c79c61c7c3df9222629f1b6210c0368177589fb8"},
{file = "lxml-5.2.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b590b39ef90c6b22ec0be925b211298e810b4856909c8ca60d27ffbca6c12e6"},
{file = "lxml-5.2.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b336b0416828022bfd5a2e3083e7f5ba54b96242159f83c7e3eebaec752f1716"},
{file = "lxml-5.2.2-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:c2faf60c583af0d135e853c86ac2735ce178f0e338a3c7f9ae8f622fd2eb788c"},
{file = "lxml-5.2.2-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:4bc6cb140a7a0ad1f7bc37e018d0ed690b7b6520ade518285dc3171f7a117905"},
{file = "lxml-5.2.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7ff762670cada8e05b32bf1e4dc50b140790909caa8303cfddc4d702b71ea184"},
{file = "lxml-5.2.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:57f0a0bbc9868e10ebe874e9f129d2917750adf008fe7b9c1598c0fbbfdde6a6"},
{file = "lxml-5.2.2-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:a6d2092797b388342c1bc932077ad232f914351932353e2e8706851c870bca1f"},
{file = "lxml-5.2.2-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:60499fe961b21264e17a471ec296dcbf4365fbea611bf9e303ab69db7159ce61"},
{file = "lxml-5.2.2-cp37-cp37m-win32.whl", hash = "sha256:d9b342c76003c6b9336a80efcc766748a333573abf9350f4094ee46b006ec18f"},
{file = "lxml-5.2.2-cp37-cp37m-win_amd64.whl", hash = "sha256:b16db2770517b8799c79aa80f4053cd6f8b716f21f8aca962725a9565ce3ee40"},
@@ -2493,6 +2489,7 @@ files = [
{file = "msgpack-1.0.8-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5fbb160554e319f7b22ecf530a80a3ff496d38e8e07ae763b9e82fadfe96f273"},
{file = "msgpack-1.0.8-cp39-cp39-win32.whl", hash = "sha256:f9af38a89b6a5c04b7d18c492c8ccf2aee7048aff1ce8437c4683bb5a1df893d"},
{file = "msgpack-1.0.8-cp39-cp39-win_amd64.whl", hash = "sha256:ed59dd52075f8fc91da6053b12e8c89e37aa043f8986efd89e61fae69dc1b011"},
{file = "msgpack-1.0.8-py3-none-any.whl", hash = "sha256:24f727df1e20b9876fa6e95f840a2a2651e34c0ad147676356f4bf5fbb0206ca"},
{file = "msgpack-1.0.8.tar.gz", hash = "sha256:95c02b0e27e706e48d0e5426d1710ca78e0f0628d6e89d5b5a5b91a5f12274f3"},
]
@@ -3475,7 +3472,6 @@ files = [
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},

View File

@@ -13,7 +13,6 @@ from app.utils import (
get_reference_from_personalisation,
get_uuid_string_or_none,
midnight_n_days_ago,
scrub,
)
@@ -95,13 +94,6 @@ def test_get_public_notify_type_text():
)
def test_scrub():
result = scrub(
"This is a message with 17775554324, and also 18884449323 and also 17775554324"
)
assert result == "This is a message with 1XXXXX54324, and also 1XXXXX49323 and also 1XXXXX54324"
# This method is used for simulating bulk sends. We use localstack and run on a developer's machine to do the
# simulation. Please see docs->bulk_testing.md for instructions.
# def test_generate_csv_for_bulk_testing():

View File

@@ -49,3 +49,20 @@ def test_base_json_formatter_contains_service_id():
== "message to log"
)
assert service_id_filter.filter(record).service_id == "notify-admin"
def test_scrub():
record = builtin_logging.LogRecord(
name="log thing",
level="info",
pathname="path",
lineno=123,
msg="phone1: 1555555555, phone2: 1555555554, email1: fake@fake.gov, email2: fake@fake2.fake.gov",
exc_info=None,
args=None,
)
assert (
json.loads(logging.PIIFormatter().format(record))["message"]
== "phone1: 1XXXXX55555, phone2: 1XXXXX55554, email1: XXXXXe@fake.gov, email2: XXXXX2.fake.gov"
)