diff --git a/app/main/views/send.py b/app/main/views/send.py index aab2db104..8bb6ce24c 100644 --- a/app/main/views/send.py +++ b/app/main/views/send.py @@ -3,7 +3,16 @@ import uuid from string import ascii_uppercase from zipfile import BadZipFile -from flask import abort, flash, redirect, render_template, request, session, url_for +from flask import ( + abort, + current_app, + flash, + redirect, + render_template, + request, + session, + url_for, +) from flask_login import current_user from markupsafe import Markup from notifications_python_client.errors import HTTPError @@ -31,12 +40,18 @@ from app.s3_client.s3_csv_client import ( s3upload, set_metadata_on_csv_upload, ) -from app.utils import PermanentRedirect, should_skip_template_page, unicode_truncate +from app.utils import ( + PermanentRedirect, + hilite, + should_skip_template_page, + unicode_truncate, +) from app.utils.csv import Spreadsheet, get_errors_for_csv from app.utils.templates import get_template from app.utils.user import user_has_permissions from notifications_utils import SMS_CHAR_COUNT_LIMIT from notifications_utils.insensitive_dict import InsensitiveDict +from notifications_utils.logging import scrub from notifications_utils.recipients import RecipientCSV, first_column_headings from notifications_utils.sanitise_text import SanitiseASCII @@ -938,6 +953,9 @@ def send_notification(service_id, template_id): ) ) + current_app.logger.info( + hilite(scrub(f"Recipient for the one-off will be {recipient}")) + ) keys = [] values = [] for k, v in session["placeholders"].items(): @@ -971,6 +989,19 @@ def send_notification(service_id, template_id): valid="True", ) + # Here we are attempting to cleverly link the job id to the one-off recipient + # If we know the partial phone number of the recipient, we can search + # on that initially and find this, which will give us the job_id + # And once we know the job_id, we can search on that and it might tell us something + # about report generation. + current_app.logger.info( + hilite( + scrub( + f"Created job to send one-off, recipient is {recipient}, job_id is {upload_id}" + ) + ) + ) + session.pop("recipient") session.pop("placeholders") diff --git a/app/utils/csv.py b/app/utils/csv.py index c3c27ec18..5c5b794de 100644 --- a/app/utils/csv.py +++ b/app/utils/csv.py @@ -1,10 +1,13 @@ import datetime import pytz +from flask import current_app, json from flask_login import current_user from app.models.spreadsheet import Spreadsheet +from app.utils import hilite from app.utils.templates import get_sample_template +from notifications_utils.logging import scrub from notifications_utils.recipients import RecipientCSV @@ -71,7 +74,24 @@ def generate_notifications_csv(**kwargs): # This generates the "batch" csv report if kwargs.get("job_id"): + # The kwargs contain the job id, which is linked to the recipient's partial phone number in other debug + # Some unit tests are mocking the kwargs and turning them into a function instead of dict, + # hence the try/except. + try: + current_app.logger.info( + hilite(f"Setting up report with kwargs {scrub(json.dumps(kwargs))}") + ) + except TypeError: + pass + original_file_contents = s3download(kwargs["service_id"], kwargs["job_id"]) + # This will verify that the user actually did successfully upload a csv for a one-off. Limit the size + # we display to 999 characters, because we don't want to show the contents for reports with thousands of rows. + current_app.logger.info( + hilite( + f"Original csv for job_id {kwargs['job_id']}: {scrub(original_file_contents[0:999])}" + ) + ) original_upload = RecipientCSV( original_file_contents, template=get_sample_template(kwargs["template_type"]), diff --git a/notifications_utils/logging.py b/notifications_utils/logging.py index 6a209cdd3..7c56a00ad 100644 --- a/notifications_utils/logging.py +++ b/notifications_utils/logging.py @@ -1,5 +1,6 @@ import logging import logging.handlers +import re import sys from itertools import product @@ -131,3 +132,15 @@ class JSONFormatter(BaseJSONFormatter): except (KeyError, IndexError) as e: logger.exception("failed to format log message: {} not found".format(e)) return log_record + + +def scrub(msg): + # Eventually we want to scrub all messages in all logs for phone numbers + # and email addresses, masking them. Ultimately this will probably get + # refactored into a 'SafeLogger' subclass or something, but let's start here + # with phones. + phones = re.findall("(?:\\+ *)?\\d[\\d\\- ]{7,}\\d", msg) + phones = [phone.replace("-", "").replace(" ", "") for phone in phones] + for phone in phones: + msg = msg.replace(phone, f"1XXXXX{phone[-5:]}") + return msg diff --git a/poetry.lock b/poetry.lock index 5fa01c2b7..0289c6df4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1641,6 +1641,7 @@ files = [ {file = "msgpack-1.0.8-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5fbb160554e319f7b22ecf530a80a3ff496d38e8e07ae763b9e82fadfe96f273"}, {file = "msgpack-1.0.8-cp39-cp39-win32.whl", hash = "sha256:f9af38a89b6a5c04b7d18c492c8ccf2aee7048aff1ce8437c4683bb5a1df893d"}, {file = "msgpack-1.0.8-cp39-cp39-win_amd64.whl", hash = "sha256:ed59dd52075f8fc91da6053b12e8c89e37aa043f8986efd89e61fae69dc1b011"}, + {file = "msgpack-1.0.8-py3-none-any.whl", hash = "sha256:24f727df1e20b9876fa6e95f840a2a2651e34c0ad147676356f4bf5fbb0206ca"}, {file = "msgpack-1.0.8.tar.gz", hash = "sha256:95c02b0e27e706e48d0e5426d1710ca78e0f0628d6e89d5b5a5b91a5f12274f3"}, ] diff --git a/tests/notifications_utils/test_logging.py b/tests/notifications_utils/test_logging.py index cc7f5ee34..858b9352b 100644 --- a/tests/notifications_utils/test_logging.py +++ b/tests/notifications_utils/test_logging.py @@ -49,3 +49,13 @@ def test_base_json_formatter_contains_service_id(): == "message to log" ) assert service_id_filter.filter(record).service_id == "no-service-id" + + +def test_scrub(): + result = logging.scrub( + "This is a message with 17775554324, and also 18884449323 and also 17775554324" + ) + assert ( + result + == "This is a message with 1XXXXX54324, and also 1XXXXX49323 and also 1XXXXX54324" + )