diff --git a/app/aws/s3.py b/app/aws/s3.py index e8841b20c..bc1728541 100644 --- a/app/aws/s3.py +++ b/app/aws/s3.py @@ -118,7 +118,7 @@ def list_s3_objects(): break except Exception: current_app.logger.exception( - "An error occurred while regenerating cache #notify-admin-1200", + "An error occurred while regenerating cache #notify-debug-admin-1200", ) @@ -200,7 +200,7 @@ def read_s3_file(bucket_name, object_key, s3res): except LookupError: # perhaps our key is not formatted as we expected. If so skip it. - current_app.logger.exception("LookupError #notify-admin-1200") + current_app.logger.exception("LookupError #notify-debug-admin-1200") def get_s3_files(): @@ -213,7 +213,7 @@ def get_s3_files(): s3res = get_s3_resource() current_app.logger.info( - f"job_cache length before regen: {len_job_cache()} #notify-admin-1200" + f"job_cache length before regen: {len_job_cache()} #notify-debug-admin-1200" ) try: with ThreadPoolExecutor() as executor: @@ -222,7 +222,7 @@ def get_s3_files(): current_app.logger.exception("Connection pool issue") current_app.logger.info( - f"job_cache length after regen: {len_job_cache()} #notify-admin-1200" + f"job_cache length after regen: {len_job_cache()} #notify-debug-admin-1200" ) @@ -290,7 +290,7 @@ def file_exists(file_location): def get_job_location(service_id, job_id): current_app.logger.debug( - f"#s3-partitioning NEW JOB_LOCATION: {NEW_FILE_LOCATION_STRUCTURE.format(service_id, job_id)}" + f"#notify-debug-s3-partitioning NEW JOB_LOCATION: {NEW_FILE_LOCATION_STRUCTURE.format(service_id, job_id)}" ) return ( current_app.config["CSV_UPLOAD_BUCKET"]["bucket"], @@ -308,7 +308,7 @@ def get_old_job_location(service_id, job_id): Remove this when everything works with the NEW_FILE_LOCATION_STRUCTURE. """ current_app.logger.debug( - f"#s3-partitioning OLD JOB LOCATION: {FILE_LOCATION_STRUCTURE.format(service_id, job_id)}" + f"#notify-debug-s3-partitioning OLD JOB LOCATION: {FILE_LOCATION_STRUCTURE.format(service_id, job_id)}" ) return ( current_app.config["CSV_UPLOAD_BUCKET"]["bucket"], @@ -507,7 +507,7 @@ def get_personalisation_from_s3(service_id, job_id, job_row_number): def get_job_metadata_from_s3(service_id, job_id): current_app.logger.debug( - f"#s3-partitioning CALLING GET_JOB_METADATA with {service_id}, {job_id}" + f"#notify-debug-s3-partitioning CALLING GET_JOB_METADATA with {service_id}, {job_id}" ) obj = get_s3_object(*get_job_location(service_id, job_id)) return obj.get()["Metadata"] diff --git a/app/clients/pinpoint/aws_pinpoint.py b/app/clients/pinpoint/aws_pinpoint.py index d15d94601..3b4de7682 100644 --- a/app/clients/pinpoint/aws_pinpoint.py +++ b/app/clients/pinpoint/aws_pinpoint.py @@ -39,7 +39,7 @@ class AwsPinpointClient(Client): current_app.logger.info(hilite(response)) except ClientError: current_app.logger.exception( - "#validate-phone-number Could not validate with pinpoint" + "#notify-debug-validate-phone-number Could not validate with pinpoint" ) # TODO This is the structure of the response. When the phone validation diff --git a/app/clients/sms/aws_sns.py b/app/clients/sms/aws_sns.py index d36af600c..3c2282752 100644 --- a/app/clients/sms/aws_sns.py +++ b/app/clients/sms/aws_sns.py @@ -68,15 +68,16 @@ class AwsSnsClient(SmsClient): non_scrubbable = " ".join(sender) self.current_app.logger.info( - f"notify-api-1385 sender {non_scrubbable} is a {type(sender)} default is a {type(default_num)}" + f"notify-debug-api-1385 sender {non_scrubbable} is a {type(sender)} \ + default is a {type(default_num)}" ) else: self.current_app.logger.warning( - f"notify-api-1385 sender is type {type(sender)}!! {sender}" + f"notify-debug-api-1385 sender is type {type(sender)}!! {sender}" ) if self._valid_sender_number(sender): self.current_app.logger.info( - f"notify-api-1385 use valid sender {non_scrubbable} instead of default {default_num}" + f"notify-debug-api-1385 use valid sender {non_scrubbable} instead of default {default_num}" ) attributes["AWS.MM.SMS.OriginationNumber"] = { @@ -85,7 +86,7 @@ class AwsSnsClient(SmsClient): } else: self.current_app.logger.info( - f"notify-api-1385 use default {default_num} instead of invalid sender" + f"notify-debug-api-1385 use default {default_num} instead of invalid sender" ) attributes["AWS.MM.SMS.OriginationNumber"] = { diff --git a/app/dao/jobs_dao.py b/app/dao/jobs_dao.py index feec601a4..2e479a77d 100644 --- a/app/dao/jobs_dao.py +++ b/app/dao/jobs_dao.py @@ -158,17 +158,17 @@ def dao_create_job(job): now_time = utc_now() diff_time = now_time - orig_time current_app.logger.info( - f"#notify-admin-1859 dao_create_job orig created at {orig_time} and now {now_time}" + f"#notify-debug-admin-1859 dao_create_job orig created at {orig_time} and now {now_time}" ) if diff_time.total_seconds() > 300: # It should be only a few seconds diff at most current_app.logger.error( - "#notify-admin-1859 Something is wrong with job.created_at!" + "#notify-debug-admin-1859 Something is wrong with job.created_at!" ) if os.getenv("NOTIFY_ENVIRONMENT") not in ["test"]: job.created_at = now_time dao_update_job(job) current_app.logger.error( - f"#notify-admin-1859 Job created_at reset to {job.created_at}" + f"#notify-debug-admin-1859 Job created_at reset to {job.created_at}" ) diff --git a/app/delivery/send_to_providers.py b/app/delivery/send_to_providers.py index 4fd0d5fe7..8e90c08d4 100644 --- a/app/delivery/send_to_providers.py +++ b/app/delivery/send_to_providers.py @@ -107,7 +107,7 @@ def send_sms_to_provider(notification): sender_numbers = get_sender_numbers(notification) if notification.reply_to_text not in sender_numbers: raise ValueError( - f"{notification.reply_to_text} not in {sender_numbers} #notify-admin-1701" + f"{notification.reply_to_text} not in {sender_numbers} #notify-debug-admin-1701" ) send_sms_kwargs = { @@ -158,7 +158,7 @@ def _experimentally_validate_phone_numbers(recipient): if recipient_lookup in current_app.config["SIMULATED_SMS_NUMBERS"] and os.getenv( "NOTIFY_ENVIRONMENT" ) in ["development", "test"]: - current_app.logger.info(hilite("#validate-phone-number fired")) + current_app.logger.info(hilite("#notify-debug-validate-phone-number fired")) aws_pinpoint_client.validate_phone_number("01", recipient) diff --git a/app/job/rest.py b/app/job/rest.py index ed506a5aa..571c1e342 100644 --- a/app/job/rest.py +++ b/app/job/rest.py @@ -242,7 +242,9 @@ def create_job(service_id): original_file_name = data.get("original_file_name") data.update({"service": service_id}) try: - current_app.logger.info(f"#s3-partitioning DATA IN CREATE_JOB: {data}") + current_app.logger.info( + f"#notify-debug-s3-partitioning DATA IN CREATE_JOB: {data}" + ) data.update(**get_job_metadata_from_s3(service_id, data["id"])) except KeyError: raise InvalidRequest( diff --git a/docs/adrs/0013-log-debug-tags.md b/docs/adrs/0013-log-debug-tags.md new file mode 100644 index 000000000..83a88107b --- /dev/null +++ b/docs/adrs/0013-log-debug-tags.md @@ -0,0 +1,38 @@ +# Use of debug search tags for cloudwatch logs + +Status: Accepted +Date: 27 February 2025 + +### Context + +We encountered some recurrent problems in specific parts of the code that re-occurred regularly. + +We found it hard to debug these issues searching through cloudwatch logs as a particular issue +tends to be exploded into multiple log lines, and these lines are not even time sorted by default +so it can get quite hard on the eyes and test one's patience to debug this way. + +### Decision + +We decided to start using unique tags for individual issues so we could quickly search for related +groups of log lines. This worked fairly well and even though some of these issues at long last +have been resolved, it might be worthwhile to continue this debugging pattern. + +### Consequences + +Here are the existing tags and what they are used for: + +#notify-debug-admin-1200: job cache regeneration +#notify-debug-admin-1701: wrong sender phone number +#notify-debug-admin-1859: job creation time reset due to strange SqlAlchemy constructor issue setting wrong created_at time +#notify-debug-api-1385: a separate identify to resolve the wrong sender phone number issue +#notify-debug-s3-partitioning: modify s3 partitioning in line with best aws practices +#notify-debug-validate-phone-number: experimental code to try to validate phone numbers with aws pinpoint + + + + +### Author +@kenkehl + +### Stakeholders +@ccostino diff --git a/tests/app/aws/test_s3.py b/tests/app/aws/test_s3.py index e18e31f1b..e58a27275 100644 --- a/tests/app/aws/test_s3.py +++ b/tests/app/aws/test_s3.py @@ -451,9 +451,9 @@ def test_get_s3_files_success(client, mocker): mock_read_s3_file.assert_has_calls(calls, any_order=True) - # mock_current_app.info.assert_any_call("job_cache length before regen: 0 #notify-admin-1200") + # mock_current_app.info.assert_any_call("job_cache length before regen: 0 #notify-debug-admin-1200") - # mock_current_app.info.assert_any_call("job_cache length after regen: 0 #notify-admin-1200") + # mock_current_app.info.assert_any_call("job_cache length after regen: 0 #notify-debug-admin-1200") @patch("app.aws.s3.s3_client", None) # ensure it starts as None