add debug tag adr

Kenneth Kehl
2025-02-27 13:24:32 -08:00
parent e9e69777de
commit 4cfb980ac6
10 changed files with 113 additions and 49 deletions

View File

@@ -118,7 +118,7 @@ def list_s3_objects():
                 break
     except Exception:
         current_app.logger.exception(
-            "An error occurred while regenerating cache #notify-admin-1200",
+            "An error occurred while regenerating cache #notify-debug-admin-1200",
         )
@@ -200,7 +200,7 @@ def read_s3_file(bucket_name, object_key, s3res):
     except LookupError:
         # perhaps our key is not formatted as we expected. If so skip it.
-        current_app.logger.exception("LookupError #notify-admin-1200")
+        current_app.logger.exception("LookupError #notify-debug-admin-1200")


 def get_s3_files():
@@ -213,7 +213,7 @@ def get_s3_files():
     s3res = get_s3_resource()
     current_app.logger.info(
-        f"job_cache length before regen: {len_job_cache()} #notify-admin-1200"
+        f"job_cache length before regen: {len_job_cache()} #notify-debug-admin-1200"
     )
     try:
         with ThreadPoolExecutor() as executor:
@@ -222,7 +222,7 @@ def get_s3_files():
         current_app.logger.exception("Connection pool issue")
     current_app.logger.info(
-        f"job_cache length after regen: {len_job_cache()} #notify-admin-1200"
+        f"job_cache length after regen: {len_job_cache()} #notify-debug-admin-1200"
     )
@@ -290,7 +290,7 @@ def file_exists(file_location):
 def get_job_location(service_id, job_id):
     current_app.logger.debug(
-        f"#s3-partitioning NEW JOB_LOCATION: {NEW_FILE_LOCATION_STRUCTURE.format(service_id, job_id)}"
+        f"#notify-debug-s3-partitioning NEW JOB_LOCATION: {NEW_FILE_LOCATION_STRUCTURE.format(service_id, job_id)}"
     )
     return (
         current_app.config["CSV_UPLOAD_BUCKET"]["bucket"],
@@ -308,7 +308,7 @@ def get_old_job_location(service_id, job_id):
     Remove this when everything works with the NEW_FILE_LOCATION_STRUCTURE.
     """
     current_app.logger.debug(
-        f"#s3-partitioning OLD JOB LOCATION: {FILE_LOCATION_STRUCTURE.format(service_id, job_id)}"
+        f"#notify-debug-s3-partitioning OLD JOB LOCATION: {FILE_LOCATION_STRUCTURE.format(service_id, job_id)}"
     )
     return (
         current_app.config["CSV_UPLOAD_BUCKET"]["bucket"],
@@ -507,7 +507,7 @@ def get_personalisation_from_s3(service_id, job_id, job_row_number):
 def get_job_metadata_from_s3(service_id, job_id):
     current_app.logger.debug(
-        f"#s3-partitioning CALLING GET_JOB_METADATA with {service_id}, {job_id}"
+        f"#notify-debug-s3-partitioning CALLING GET_JOB_METADATA with {service_id}, {job_id}"
    )
     obj = get_s3_object(*get_job_location(service_id, job_id))
     return obj.get()["Metadata"]

View File

@@ -39,7 +39,7 @@ class AwsPinpointClient(Client):
             current_app.logger.info(hilite(response))
         except ClientError:
             current_app.logger.exception(
-                "#validate-phone-number Could not validate with pinpoint"
+                "#notify-debug-validate-phone-number Could not validate with pinpoint"
             )

     # TODO This is the structure of the response. When the phone validation

View File

@@ -68,15 +68,16 @@ class AwsSnsClient(SmsClient):
             non_scrubbable = " ".join(sender)
             self.current_app.logger.info(
-                f"notify-api-1385 sender {non_scrubbable} is a {type(sender)} default is a {type(default_num)}"
+                f"notify-debug-api-1385 sender {non_scrubbable} is a {type(sender)} \
+                    default is a {type(default_num)}"
             )
         else:
             self.current_app.logger.warning(
-                f"notify-api-1385 sender is type {type(sender)}!! {sender}"
+                f"notify-debug-api-1385 sender is type {type(sender)}!! {sender}"
             )
         if self._valid_sender_number(sender):
             self.current_app.logger.info(
-                f"notify-api-1385 use valid sender {non_scrubbable} instead of default {default_num}"
+                f"notify-debug-api-1385 use valid sender {non_scrubbable} instead of default {default_num}"
             )
             attributes["AWS.MM.SMS.OriginationNumber"] = {
@@ -85,7 +86,7 @@ class AwsSnsClient(SmsClient):
             }
         else:
             self.current_app.logger.info(
-                f"notify-api-1385 use default {default_num} instead of invalid sender"
+                f"notify-debug-api-1385 use default {default_num} instead of invalid sender"
             )
             attributes["AWS.MM.SMS.OriginationNumber"] = {

View File

@@ -158,17 +158,17 @@ def dao_create_job(job):
         now_time = utc_now()
         diff_time = now_time - orig_time
         current_app.logger.info(
-            f"#notify-admin-1859 dao_create_job orig created at {orig_time} and now {now_time}"
+            f"#notify-debug-admin-1859 dao_create_job orig created at {orig_time} and now {now_time}"
         )
         if diff_time.total_seconds() > 300:  # It should be only a few seconds diff at most
             current_app.logger.error(
-                "#notify-admin-1859 Something is wrong with job.created_at!"
+                "#notify-debug-admin-1859 Something is wrong with job.created_at!"
             )
             if os.getenv("NOTIFY_ENVIRONMENT") not in ["test"]:
                 job.created_at = now_time
                 dao_update_job(job)
                 current_app.logger.error(
-                    f"#notify-admin-1859 Job created_at reset to {job.created_at}"
+                    f"#notify-debug-admin-1859 Job created_at reset to {job.created_at}"
                 )

View File

@@ -534,7 +534,9 @@ def dao_fetch_stats_for_service_from_hours(service_id, start_date, end_date):
     # Update to group by HOUR instead of DAY
     total_substmt = (
         select(
-            func.date_trunc("hour", NotificationAllTimeView.created_at).label("hour"),  # UPDATED
+            func.date_trunc("hour", NotificationAllTimeView.created_at).label(
+                "hour"
+            ),  # UPDATED
             Job.notification_count.label("notification_count"),
         )
         .join(Job, NotificationAllTimeView.job_id == Job.id)
@@ -556,11 +558,14 @@ def dao_fetch_stats_for_service_from_hours(service_id, start_date, end_date):
     total_stmt = select(
         total_substmt.c.hour,  # UPDATED
         func.sum(total_substmt.c.notification_count).label("total_notifications"),
-    ).group_by(total_substmt.c.hour)  # UPDATED
+    ).group_by(
+        total_substmt.c.hour
+    )  # UPDATED

     # Ensure we're using hourly timestamps in the response
     total_notifications = {
-        row.hour: row.total_notifications for row in db.session.execute(total_stmt).all()
+        row.hour: row.total_notifications
+        for row in db.session.execute(total_stmt).all()
     }

     # Update the second query to also use "hour"
@@ -568,7 +573,9 @@ def dao_fetch_stats_for_service_from_hours(service_id, start_date, end_date):
         select(
             NotificationAllTimeView.notification_type,
             NotificationAllTimeView.status,
-            func.date_trunc("hour", NotificationAllTimeView.created_at).label("hour"),  # UPDATED
+            func.date_trunc("hour", NotificationAllTimeView.created_at).label(
+                "hour"
+            ),  # UPDATED
             func.count(NotificationAllTimeView.id).label("count"),
         )
         .where(
@@ -895,7 +902,9 @@ def get_specific_days_stats(
     return stats


-def get_specific_hours_stats(data, start_date, hours=None, end_date=None, total_notifications=None):
+def get_specific_hours_stats(
+    data, start_date, hours=None, end_date=None, total_notifications=None
+):
     if hours is not None and end_date is not None:
         raise ValueError("Only set hours OR set end_date, not both.")
     elif hours is not None:
@@ -919,10 +928,10 @@ def get_specific_hours_stats(data, start_date, hours=None, end_date=None, total_notifications=None):
     # Format statistics, returning only hours with results
     stats = {
         hour.strftime("%Y-%m-%dT%H:00:00Z"): statistics.format_statistics(
-            rows,
-            total_notifications.get(hour, 0) if total_notifications else None
+            rows, total_notifications.get(hour, 0) if total_notifications else None
         )
-        for hour, rows in grouped_data.items() if rows
+        for hour, rows in grouped_data.items()
+        if rows
     }

     return stats

View File

@@ -107,7 +107,7 @@ def send_sms_to_provider(notification):
             sender_numbers = get_sender_numbers(notification)
             if notification.reply_to_text not in sender_numbers:
                 raise ValueError(
-                    f"{notification.reply_to_text} not in {sender_numbers} #notify-admin-1701"
+                    f"{notification.reply_to_text} not in {sender_numbers} #notify-debug-admin-1701"
                 )

             send_sms_kwargs = {
@@ -152,7 +152,7 @@ def _experimentally_validate_phone_numbers(recipient):
     if recipient_lookup in current_app.config["SIMULATED_SMS_NUMBERS"] and os.getenv(
         "NOTIFY_ENVIRONMENT"
     ) in ["development", "test"]:
-        current_app.logger.info(hilite("#validate-phone-number fired"))
+        current_app.logger.info(hilite("#notify-debug-validate-phone-number fired"))
         aws_pinpoint_client.validate_phone_number("01", recipient)

View File

@@ -242,7 +242,9 @@ def create_job(service_id):
     original_file_name = data.get("original_file_name")
     data.update({"service": service_id})
     try:
-        current_app.logger.info(f"#s3-partitioning DATA IN CREATE_JOB: {data}")
+        current_app.logger.info(
+            f"#notify-debug-s3-partitioning DATA IN CREATE_JOB: {data}"
+        )
         data.update(**get_job_metadata_from_s3(service_id, data["id"]))
     except KeyError:
         raise InvalidRequest(

View File

@@ -0,0 +1,38 @@
+# Use of debug search tags for CloudWatch logs
+
+Status: Accepted
+
+Date: 27 February 2025
+
+### Context
+
+We encountered recurring problems in specific parts of the code. These issues were hard to
+debug by searching through CloudWatch logs: a single issue tends to be exploded into multiple
+log lines, and those lines are not even time-sorted by default, so it can get quite hard on
+the eyes and test one's patience to debug this way.
+
+### Decision
+
+We decided to start using a unique tag for each individual issue so we can quickly search for
+the related group of log lines. This worked fairly well, and even though some of these issues
+have at long last been resolved, it might be worthwhile to continue this debugging pattern.
+
+### Consequences
+
+Here are the existing tags and what they are used for:
+
+- #notify-debug-admin-1200: job cache regeneration
+- #notify-debug-admin-1701: wrong sender phone number
+- #notify-debug-admin-1859: job creation time reset due to a strange SQLAlchemy constructor issue setting the wrong created_at time
+- #notify-debug-api-1385: a separate identifier used to resolve the wrong sender phone number issue
+- #notify-debug-s3-partitioning: modify S3 partitioning in line with AWS best practices
+- #notify-debug-validate-phone-number: experimental code to try to validate phone numbers with AWS Pinpoint
+
+### Author
+
+@kenkehl
+
+### Stakeholders
+
+@ccostino
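
A minimal sketch of the tagging pattern the ADR describes, assuming a Flask app like the one in this repo; the tag `#notify-debug-example-0000` and the `regenerate_cache` function are hypothetical placeholders, not part of this commit:

```python
# Sketch of the debug-tag pattern (hypothetical tag and function names).
from flask import Flask, current_app

app = Flask(__name__)

DEBUG_TAG = "#notify-debug-example-0000"  # one unique tag per issue under investigation


def regenerate_cache():
    try:
        raise ConnectionError("simulated intermittent failure")
    except Exception:
        # Every log line about this issue carries the same tag, so a single
        # search on the tag gathers all related lines across the codebase.
        current_app.logger.exception("cache regeneration failed %s", DEBUG_TAG)


with app.app_context():
    regenerate_cache()

# In CloudWatch Logs Insights, the related lines can then be pulled together
# and time-sorted with a query such as:
#   fields @timestamp, @message
#   | filter @message like /notify-debug-example-0000/
#   | sort @timestamp asc
```

Because the tag is a plain substring rather than structured metadata, it works with any log search tool, which is the main appeal of the pattern.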

View File

@@ -449,9 +449,9 @@ def test_get_s3_files_success(client, mocker):
     mock_read_s3_file.assert_has_calls(calls, any_order=True)

-    # mock_current_app.info.assert_any_call("job_cache length before regen: 0 #notify-admin-1200")
-    # mock_current_app.info.assert_any_call("job_cache length after regen: 0 #notify-admin-1200")
+    # mock_current_app.info.assert_any_call("job_cache length before regen: 0 #notify-debug-admin-1200")
+    # mock_current_app.info.assert_any_call("job_cache length after regen: 0 #notify-debug-admin-1200")


 @patch("app.aws.s3.s3_client", None)  # ensure it starts as None

View File

@@ -7,7 +7,9 @@ from app.dao.services_dao import get_specific_hours_stats
 from app.enums import StatisticsType
 from app.models import TemplateType

-NotificationRow = namedtuple("NotificationRow", ["notification_type", "status", "timestamp", "count"])
+NotificationRow = namedtuple(
+    "NotificationRow", ["notification_type", "status", "timestamp", "count"]
+)


 def generate_expected_hourly_output(requested_sms_hours):
@@ -38,27 +40,31 @@ def create_mock_notification(notification_type, status, timestamp, count=1):
         notification_type=notification_type,
         status=status,
         timestamp=timestamp.replace(minute=0, second=0, microsecond=0),
-        count=count
+        count=count,
     )


 test_cases = [
     (
-        [create_mock_notification(
+        [
+            create_mock_notification(
                 TemplateType.SMS,
                 StatisticsType.REQUESTED,
                 datetime(2025, 2, 18, 14, 15, 0),
-        )],
+            )
+        ],
         datetime(2025, 2, 18, 12, 0),
         6,
         generate_expected_hourly_output(["2025-02-18T14:00:00Z"]),
     ),
     (
-        [create_mock_notification(
+        [
+            create_mock_notification(
                 TemplateType.SMS,
                 StatisticsType.REQUESTED,
                 datetime(2025, 2, 18, 17, 59, 59),
-        )],
+            )
+        ],
         datetime(2025, 2, 18, 15, 0),
         3,
         generate_expected_hourly_output(["2025-02-18T17:00:00Z"]),
@@ -66,21 +72,29 @@ test_cases = [
     ([], datetime(2025, 2, 18, 10, 0), 4, {}),
     (
         [
-            create_mock_notification(TemplateType.SMS, StatisticsType.REQUESTED, datetime(2025, 2, 18, 9, 30, 0)),
-            create_mock_notification(TemplateType.SMS, StatisticsType.REQUESTED, datetime(2025, 2, 18, 11, 45, 0)),
+            create_mock_notification(
+                TemplateType.SMS,
+                StatisticsType.REQUESTED,
+                datetime(2025, 2, 18, 9, 30, 0),
+            ),
+            create_mock_notification(
+                TemplateType.SMS,
+                StatisticsType.REQUESTED,
+                datetime(2025, 2, 18, 11, 45, 0),
+            ),
         ],
         datetime(2025, 2, 18, 8, 0),
         5,
-        generate_expected_hourly_output(["2025-02-18T09:00:00Z", "2025-02-18T11:00:00Z"]),
+        generate_expected_hourly_output(
+            ["2025-02-18T09:00:00Z", "2025-02-18T11:00:00Z"]
+        ),
     ),
 ]


-@pytest.mark.parametrize("mocked_notifications, start_date, hours, expected_output", test_cases)
+@pytest.mark.parametrize(
+    "mocked_notifications, start_date, hours, expected_output", test_cases
+)
 def test_get_specific_hours(mocked_notifications, start_date, hours, expected_output):
-    results = get_specific_hours_stats(
-        mocked_notifications,
-        start_date,
-        hours=hours
-    )
+    results = get_specific_hours_stats(mocked_notifications, start_date, hours=hours)
     assert results == expected_output, f"Expected {expected_output}, but got {results}"