Mirror of https://github.com/GSA/notifications-api.git (synced 2025-12-15 01:32:20 -05:00)

Commit: add debug tag adr
@@ -118,7 +118,7 @@ def list_s3_objects():
             break
         except Exception:
             current_app.logger.exception(
-                "An error occurred while regenerating cache #notify-admin-1200",
+                "An error occurred while regenerating cache #notify-debug-admin-1200",
             )

@@ -200,7 +200,7 @@ def read_s3_file(bucket_name, object_key, s3res):

     except LookupError:
         # perhaps our key is not formatted as we expected. If so skip it.
-        current_app.logger.exception("LookupError #notify-admin-1200")
+        current_app.logger.exception("LookupError #notify-debug-admin-1200")


 def get_s3_files():
@@ -213,7 +213,7 @@ def get_s3_files():

     s3res = get_s3_resource()
     current_app.logger.info(
-        f"job_cache length before regen: {len_job_cache()} #notify-admin-1200"
+        f"job_cache length before regen: {len_job_cache()} #notify-debug-admin-1200"
     )
     try:
         with ThreadPoolExecutor() as executor:
@@ -222,7 +222,7 @@ def get_s3_files():
         current_app.logger.exception("Connection pool issue")

     current_app.logger.info(
-        f"job_cache length after regen: {len_job_cache()} #notify-admin-1200"
+        f"job_cache length after regen: {len_job_cache()} #notify-debug-admin-1200"
     )

@@ -290,7 +290,7 @@ def file_exists(file_location):

 def get_job_location(service_id, job_id):
     current_app.logger.debug(
-        f"#s3-partitioning NEW JOB_LOCATION: {NEW_FILE_LOCATION_STRUCTURE.format(service_id, job_id)}"
+        f"#notify-debug-s3-partitioning NEW JOB_LOCATION: {NEW_FILE_LOCATION_STRUCTURE.format(service_id, job_id)}"
     )
     return (
         current_app.config["CSV_UPLOAD_BUCKET"]["bucket"],
@@ -308,7 +308,7 @@ def get_old_job_location(service_id, job_id):
     Remove this when everything works with the NEW_FILE_LOCATION_STRUCTURE.
     """
     current_app.logger.debug(
-        f"#s3-partitioning OLD JOB LOCATION: {FILE_LOCATION_STRUCTURE.format(service_id, job_id)}"
+        f"#notify-debug-s3-partitioning OLD JOB LOCATION: {FILE_LOCATION_STRUCTURE.format(service_id, job_id)}"
     )
     return (
         current_app.config["CSV_UPLOAD_BUCKET"]["bucket"],
@@ -507,7 +507,7 @@ def get_personalisation_from_s3(service_id, job_id, job_row_number):

 def get_job_metadata_from_s3(service_id, job_id):
     current_app.logger.debug(
-        f"#s3-partitioning CALLING GET_JOB_METADATA with {service_id}, {job_id}"
+        f"#notify-debug-s3-partitioning CALLING GET_JOB_METADATA with {service_id}, {job_id}"
     )
     obj = get_s3_object(*get_job_location(service_id, job_id))
     return obj.get()["Metadata"]

@@ -39,7 +39,7 @@ class AwsPinpointClient(Client):
             current_app.logger.info(hilite(response))
         except ClientError:
             current_app.logger.exception(
-                "#validate-phone-number Could not validate with pinpoint"
+                "#notify-debug-validate-phone-number Could not validate with pinpoint"
             )

     # TODO This is the structure of the response. When the phone validation

@@ -68,15 +68,16 @@ class AwsSnsClient(SmsClient):
             non_scrubbable = " ".join(sender)

             self.current_app.logger.info(
-                f"notify-api-1385 sender {non_scrubbable} is a {type(sender)} default is a {type(default_num)}"
+                f"notify-debug-api-1385 sender {non_scrubbable} is a {type(sender)} \
+default is a {type(default_num)}"
             )
         else:
             self.current_app.logger.warning(
-                f"notify-api-1385 sender is type {type(sender)}!! {sender}"
+                f"notify-debug-api-1385 sender is type {type(sender)}!! {sender}"
             )
         if self._valid_sender_number(sender):
             self.current_app.logger.info(
-                f"notify-api-1385 use valid sender {non_scrubbable} instead of default {default_num}"
+                f"notify-debug-api-1385 use valid sender {non_scrubbable} instead of default {default_num}"
             )

             attributes["AWS.MM.SMS.OriginationNumber"] = {
@@ -85,7 +86,7 @@ class AwsSnsClient(SmsClient):
             }
         else:
             self.current_app.logger.info(
-                f"notify-api-1385 use default {default_num} instead of invalid sender"
+                f"notify-debug-api-1385 use default {default_num} instead of invalid sender"
             )

             attributes["AWS.MM.SMS.OriginationNumber"] = {

@@ -158,17 +158,17 @@ def dao_create_job(job):
     now_time = utc_now()
     diff_time = now_time - orig_time
     current_app.logger.info(
-        f"#notify-admin-1859 dao_create_job orig created at {orig_time} and now {now_time}"
+        f"#notify-debug-admin-1859 dao_create_job orig created at {orig_time} and now {now_time}"
     )
     if diff_time.total_seconds() > 300:  # It should be only a few seconds diff at most
         current_app.logger.error(
-            "#notify-admin-1859 Something is wrong with job.created_at!"
+            "#notify-debug-admin-1859 Something is wrong with job.created_at!"
         )
         if os.getenv("NOTIFY_ENVIRONMENT") not in ["test"]:
             job.created_at = now_time
             dao_update_job(job)
             current_app.logger.error(
-                f"#notify-admin-1859 Job created_at reset to {job.created_at}"
+                f"#notify-debug-admin-1859 Job created_at reset to {job.created_at}"
             )

@@ -534,7 +534,9 @@ def dao_fetch_stats_for_service_from_hours(service_id, start_date, end_date):
     # Update to group by HOUR instead of DAY
     total_substmt = (
         select(
-            func.date_trunc("hour", NotificationAllTimeView.created_at).label("hour"),  # UPDATED
+            func.date_trunc("hour", NotificationAllTimeView.created_at).label(
+                "hour"
+            ),  # UPDATED
             Job.notification_count.label("notification_count"),
         )
         .join(Job, NotificationAllTimeView.job_id == Job.id)
@@ -556,11 +558,14 @@ def dao_fetch_stats_for_service_from_hours(service_id, start_date, end_date):
     total_stmt = select(
         total_substmt.c.hour,  # UPDATED
         func.sum(total_substmt.c.notification_count).label("total_notifications"),
-    ).group_by(total_substmt.c.hour)  # UPDATED
+    ).group_by(
+        total_substmt.c.hour
+    )  # UPDATED

     # Ensure we're using hourly timestamps in the response
     total_notifications = {
-        row.hour: row.total_notifications for row in db.session.execute(total_stmt).all()
+        row.hour: row.total_notifications
+        for row in db.session.execute(total_stmt).all()
     }

     # Update the second query to also use "hour"
@@ -568,7 +573,9 @@ def dao_fetch_stats_for_service_from_hours(service_id, start_date, end_date):
         select(
             NotificationAllTimeView.notification_type,
             NotificationAllTimeView.status,
-            func.date_trunc("hour", NotificationAllTimeView.created_at).label("hour"),  # UPDATED
+            func.date_trunc("hour", NotificationAllTimeView.created_at).label(
+                "hour"
+            ),  # UPDATED
             func.count(NotificationAllTimeView.id).label("count"),
         )
         .where(
@@ -895,7 +902,9 @@ def get_specific_days_stats(
     return stats


-def get_specific_hours_stats(data, start_date, hours=None, end_date=None, total_notifications=None):
+def get_specific_hours_stats(
+    data, start_date, hours=None, end_date=None, total_notifications=None
+):
     if hours is not None and end_date is not None:
         raise ValueError("Only set hours OR set end_date, not both.")
     elif hours is not None:
@@ -919,10 +928,10 @@ def get_specific_hours_stats(data, start_date, hours=None, end_date=None, total_
     # Format statistics, returning only hours with results
     stats = {
         hour.strftime("%Y-%m-%dT%H:00:00Z"): statistics.format_statistics(
-            rows,
-            total_notifications.get(hour, 0) if total_notifications else None
+            rows, total_notifications.get(hour, 0) if total_notifications else None
         )
-        for hour, rows in grouped_data.items() if rows
+        for hour, rows in grouped_data.items()
+        if rows
     }

     return stats

@@ -107,7 +107,7 @@ def send_sms_to_provider(notification):
         sender_numbers = get_sender_numbers(notification)
         if notification.reply_to_text not in sender_numbers:
             raise ValueError(
-                f"{notification.reply_to_text} not in {sender_numbers} #notify-admin-1701"
+                f"{notification.reply_to_text} not in {sender_numbers} #notify-debug-admin-1701"
             )

     send_sms_kwargs = {
@@ -152,7 +152,7 @@ def _experimentally_validate_phone_numbers(recipient):
     if recipient_lookup in current_app.config["SIMULATED_SMS_NUMBERS"] and os.getenv(
         "NOTIFY_ENVIRONMENT"
     ) in ["development", "test"]:
-        current_app.logger.info(hilite("#validate-phone-number fired"))
+        current_app.logger.info(hilite("#notify-debug-validate-phone-number fired"))
         aws_pinpoint_client.validate_phone_number("01", recipient)

@@ -242,7 +242,9 @@ def create_job(service_id):
     original_file_name = data.get("original_file_name")
     data.update({"service": service_id})
     try:
-        current_app.logger.info(f"#s3-partitioning DATA IN CREATE_JOB: {data}")
+        current_app.logger.info(
+            f"#notify-debug-s3-partitioning DATA IN CREATE_JOB: {data}"
+        )
         data.update(**get_job_metadata_from_s3(service_id, data["id"]))
     except KeyError:
         raise InvalidRequest(
docs/adrs/0013-log-debug-tags.md (new file, 38 lines)
@@ -0,0 +1,38 @@
# Use of debug search tags for CloudWatch logs

Status: Accepted
Date: 27 February 2025
### Context

We encountered recurring problems in specific parts of the code, with the same issues
resurfacing regularly.

We found it hard to debug these issues by searching through CloudWatch logs: a single issue
tends to be spread across multiple log lines, and those lines are not even time-sorted by
default, so debugging this way is slow and tries one's patience.
### Decision

We decided to start using unique tags for individual issues so we can quickly search for
related groups of log lines. This has worked fairly well, and even though some of these
issues have at long last been resolved, it is worthwhile to continue this debugging pattern.
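In practice the pattern is just a constant, issue-specific suffix on every related log call.
A minimal sketch of the idea, assuming a Flask app context like the code changed in this
commit (the function body here is hypothetical):

```python
from flask import current_app


def regenerate_job_cache():
    try:
        ...  # hypothetical cache-regeneration work
    except Exception:
        # The trailing tag is the whole trick: one unique, constant string per
        # issue, so a single CloudWatch search returns every related log line.
        current_app.logger.exception(
            "An error occurred while regenerating cache #notify-debug-admin-1200"
        )
```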
### Consequences

Here are the existing tags and what they are used for (a sketch of searching for one of them
follows the list):
- `#notify-debug-admin-1200`: job cache regeneration
- `#notify-debug-admin-1701`: wrong sender phone number
- `#notify-debug-admin-1859`: job creation time reset due to a strange SQLAlchemy constructor issue setting the wrong created_at time
- `#notify-debug-api-1385`: a separate effort to identify and resolve the wrong sender phone number issue
- `#notify-debug-s3-partitioning`: modify S3 partitioning in line with AWS best practices
- `#notify-debug-validate-phone-number`: experimental code to try to validate phone numbers with AWS Pinpoint
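Searching for a tag can be done in the CloudWatch console, or scripted. A minimal sketch
using boto3 (the log group name is a placeholder, not the service's real one):

```python
import boto3

logs = boto3.client("logs")

# Quoting the term in the filter pattern makes CloudWatch match the whole
# tag, "#" included, as a single token.
paginator = logs.get_paginator("filter_log_events")
for page in paginator.paginate(
    logGroupName="/example/notifications-api",  # placeholder log group
    filterPattern='"#notify-debug-admin-1200"',
):
    # Matching events can interleave across log streams, so sort by
    # timestamp client-side before reading them in order.
    for event in sorted(page["events"], key=lambda e: e["timestamp"]):
        print(event["message"])
```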
### Author
@kenkehl

### Stakeholders
@ccostino
@@ -449,9 +449,9 @@ def test_get_s3_files_success(client, mocker):

     mock_read_s3_file.assert_has_calls(calls, any_order=True)

-    # mock_current_app.info.assert_any_call("job_cache length before regen: 0 #notify-admin-1200")
+    # mock_current_app.info.assert_any_call("job_cache length before regen: 0 #notify-debug-admin-1200")

-    # mock_current_app.info.assert_any_call("job_cache length after regen: 0 #notify-admin-1200")
+    # mock_current_app.info.assert_any_call("job_cache length after regen: 0 #notify-debug-admin-1200")


 @patch("app.aws.s3.s3_client", None)  # ensure it starts as None
@@ -7,7 +7,9 @@ from app.dao.services_dao import get_specific_hours_stats
 from app.enums import StatisticsType
 from app.models import TemplateType

-NotificationRow = namedtuple("NotificationRow", ["notification_type", "status", "timestamp", "count"])
+NotificationRow = namedtuple(
+    "NotificationRow", ["notification_type", "status", "timestamp", "count"]
+)


 def generate_expected_hourly_output(requested_sms_hours):
@@ -38,27 +40,31 @@ def create_mock_notification(notification_type, status, timestamp, count=1):
         notification_type=notification_type,
         status=status,
         timestamp=timestamp.replace(minute=0, second=0, microsecond=0),
-        count=count
+        count=count,
     )


 test_cases = [
     (
-        [create_mock_notification(
+        [
+            create_mock_notification(
                 TemplateType.SMS,
                 StatisticsType.REQUESTED,
                 datetime(2025, 2, 18, 14, 15, 0),
-        )],
+            )
+        ],
         datetime(2025, 2, 18, 12, 0),
         6,
         generate_expected_hourly_output(["2025-02-18T14:00:00Z"]),
     ),
     (
-        [create_mock_notification(
+        [
+            create_mock_notification(
                 TemplateType.SMS,
                 StatisticsType.REQUESTED,
                 datetime(2025, 2, 18, 17, 59, 59),
-        )],
+            )
+        ],
         datetime(2025, 2, 18, 15, 0),
         3,
         generate_expected_hourly_output(["2025-02-18T17:00:00Z"]),
@@ -66,21 +72,29 @@ test_cases = [
     ([], datetime(2025, 2, 18, 10, 0), 4, {}),
     (
         [
-            create_mock_notification(TemplateType.SMS, StatisticsType.REQUESTED, datetime(2025, 2, 18, 9, 30, 0)),
-            create_mock_notification(TemplateType.SMS, StatisticsType.REQUESTED, datetime(2025, 2, 18, 11, 45, 0)),
+            create_mock_notification(
+                TemplateType.SMS,
+                StatisticsType.REQUESTED,
+                datetime(2025, 2, 18, 9, 30, 0),
+            ),
+            create_mock_notification(
+                TemplateType.SMS,
+                StatisticsType.REQUESTED,
+                datetime(2025, 2, 18, 11, 45, 0),
+            ),
         ],
         datetime(2025, 2, 18, 8, 0),
         5,
-        generate_expected_hourly_output(["2025-02-18T09:00:00Z", "2025-02-18T11:00:00Z"]),
+        generate_expected_hourly_output(
+            ["2025-02-18T09:00:00Z", "2025-02-18T11:00:00Z"]
+        ),
     ),
 ]


-@pytest.mark.parametrize("mocked_notifications, start_date, hours, expected_output", test_cases)
+@pytest.mark.parametrize(
+    "mocked_notifications, start_date, hours, expected_output", test_cases
+)
 def test_get_specific_hours(mocked_notifications, start_date, hours, expected_output):
-    results = get_specific_hours_stats(
-        mocked_notifications,
-        start_date,
-        hours=hours
-    )
+    results = get_specific_hours_stats(mocked_notifications, start_date, hours=hours)
     assert results == expected_output, f"Expected {expected_output}, but got {results}"