Merge pull request #1899 from GSA/report_performance

cleanup backend report generation
ccostino
2025-08-07 11:19:38 -04:00
committed by GitHub
2 changed files with 65 additions and 16 deletions


@@ -267,9 +267,11 @@ def read_s3_file(bucket_name, object_key, s3res):
f"{job_id}_personalisation",
extract_personalisation(job),
)
except Exception as e:
current_app.logger.exception(str(e))
except botocore.exceptions.ClientError as e:
if e.response["Error"]["Code"] == "NoSuchKey":
current_app.logger.error(f"NoSuchKey: {object_key}")
else:
raise
def get_s3_files():
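A minimal standalone sketch of the narrowed exception handling in this hunk: catch botocore's ClientError, treat NoSuchKey as an expected miss, and re-raise everything else so real failures still surface. The read_personalisation helper, bucket, and key below are hypothetical, not part of the PR.

import logging

import boto3
import botocore.exceptions

logger = logging.getLogger(__name__)


def read_personalisation(bucket_name, object_key):
    s3 = boto3.client("s3")
    try:
        obj = s3.get_object(Bucket=bucket_name, Key=object_key)
        return obj["Body"].read().decode("utf-8")
    except botocore.exceptions.ClientError as e:
        if e.response["Error"]["Code"] == "NoSuchKey":
            # A missing object is an expected condition: log it and move on.
            logger.error("NoSuchKey: %s", object_key)
            return None
        # Anything else (throttling, access denied) should still propagate.
        raise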


@@ -2,6 +2,7 @@ import csv
 import io
 import json
 import os
+import time
 import gevent
 from celery.signals import task_postrun
@@ -578,13 +579,15 @@ def _generate_notifications_report(service_id, report_id, limit_days):
         include_from_test_key=include_from_test_key,
         include_one_off=include_one_off,
     )
-    count = 1
+    count = 0
     if len(pagination.items) == 0:
         current_app.logger.info(f"SKIP {service_id}")
         return
+    start_time = time.time()
     for notification in pagination.items:
+        count = count + 1
         if notification.job_id is not None:
             current_app.logger.debug(
                 f"Processing job_id {notification.job_id} which is row {count}"
             )
-            count = count + 1
             notification.personalisation = s3.get_personalisation_from_s3(
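The manual counter and start_time added above could also be expressed with enumerate(), a swapped-in idiom rather than the PR's own code; this sketch assumes items is a plain list standing in for pagination.items.

import time


def process_rows(items, logger):
    if not items:
        logger.info("nothing to process")
        return
    start_time = time.time()
    # enumerate(..., start=1) replaces count = 0 plus count = count + 1.
    for count, item in enumerate(items, start=1):
        logger.debug(f"Processing row {count}")
    logger.info(f"processed {len(items)} rows in {time.time() - start_time:.1f} seconds")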
@@ -604,12 +607,17 @@ def _generate_notifications_report(service_id, report_id, limit_days):
             notification.to = ""
             notification.normalised_to = ""
+        current_app.logger.debug(
+            hilite(
+                f"Processing row {count} for service {service_id} and days {limit_days}"
+            )
+        )
     notifications = [
         notification.serialize_for_csv() for notification in pagination.items
     ]
+    current_app.logger.debug(
+        hilite(f"Number of notifications in report: {len(notifications)}")
+    )
     current_app.logger.info(hilite(f"RAW: {notifications}"))
     # We try and get the next page of results to work out if we need provide a pagination link to the next page
     # in our response if it exists. Note, this could be done instead by changing `count_pages` in the previous
@@ -618,16 +626,49 @@ def _generate_notifications_report(service_id, report_id, limit_days):
     # doesn't do an additional query to count all the results of which there could be millions but instead only
     # asks for a single extra page of results).
+    # These columns are in the raw data but we don't show them in the report
+    columns_to_remove = {
+        "created_by_email_address",
+        "row_number",
+        "client_reference",
+        "template_type",
+    }
+    # cleanup for report presentation
+    header_renames = {
+        "recipient": "Phone Number",
+        "template_name": "Template",
+        "created_by_name": "Sent By",
+        "carrier": "Carrier",
+        "status": "Status",
+        "created_at": "Time",
+        "job_name": "Batch File",
+        "provider_response": "Carrier Response",
+    }
+    processed_notifications = []
+    for notification in notifications:
+        new_notification = {}
+        for old_key, new_key in header_renames.items():
+            if old_key not in columns_to_remove and old_key in notification:
+                new_notification[new_key] = notification[old_key]
+        processed_notifications.append(new_notification)
     csv_bytes = io.BytesIO()
-    text_wrapper = io.TextIOWrapper(csv_bytes, encoding="utf-8", newline="")
-    writer = csv.writer(text_wrapper)
-    writer.writerows(notifications)
-    text_wrapper.flush()
+    text_stream = io.TextIOWrapper(csv_bytes, encoding="utf-8", newline="")
+    writer = csv.DictWriter(text_stream, fieldnames=header_renames.values())
+    writer.writeheader()
+    writer.writerows(processed_notifications)
+    text_stream.flush()
     csv_bytes.seek(0)
     bucket_name, file_location, access_key, secret_key, region = get_csv_location(
         service_id, report_id
     )
+    current_app.logger.info(
+        hilite(f"REPORT {file_location} {csv_bytes.getvalue().decode('utf-8')}")
+    )
     if bucket_name == "":
         exp_bucket = current_app.config["CSV_UPLOAD_BUCKET"]["bucket"]
         exp_region = current_app.config["CSV_UPLOAD_BUCKET"]["region"]
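The core of this hunk is the rename-and-filter step feeding csv.DictWriter into an in-memory buffer. A self-contained sketch of the same pattern with made-up rows (the real code serializes notification objects first):

import csv
import io

# The mapping doubles as a column filter and a header relabeling.
header_renames = {"recipient": "Phone Number", "status": "Status"}

rows = [
    {"recipient": "+15555550100", "status": "delivered", "row_number": 1},
    {"recipient": "+15555550101", "status": "failed", "row_number": 2},
]

# Keep only mapped columns, under their display names.
processed = [
    {new: row[old] for old, new in header_renames.items() if old in row}
    for row in rows
]

csv_bytes = io.BytesIO()
text_stream = io.TextIOWrapper(csv_bytes, encoding="utf-8", newline="")
writer = csv.DictWriter(text_stream, fieldnames=header_renames.values())
writer.writeheader()
writer.writerows(processed)
text_stream.flush()
csv_bytes.seek(0)  # rewind so the bytes can be streamed to S3

print(csv_bytes.getvalue().decode("utf-8"))
# Phone Number,Status
# +15555550100,delivered
# +15555550101,failed

Worth noting: since header_renames and columns_to_remove share no keys, the `old_key not in columns_to_remove` check in the PR acts as a safeguard rather than an active filter.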
@@ -645,7 +686,13 @@ def _generate_notifications_report(service_id, report_id, limit_days):
         bucket_name=bucket_name,
         file_location=file_location,
     )
-    current_app.logger.info(f"generate-notifications-report uploaded {file_location}")
+    elapsed_time = str(time.time() - start_time)
+    elapsed_time = elapsed_time.split(".")
+    current_app.logger.info(
+        hilite(
+            f"generate-notifications-report uploaded {file_location} elapsed_time = {elapsed_time[0]} seconds"
+        )
+    )
 @notify_celery.task(name="generate-notifications-reports")
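The elapsed-time readout above gets whole seconds by splitting str(float) on the decimal point; an equivalent sketch using int() on the difference instead, with a hypothetical file_location for illustration:

import time

start_time = time.time()
# ... report generation and upload happen here ...
file_location = "reports/example.csv"  # hypothetical path
elapsed_seconds = int(time.time() - start_time)
print(f"generate-notifications-report uploaded {file_location} elapsed_time = {elapsed_seconds} seconds")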