Merge pull request #1899 from GSA/report_performance

cleanup backend report generation
ccostino
2025-08-07 11:19:38 -04:00
committed by GitHub
2 changed files with 65 additions and 16 deletions


@@ -267,9 +267,11 @@ def read_s3_file(bucket_name, object_key, s3res):
f"{job_id}_personalisation",
extract_personalisation(job),
)
except Exception as e:
current_app.logger.exception(str(e))
except botocore.exceptions.ClientError as e:
if e.response["Error"]["Code"] == "NoSuchKey":
current_app.logger.error(f"NoSuchKey: {object_key}")
else:
raise
def get_s3_files():
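A minimal standalone sketch of the narrowed exception handling in this hunk: catch botocore's ClientError, treat NoSuchKey as an expected miss, and re-raise everything else so real failures still surface. The read_personalisation helper, bucket, and key below are hypothetical, not part of the PR.

import logging

import boto3
import botocore.exceptions

logger = logging.getLogger(__name__)


def read_personalisation(bucket_name, object_key):
    s3 = boto3.client("s3")
    try:
        obj = s3.get_object(Bucket=bucket_name, Key=object_key)
        return obj["Body"].read().decode("utf-8")
    except botocore.exceptions.ClientError as e:
        if e.response["Error"]["Code"] == "NoSuchKey":
            # A missing object is an expected condition: log it and move on.
            logger.error("NoSuchKey: %s", object_key)
            return None
        # Anything else (throttling, access denied) should still propagate.
        raise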


@@ -2,6 +2,7 @@ import csv
 import io
 import json
 import os
+import time
 import gevent
 from celery.signals import task_postrun
@@ -578,13 +579,15 @@ def _generate_notifications_report(service_id, report_id, limit_days):
         include_from_test_key=include_from_test_key,
         include_one_off=include_one_off,
     )
-    count = 1
+    count = 0
     if len(pagination.items) == 0:
         current_app.logger.info(f"SKIP {service_id}")
         return
+    start_time = time.time()
     for notification in pagination.items:
+        count = count + 1
         if notification.job_id is not None:
             current_app.logger.debug(
                 f"Processing job_id {notification.job_id} which is row {count}"
             )
-            count = count + 1
             notification.personalisation = s3.get_personalisation_from_s3(
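The manual counter and start_time added above could also be expressed with enumerate(), a swapped-in idiom rather than the PR's own code; this sketch assumes items is a plain list standing in for pagination.items.

import time


def process_rows(items, logger):
    if not items:
        logger.info("nothing to process")
        return
    start_time = time.time()
    # enumerate(..., start=1) replaces count = 0 plus count = count + 1.
    for count, item in enumerate(items, start=1):
        logger.debug(f"Processing row {count}")
    logger.info(f"processed {len(items)} rows in {time.time() - start_time:.1f} seconds")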
@@ -604,12 +607,17 @@ def _generate_notifications_report(service_id, report_id, limit_days):
             notification.to = ""
             notification.normalised_to = ""
+        current_app.logger.debug(
+            hilite(
+                f"Processing row {count} for service {service_id} and days {limit_days}"
+            )
+        )
     notifications = [
         notification.serialize_for_csv() for notification in pagination.items
     ]
+    current_app.logger.debug(
+        hilite(f"Number of notifications in report: {len(notifications)}")
+    )
     current_app.logger.info(hilite(f"RAW: {notifications}"))
     # We try and get the next page of results to work out if we need provide a pagination link to the next page
     # in our response if it exists. Note, this could be done instead by changing `count_pages` in the previous
@@ -618,16 +626,49 @@ def _generate_notifications_report(service_id, report_id, limit_days):
     # doesn't do an additional query to count all the results of which there could be millions but instead only
     # asks for a single extra page of results).
+    # These columns are in the raw data but we don't show them in the report
+    columns_to_remove = {
+        "created_by_email_address",
+        "row_number",
+        "client_reference",
+        "template_type",
+    }
+    # cleanup for report presentation
+    header_renames = {
+        "recipient": "Phone Number",
+        "template_name": "Template",
+        "created_by_name": "Sent By",
+        "carrier": "Carrier",
+        "status": "Status",
+        "created_at": "Time",
+        "job_name": "Batch File",
+        "provider_response": "Carrier Response",
+    }
+    processed_notifications = []
+    for notification in notifications:
+        new_notification = {}
+        for old_key, new_key in header_renames.items():
+            if old_key not in columns_to_remove and old_key in notification:
+                new_notification[new_key] = notification[old_key]
+        processed_notifications.append(new_notification)
     csv_bytes = io.BytesIO()
-    text_wrapper = io.TextIOWrapper(csv_bytes, encoding="utf-8", newline="")
-    writer = csv.writer(text_wrapper)
-    writer.writerows(notifications)
-    text_wrapper.flush()
+    text_stream = io.TextIOWrapper(csv_bytes, encoding="utf-8", newline="")
+    writer = csv.DictWriter(text_stream, fieldnames=header_renames.values())
+    writer.writeheader()
+    writer.writerows(processed_notifications)
+    text_stream.flush()
     csv_bytes.seek(0)
     bucket_name, file_location, access_key, secret_key, region = get_csv_location(
         service_id, report_id
     )
+    current_app.logger.info(
+        hilite(f"REPORT {file_location} {csv_bytes.getvalue().decode('utf-8')}")
+    )
     if bucket_name == "":
         exp_bucket = current_app.config["CSV_UPLOAD_BUCKET"]["bucket"]
         exp_region = current_app.config["CSV_UPLOAD_BUCKET"]["region"]
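The core of this hunk is the rename-and-filter step feeding csv.DictWriter into an in-memory buffer. A self-contained sketch of the same pattern with made-up rows (the real code serializes notification objects first):

import csv
import io

# The mapping doubles as a column filter and a header relabeling.
header_renames = {"recipient": "Phone Number", "status": "Status"}

rows = [
    {"recipient": "+15555550100", "status": "delivered", "row_number": 1},
    {"recipient": "+15555550101", "status": "failed", "row_number": 2},
]

# Keep only mapped columns, under their display names.
processed = [
    {new: row[old] for old, new in header_renames.items() if old in row}
    for row in rows
]

csv_bytes = io.BytesIO()
text_stream = io.TextIOWrapper(csv_bytes, encoding="utf-8", newline="")
writer = csv.DictWriter(text_stream, fieldnames=header_renames.values())
writer.writeheader()
writer.writerows(processed)
text_stream.flush()
csv_bytes.seek(0)  # rewind so the bytes can be streamed to S3

print(csv_bytes.getvalue().decode("utf-8"))
# Phone Number,Status
# +15555550100,delivered
# +15555550101,failed

Worth noting: since header_renames and columns_to_remove share no keys, the `old_key not in columns_to_remove` check in the PR acts as a safeguard rather than an active filter.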
@@ -645,7 +686,13 @@ def _generate_notifications_report(service_id, report_id, limit_days):
         bucket_name=bucket_name,
         file_location=file_location,
     )
-    current_app.logger.info(f"generate-notifications-report uploaded {file_location}")
+    elapsed_time = str(time.time() - start_time)
+    elapsed_time = elapsed_time.split(".")
+    current_app.logger.info(
+        hilite(
+            f"generate-notifications-report uploaded {file_location} elapsed_time = {elapsed_time[0]} seconds"
+        )
+    )
 @notify_celery.task(name="generate-notifications-reports")
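The elapsed-time readout above gets whole seconds by splitting str(float) on the decimal point; an equivalent sketch using int() on the difference instead, with a hypothetical file_location for illustration:

import time

start_time = time.time()
# ... report generation and upload happen here ...
file_location = "reports/example.csv"  # hypothetical path
elapsed_seconds = int(time.time() - start_time)
print(f"generate-notifications-report uploaded {file_location} elapsed_time = {elapsed_seconds} seconds")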