Group uploaded letters by day of printing

Some teams have started uploading quite a lot of letters (in the hundreds per week). They’re also uploading CSVs of emails. This means the uploads page ends up quite jumbled. This is because: - there’s just a lot of items to scan through - conceptually it’s a bit odd to have batches of things displayed alongside individual things on the same page So instead this commit starts grouping together uploaded letters. It does this by the date on which we ‘start’ printing them, or in other words the time at which they can no longer be cancelled. This feels like a natural grouping, and it matches what we know about people’s mental models of ‘batches’ and ‘runs’ when talking about printing. The code for this is a bit gnarly because: - timezones - the print cutoff doesn’t align with the end of a day - we have to do this in SQL because it wouldn’t be efficient to query thousands of letters and then do the timezone calculations on them in Python
2026-02-03 09:51:11 -05:00 · 2020-05-11 10:51:33 +01:00
parent 79646f7b4a
commit 421c1aac96
4 changed files with 179 additions and 99 deletions
--- a/app/dao/uploads_dao.py
+++ b/app/dao/uploads_dao.py
@@ -1,6 +1,6 @@
 from datetime import datetime
 from flask import current_app
-from sqlalchemy import and_, desc, func, literal, String
+from sqlalchemy import and_, desc, func, literal, text, String

 from app import db
 from app.models import (
@@ -10,6 +10,25 @@ from app.models import (
 from app.utils import midnight_n_days_ago


+def _get_printing_day(created_at):
+    return func.date_trunc(
+        'day',
+        func.timezone('Europe/London', func.timezone('UTC', created_at)) + text(
+            "interval '6 hours 30 minutes'"
+        )
+    )
+
+
+def _get_printing_datetime(created_at):
+    return _get_printing_day(created_at) + text(
+        "interval '17 hours 30 minutes'"
+    )
+
+
+def _naive_gmt_to_utc(column):
+    return func.timezone('UTC', func.timezone('Europe/London', column))
+
+
 def dao_get_uploads_by_service_id(service_id, limit_days=None, page=1, page_size=50):
    # Hardcoded filter to exclude cancelled or scheduled jobs
    # for the moment, but we may want to change this method take 'statuses' as a argument in the future
@@ -56,24 +75,13 @@ def dao_get_uploads_by_service_id(service_id, limit_days=None, page=1, page_size
        Notification.status != NOTIFICATION_CANCELLED,
        Template.hidden == True,
        Notification.created_at >= today - func.coalesce(ServiceDataRetention.days_of_retention, 7)
-
    ]
    if limit_days is not None:
        letters_query_filter.append(Notification.created_at >= midnight_n_days_ago(limit_days))

-    letters_query = db.session.query(
-        Notification.id,
-        Notification.client_reference.label('original_file_name'),
-        literal('1').label('notification_count'),
-        literal(None).label('template_type'),
-        func.coalesce(ServiceDataRetention.days_of_retention, 7).label('days_of_retention'),
-        Notification.created_at.label("created_at"),
-        literal(None).label('scheduled_for'),
-        # letters don't have a processing_started date but we want created_at to be used for sorting
-        Notification.created_at.label('processing_started'),
-        Notification.status,
-        literal('letter').label('upload_type'),
-        Notification.to.label('recipient'),
+    letters_subquery = db.session.query(
+        func.count().label('notification_count'),
+        _naive_gmt_to_utc(_get_printing_datetime(Notification.created_at)).label('printing_at'),
    ).join(
        Template, Notification.template_id == Template.id
    ).outerjoin(
@@ -83,6 +91,25 @@ def dao_get_uploads_by_service_id(service_id, limit_days=None, page=1, page_size
        )
    ).filter(
        *letters_query_filter
+    ).group_by(
+        'printing_at'
+    ).subquery()
+
+    letters_query = db.session.query(
+        literal(None).label('id'),
+        literal('Uploaded letters').label('original_file_name'),
+        letters_subquery.c.notification_count.label('notification_count'),
+        literal('letter').label('template_type'),
+        literal(None).label('days_of_retention'),
+        letters_subquery.c.printing_at.label('created_at'),
+        literal(None).label('scheduled_for'),
+        letters_subquery.c.printing_at.label('processing_started'),
+        literal(None).label('status'),
+        literal('letter_day').label('upload_type'),
+        literal(None).label('recipient'),
+    ).group_by(
+        letters_subquery.c.notification_count,
+        letters_subquery.c.printing_at,
    )

    return jobs_query.union_all(