From 421c1aac96bf0e07ed12b95d778aabdaa3411921 Mon Sep 17 00:00:00 2001 From: Chris Hill-Scott Date: Mon, 11 May 2020 10:51:33 +0100 Subject: [PATCH] Group uploaded letters by day of printing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some teams have started uploading quite a lot of letters (in the hundreds per week). They’re also uploading CSVs of emails. This means the uploads page ends up quite jumbled. This is because: - there’s just a lot of items to scan through - conceptually it’s a bit odd to have batches of things displayed alongside individual things on the same page So instead this commit starts grouping together uploaded letters. It does this by the date on which we ‘start’ printing them, or in other words the time at which they can no longer be cancelled. This feels like a natural grouping, and it matches what we know about people’s mental models of ‘batches’ and ‘runs’ when talking about printing. The code for this is a bit gnarly because: - timezones - the print cutoff doesn’t align with the end of a day - we have to do this in SQL because it wouldn’t be efficient to query thousands of letters and then do the timezone calculations on them in Python --- app/dao/uploads_dao.py | 57 +++++++--- app/upload/rest.py | 2 +- tests/app/dao/test_uploads_dao.py | 172 +++++++++++++++++++++--------- tests/app/upload/test_rest.py | 47 +++----- 4 files changed, 179 insertions(+), 99 deletions(-) diff --git a/app/dao/uploads_dao.py b/app/dao/uploads_dao.py index ceab5659f..0bed6a1fa 100644 --- a/app/dao/uploads_dao.py +++ b/app/dao/uploads_dao.py @@ -1,6 +1,6 @@ from datetime import datetime from flask import current_app -from sqlalchemy import and_, desc, func, literal, String +from sqlalchemy import and_, desc, func, literal, text, String from app import db from app.models import ( @@ -10,6 +10,25 @@ from app.models import ( from app.utils import midnight_n_days_ago +def _get_printing_day(created_at): + return func.date_trunc( + 'day', + func.timezone('Europe/London', func.timezone('UTC', created_at)) + text( + "interval '6 hours 30 minutes'" + ) + ) + + +def _get_printing_datetime(created_at): + return _get_printing_day(created_at) + text( + "interval '17 hours 30 minutes'" + ) + + +def _naive_gmt_to_utc(column): + return func.timezone('UTC', func.timezone('Europe/London', column)) + + def dao_get_uploads_by_service_id(service_id, limit_days=None, page=1, page_size=50): # Hardcoded filter to exclude cancelled or scheduled jobs # for the moment, but we may want to change this method take 'statuses' as a argument in the future @@ -56,24 +75,13 @@ def dao_get_uploads_by_service_id(service_id, limit_days=None, page=1, page_size Notification.status != NOTIFICATION_CANCELLED, Template.hidden == True, Notification.created_at >= today - func.coalesce(ServiceDataRetention.days_of_retention, 7) - ] if limit_days is not None: letters_query_filter.append(Notification.created_at >= midnight_n_days_ago(limit_days)) - letters_query = db.session.query( - Notification.id, - Notification.client_reference.label('original_file_name'), - literal('1').label('notification_count'), - literal(None).label('template_type'), - func.coalesce(ServiceDataRetention.days_of_retention, 7).label('days_of_retention'), - Notification.created_at.label("created_at"), - literal(None).label('scheduled_for'), - # letters don't have a processing_started date but we want created_at to be used for sorting - Notification.created_at.label('processing_started'), - Notification.status, - literal('letter').label('upload_type'), - Notification.to.label('recipient'), + letters_subquery = db.session.query( + func.count().label('notification_count'), + _naive_gmt_to_utc(_get_printing_datetime(Notification.created_at)).label('printing_at'), ).join( Template, Notification.template_id == Template.id ).outerjoin( @@ -83,6 +91,25 @@ def dao_get_uploads_by_service_id(service_id, limit_days=None, page=1, page_size ) ).filter( *letters_query_filter + ).group_by( + 'printing_at' + ).subquery() + + letters_query = db.session.query( + literal(None).label('id'), + literal('Uploaded letters').label('original_file_name'), + letters_subquery.c.notification_count.label('notification_count'), + literal('letter').label('template_type'), + literal(None).label('days_of_retention'), + letters_subquery.c.printing_at.label('created_at'), + literal(None).label('scheduled_for'), + letters_subquery.c.printing_at.label('processing_started'), + literal(None).label('status'), + literal('letter_day').label('upload_type'), + literal(None).label('recipient'), + ).group_by( + letters_subquery.c.notification_count, + letters_subquery.c.printing_at, ) return jobs_query.union_all( diff --git a/app/upload/rest.py b/app/upload/rest.py index e0358ca77..7ba0dc8fc 100644 --- a/app/upload/rest.py +++ b/app/upload/rest.py @@ -61,7 +61,7 @@ def get_paginated_uploads(service_id, limit_days, page): upload_dict['statistics'] = [{'status': statistic.status, 'count': statistic.count} for statistic in statistics] else: - upload_dict['statistics'] = [{'status': upload.status, 'count': 1}] + upload_dict['statistics'] = [] data.append(upload_dict) return { diff --git a/tests/app/dao/test_uploads_dao.py b/tests/app/dao/test_uploads_dao.py index 909a6b461..e9c8d5cb7 100644 --- a/tests/app/dao/test_uploads_dao.py +++ b/tests/app/dao/test_uploads_dao.py @@ -1,4 +1,5 @@ from datetime import datetime, timedelta +from freezegun import freeze_time from app.dao.uploads_dao import dao_get_uploads_by_service_id from app.models import LETTER_TYPE, JOB_STATUS_IN_PROGRESS @@ -30,6 +31,7 @@ def create_uploaded_template(service): ) +@freeze_time("2020-02-02 14:00") # GMT time def test_get_uploads_for_service(sample_template): create_service_data_retention(sample_template.service, 'sms', days_of_retention=9) job = create_job(sample_template, processing_started=datetime.utcnow()) @@ -40,7 +42,7 @@ def test_get_uploads_for_service(sample_template): other_template = create_template(service=other_service) other_job = create_job(other_template, processing_started=datetime.utcnow()) other_letter_template = create_uploaded_template(other_service) - other_letter = create_uploaded_letter(other_letter_template, other_service) + create_uploaded_letter(other_letter_template, other_service) uploads_from_db = dao_get_uploads_by_service_id(job.service_id).items other_uploads_from_db = dao_get_uploads_by_service_id(other_job.service_id).items @@ -48,17 +50,17 @@ def test_get_uploads_for_service(sample_template): assert len(uploads_from_db) == 2 assert uploads_from_db[0] == ( - letter.id, - letter.client_reference, + None, + 'Uploaded letters', 1, + 'letter', None, - 7, - letter.created_at, + letter.created_at.replace(hour=17, minute=30, second=0, microsecond=0), + None, + letter.created_at.replace(hour=17, minute=30, second=0, microsecond=0), + None, + 'letter_day', None, - letter.created_at, - letter.status, - "letter", - "file-name", ) assert uploads_from_db[1] == ( job.id, @@ -75,17 +77,19 @@ def test_get_uploads_for_service(sample_template): ) assert len(other_uploads_from_db) == 2 - assert other_uploads_from_db[0] == (other_letter.id, - other_letter.client_reference, - 1, - None, - 7, - other_letter.created_at, - None, - other_letter.created_at, - other_letter.status, - "letter", - "file-name") + assert other_uploads_from_db[0] == ( + None, + 'Uploaded letters', + 1, + 'letter', + None, + letter.created_at.replace(hour=17, minute=30, second=0, microsecond=0), + None, + letter.created_at.replace(hour=17, minute=30, second=0, microsecond=0), + None, + "letter_day", + None, + ) assert other_uploads_from_db[1] == (other_job.id, other_job.original_file_name, other_job.notification_count, @@ -98,10 +102,48 @@ def test_get_uploads_for_service(sample_template): "job", None) - assert uploads_from_db[0] != other_uploads_from_db[0] assert uploads_from_db[1] != other_uploads_from_db[1] +@freeze_time("2020-02-02 18:00") +def test_get_uploads_for_service_groups_letters(sample_template): + letter_template = create_uploaded_template(sample_template.service) + + # Just gets into yesterday’s print run + create_uploaded_letter(letter_template, sample_template.service, created_at=( + datetime(2020, 2, 1, 17, 29, 59) + )) + + # Yesterday but in today’s print run + create_uploaded_letter(letter_template, sample_template.service, created_at=( + datetime(2020, 2, 1, 17, 30) + )) + # First thing today + create_uploaded_letter(letter_template, sample_template.service, created_at=( + datetime(2020, 2, 2, 0, 0) + )) + # Just before today’s print deadline + create_uploaded_letter(letter_template, sample_template.service, created_at=( + datetime(2020, 2, 2, 17, 29, 59) + )) + + # Just missed today’s print deadline + create_uploaded_letter(letter_template, sample_template.service, created_at=( + datetime(2020, 2, 2, 17, 30) + )) + + uploads_from_db = dao_get_uploads_by_service_id(sample_template.service_id).items + + assert [ + (upload.notification_count, upload.created_at) + for upload in uploads_from_db + ] == [ + (1, datetime(2020, 2, 3, 17, 30)), + (3, datetime(2020, 2, 2, 17, 30)), + (1, datetime(2020, 2, 1, 17, 30)), + ] + + def test_get_uploads_does_not_return_cancelled_jobs_or_letters(sample_template): create_job(sample_template, job_status='scheduled') create_job(sample_template, job_status='cancelled') @@ -118,14 +160,17 @@ def test_get_uploads_orders_by_created_at_desc(sample_template): job_status=JOB_STATUS_IN_PROGRESS) upload_2 = create_job(sample_template, processing_started=datetime.utcnow(), job_status=JOB_STATUS_IN_PROGRESS) - upload_3 = create_uploaded_letter(letter_template, sample_template.service, status='delivered') + create_uploaded_letter(letter_template, sample_template.service, status='delivered') results = dao_get_uploads_by_service_id(service_id=sample_template.service_id).items - assert len(results) == 3 - assert results[0].id == upload_3.id - assert results[1].id == upload_2.id - assert results[2].id == upload_1.id + assert [ + (result.id, result.upload_type) for result in results + ] == [ + (None, 'letter_day'), + (upload_2.id, 'job'), + (upload_1.id, 'job'), + ] def test_get_uploads_orders_by_processing_started_desc(sample_template): @@ -148,25 +193,26 @@ def test_get_uploads_orders_by_processing_started_and_created_at_desc(sample_tem letter_template = create_uploaded_template(sample_template.service) days_ago = datetime.utcnow() - timedelta(days=4) - upload_1 = create_uploaded_letter(letter_template, service=letter_template.service) + create_uploaded_letter(letter_template, service=letter_template.service) upload_2 = create_job(sample_template, processing_started=datetime.utcnow() - timedelta(days=1), created_at=days_ago, job_status=JOB_STATUS_IN_PROGRESS) upload_3 = create_job(sample_template, processing_started=datetime.utcnow() - timedelta(days=2), created_at=days_ago, job_status=JOB_STATUS_IN_PROGRESS) - upload_4 = create_uploaded_letter(letter_template, service=letter_template.service, - created_at=datetime.utcnow() - timedelta(days=3)) + create_uploaded_letter(letter_template, service=letter_template.service, + created_at=datetime.utcnow() - timedelta(days=3)) results = dao_get_uploads_by_service_id(service_id=sample_template.service_id).items assert len(results) == 4 - assert results[0].id == upload_1.id + assert results[0].id is None assert results[1].id == upload_2.id assert results[2].id == upload_3.id - assert results[3].id == upload_4.id + assert results[3].id is None +@freeze_time('2020-04-02 14:00') # Few days after the clocks go forward def test_get_uploads_only_gets_uploads_within_service_retention_period(sample_template): letter_template = create_uploaded_template(sample_template.service) create_service_data_retention(sample_template.service, 'sms', days_of_retention=3) @@ -199,36 +245,58 @@ def test_get_uploads_only_gets_uploads_within_service_retention_period(sample_te results = dao_get_uploads_by_service_id(service_id=sample_template.service_id).items assert len(results) == 4 - assert results[0].id == upload_1.id - assert results[1].id == upload_2.id - assert results[2].id == upload_3.id - assert results[3].id == upload_4.id + + # Uploaded letters get their `created_at` shifted time of printing + # 17:30 BST == 16:30 UTC + assert results[0].created_at == upload_1.created_at.replace(hour=16, minute=30, second=0, microsecond=0) + + # Jobs keep their original `created_at` + assert results[1].created_at == upload_2.created_at.replace(hour=14, minute=00, second=0, microsecond=0) + + # Still in BST here… + assert results[2].created_at == upload_3.created_at.replace(hour=16, minute=30, second=0, microsecond=0) + + # Now we’ve gone far enough back to be in GMT + # 17:30 GMT == 17:30 UTC + assert results[3].created_at == upload_4.created_at.replace(hour=17, minute=30, second=0, microsecond=0) +@freeze_time('2020-02-02 14:00') def test_get_uploads_is_paginated(sample_template): letter_template = create_uploaded_template(sample_template.service) - upload_1 = create_uploaded_letter(letter_template, sample_template.service, status='delivered', - created_at=datetime.utcnow() - timedelta(minutes=3)) - upload_2 = create_job(sample_template, processing_started=datetime.utcnow() - timedelta(minutes=2), - job_status=JOB_STATUS_IN_PROGRESS) - upload_3 = create_uploaded_letter(letter_template, sample_template.service, status='delivered', - created_at=datetime.utcnow() - timedelta(minutes=1)) - upload_4 = create_job(sample_template, processing_started=datetime.utcnow(), job_status=JOB_STATUS_IN_PROGRESS) + create_uploaded_letter( + letter_template, sample_template.service, status='delivered', + created_at=datetime.utcnow() - timedelta(minutes=3), + ) + create_job( + sample_template, processing_started=datetime.utcnow() - timedelta(minutes=2), + job_status=JOB_STATUS_IN_PROGRESS, + ) + create_uploaded_letter( + letter_template, sample_template.service, status='delivered', + created_at=datetime.utcnow() - timedelta(minutes=1), + ) + create_job( + sample_template, processing_started=datetime.utcnow(), + job_status=JOB_STATUS_IN_PROGRESS, + ) - results = dao_get_uploads_by_service_id(sample_template.service_id, page=1, page_size=2) + results = dao_get_uploads_by_service_id(sample_template.service_id, page=1, page_size=1) - assert results.per_page == 2 - assert results.total == 4 - assert len(results.items) == 2 - assert results.items[0].id == upload_4.id - assert results.items[1].id == upload_3.id + assert results.per_page == 1 + assert results.total == 3 + assert len(results.items) == 1 + assert results.items[0].created_at == datetime.utcnow().replace(hour=17, minute=30, second=0, microsecond=0) + assert results.items[0].notification_count == 2 + assert results.items[0].upload_type == 'letter_day' - results = dao_get_uploads_by_service_id(sample_template.service_id, page=2, page_size=2) + results = dao_get_uploads_by_service_id(sample_template.service_id, page=2, page_size=1) - assert len(results.items) == 2 - assert results.items[0].id == upload_2.id - assert results.items[1].id == upload_1.id + assert len(results.items) == 1 + assert results.items[0].created_at == datetime.utcnow().replace(hour=14, minute=0, second=0, microsecond=0) + assert results.items[0].notification_count == 1 + assert results.items[0].upload_type == 'job' def test_get_uploads_returns_empty_list(sample_service): diff --git a/tests/app/upload/test_rest.py b/tests/app/upload/test_rest.py index d7575ced7..d9cb78ba9 100644 --- a/tests/app/upload/test_rest.py +++ b/tests/app/upload/test_rest.py @@ -32,16 +32,17 @@ def create_precompiled_template(service): ) +@freeze_time('2020-02-02 14:00') def test_get_uploads(admin_request, sample_template): letter_template = create_precompiled_template(sample_template.service) - upload_1 = create_uploaded_letter(letter_template, sample_template.service, status='delivered', - created_at=datetime.utcnow() - timedelta(minutes=4)) + create_uploaded_letter(letter_template, sample_template.service, status='delivered', + created_at=datetime.utcnow() - timedelta(minutes=4)) upload_2 = create_job(template=sample_template, processing_started=datetime.utcnow() - timedelta(minutes=3), job_status=JOB_STATUS_FINISHED) - upload_3 = create_uploaded_letter(letter_template, sample_template.service, status='delivered', - created_at=datetime.utcnow() - timedelta(minutes=2)) + create_uploaded_letter(letter_template, sample_template.service, status='delivered', + created_at=datetime.utcnow() - timedelta(minutes=2)) upload_4 = create_job(template=sample_template, processing_started=datetime.utcnow() - timedelta(minutes=1), job_status=JOB_STATUS_FINISHED) @@ -52,7 +53,7 @@ def test_get_uploads(admin_request, sample_template): resp_json = admin_request.get('upload.get_uploads_by_service', service_id=service_id) data = resp_json['data'] - assert len(data) == 5 + assert len(data) == 4 assert data[0] == {'id': str(upload_5.id), 'original_file_name': 'some.csv', 'recipient': None, @@ -61,23 +62,15 @@ def test_get_uploads(admin_request, sample_template): 'created_at': upload_5.created_at.strftime("%Y-%m-%d %H:%M:%S"), 'statistics': [], 'upload_type': 'job'} - assert data[1] == {'id': str(upload_4.id), - 'original_file_name': 'some.csv', + assert data[1] == {'id': None, + 'original_file_name': 'Uploaded letters', 'recipient': None, - 'notification_count': 1, - 'template_type': 'sms', - 'created_at': upload_4.created_at.strftime( + 'notification_count': 2, + 'template_type': 'letter', + 'created_at': upload_4.created_at.replace(hour=17, minute=30).strftime( "%Y-%m-%d %H:%M:%S"), 'statistics': [], - 'upload_type': 'job'} - assert data[2] == {'id': str(upload_3.id), - 'original_file_name': "file-name", - 'recipient': '742 Evergreen Terrace', - 'notification_count': 1, - 'template_type': None, - 'created_at': upload_3.created_at.strftime("%Y-%m-%d %H:%M:%S"), - 'statistics': [{'count': 1, 'status': 'delivered'}], - 'upload_type': 'letter'} + 'upload_type': 'letter_day'} assert data[3] == {'id': str(upload_2.id), 'original_file_name': "some.csv", 'recipient': None, @@ -87,14 +80,6 @@ def test_get_uploads(admin_request, sample_template): "%Y-%m-%d %H:%M:%S"), 'statistics': [], 'upload_type': 'job'} - assert data[4] == {'id': str(upload_1.id), - 'original_file_name': "file-name", - 'recipient': '742 Evergreen Terrace', - 'notification_count': 1, - 'template_type': None, - 'created_at': upload_1.created_at.strftime("%Y-%m-%d %H:%M:%S"), - 'statistics': [{'count': 1, 'status': 'delivered'}], - 'upload_type': 'letter'} def test_get_uploads_should_return_statistics(admin_request, sample_template): @@ -110,8 +95,8 @@ def test_get_uploads_should_return_statistics(admin_request, sample_template): create_notification(template=sample_template, job=job_3, status='sending') letter_template = create_precompiled_template(sample_template.service) - letter_1 = create_uploaded_letter(letter_template, sample_template.service, status='delivered', - created_at=datetime.utcnow() - timedelta(days=3)) + create_uploaded_letter(letter_template, sample_template.service, status='delivered', + created_at=datetime.utcnow() - timedelta(days=3)) resp_json = admin_request.get('upload.get_uploads_by_service', service_id=sample_template.service_id)['data'] assert len(resp_json) == 4 @@ -121,8 +106,8 @@ def test_get_uploads_should_return_statistics(admin_request, sample_template): assert resp_json[1]['statistics'] == [{'status': 'sending', 'count': 4}] assert resp_json[2]['id'] == str(job_2.id) assert resp_json[2]['statistics'] == [{'status': 'created', 'count': 3}] - assert resp_json[3]['id'] == str(letter_1.id) - assert resp_json[3]['statistics'] == [{'status': 'delivered', 'count': 1}] + assert resp_json[3]['id'] is None + assert resp_json[3]['statistics'] == [] def test_get_uploads_should_paginate(admin_request, sample_template):