Group uploaded letters by day of printing

Some teams have started uploading quite a lot of letters (in the
hundreds per week). They’re also uploading CSVs of emails. This means
the uploads page ends up quite jumbled.

This is because:
- there are just a lot of items to scan through
- conceptually it’s a bit odd to have batches of things displayed
  alongside individual things on the same page

So instead this commit starts grouping uploaded letters together. It
groups them by the date on which we ‘start’ printing them, or in other
words the time after which they can no longer be cancelled.

This feels like a natural grouping, and it matches what we know about
people’s mental models of ‘batches’ and ‘runs’ when talking about
printing.

The code for this is a bit gnarly because:
- timezones
- the print cutoff (5:30pm UK time) doesn’t align with the end of a day
- we have to do this in SQL because it wouldn’t be efficient to query
  thousands of letters and then do the timezone calculations on them in
  Python
This commit is contained in:
Chris Hill-Scott
2020-05-11 10:51:33 +01:00
parent 79646f7b4a
commit 421c1aac96
4 changed files with 179 additions and 99 deletions

View File

@@ -1,6 +1,6 @@
from datetime import datetime from datetime import datetime
from flask import current_app from flask import current_app
from sqlalchemy import and_, desc, func, literal, String from sqlalchemy import and_, desc, func, literal, text, String
from app import db from app import db
from app.models import ( from app.models import (
@@ -10,6 +10,25 @@ from app.models import (
from app.utils import midnight_n_days_ago from app.utils import midnight_n_days_ago
def _get_printing_day(created_at):
    """Build a SQL expression for the day on which a letter is printed.

    The expression is evaluated by the database, not in Python.

    :param created_at: column (or SQL expression) holding the creation time.
        NOTE(review): presumably a naive timestamp stored in UTC - confirm
        against the model definition.
    :return: a SQLAlchemy expression that truncates the shifted local time
        to the start of its day.
    """
    # Tag the value as UTC, then convert it to London wall-clock time so the
    # day boundary follows UK local time (GMT or BST).
    created_at_utc = func.timezone('UTC', created_at)
    created_at_london = func.timezone('Europe/London', created_at_utc)

    # 17:30 + 6h30m == midnight, so anything created at or after 17:30
    # local time rolls over into the following day's group.
    cutoff_shift = text("interval '6 hours 30 minutes'")

    return func.date_trunc('day', created_at_london + cutoff_shift)
def _get_printing_datetime(created_at):
    """Build a SQL expression for the moment a letter's print run starts.

    Takes the printing day from ``_get_printing_day`` and adds 17 hours
    30 minutes, giving 17:30 on that day. The result is still expressed in
    the same local wall-clock terms as the printing day itself.

    :param created_at: column (or SQL expression) holding the creation time.
    :return: a SQLAlchemy expression for 17:30 on the printing day.
    """
    printing_day = _get_printing_day(created_at)
    print_time_offset = text("interval '17 hours 30 minutes'")
    return printing_day + print_time_offset
def _naive_gmt_to_utc(column):
    """Build a SQL expression converting a London local time to UTC.

    Despite the name, the value is interpreted in the ``Europe/London``
    zone rather than fixed GMT, so the database applies whichever UK offset
    is in force for that timestamp.

    :param column: SQL expression holding a timestamp in London local time.
        NOTE(review): presumably timezone-naive - confirm with callers.
    :return: a SQLAlchemy expression for the same instant expressed in UTC.
    """
    london_aware = func.timezone('Europe/London', column)
    return func.timezone('UTC', london_aware)
def dao_get_uploads_by_service_id(service_id, limit_days=None, page=1, page_size=50): def dao_get_uploads_by_service_id(service_id, limit_days=None, page=1, page_size=50):
# Hardcoded filter to exclude cancelled or scheduled jobs # Hardcoded filter to exclude cancelled or scheduled jobs
# for the moment, but we may want to change this method take 'statuses' as a argument in the future # for the moment, but we may want to change this method take 'statuses' as a argument in the future
@@ -56,24 +75,13 @@ def dao_get_uploads_by_service_id(service_id, limit_days=None, page=1, page_size
Notification.status != NOTIFICATION_CANCELLED, Notification.status != NOTIFICATION_CANCELLED,
Template.hidden == True, Template.hidden == True,
Notification.created_at >= today - func.coalesce(ServiceDataRetention.days_of_retention, 7) Notification.created_at >= today - func.coalesce(ServiceDataRetention.days_of_retention, 7)
] ]
if limit_days is not None: if limit_days is not None:
letters_query_filter.append(Notification.created_at >= midnight_n_days_ago(limit_days)) letters_query_filter.append(Notification.created_at >= midnight_n_days_ago(limit_days))
letters_query = db.session.query( letters_subquery = db.session.query(
Notification.id, func.count().label('notification_count'),
Notification.client_reference.label('original_file_name'), _naive_gmt_to_utc(_get_printing_datetime(Notification.created_at)).label('printing_at'),
literal('1').label('notification_count'),
literal(None).label('template_type'),
func.coalesce(ServiceDataRetention.days_of_retention, 7).label('days_of_retention'),
Notification.created_at.label("created_at"),
literal(None).label('scheduled_for'),
# letters don't have a processing_started date but we want created_at to be used for sorting
Notification.created_at.label('processing_started'),
Notification.status,
literal('letter').label('upload_type'),
Notification.to.label('recipient'),
).join( ).join(
Template, Notification.template_id == Template.id Template, Notification.template_id == Template.id
).outerjoin( ).outerjoin(
@@ -83,6 +91,25 @@ def dao_get_uploads_by_service_id(service_id, limit_days=None, page=1, page_size
) )
).filter( ).filter(
*letters_query_filter *letters_query_filter
).group_by(
'printing_at'
).subquery()
letters_query = db.session.query(
literal(None).label('id'),
literal('Uploaded letters').label('original_file_name'),
letters_subquery.c.notification_count.label('notification_count'),
literal('letter').label('template_type'),
literal(None).label('days_of_retention'),
letters_subquery.c.printing_at.label('created_at'),
literal(None).label('scheduled_for'),
letters_subquery.c.printing_at.label('processing_started'),
literal(None).label('status'),
literal('letter_day').label('upload_type'),
literal(None).label('recipient'),
).group_by(
letters_subquery.c.notification_count,
letters_subquery.c.printing_at,
) )
return jobs_query.union_all( return jobs_query.union_all(

View File

@@ -61,7 +61,7 @@ def get_paginated_uploads(service_id, limit_days, page):
upload_dict['statistics'] = [{'status': statistic.status, 'count': statistic.count} for statistic in upload_dict['statistics'] = [{'status': statistic.status, 'count': statistic.count} for statistic in
statistics] statistics]
else: else:
upload_dict['statistics'] = [{'status': upload.status, 'count': 1}] upload_dict['statistics'] = []
data.append(upload_dict) data.append(upload_dict)
return { return {

View File

@@ -1,4 +1,5 @@
from datetime import datetime, timedelta from datetime import datetime, timedelta
from freezegun import freeze_time
from app.dao.uploads_dao import dao_get_uploads_by_service_id from app.dao.uploads_dao import dao_get_uploads_by_service_id
from app.models import LETTER_TYPE, JOB_STATUS_IN_PROGRESS from app.models import LETTER_TYPE, JOB_STATUS_IN_PROGRESS
@@ -30,6 +31,7 @@ def create_uploaded_template(service):
) )
@freeze_time("2020-02-02 14:00") # GMT time
def test_get_uploads_for_service(sample_template): def test_get_uploads_for_service(sample_template):
create_service_data_retention(sample_template.service, 'sms', days_of_retention=9) create_service_data_retention(sample_template.service, 'sms', days_of_retention=9)
job = create_job(sample_template, processing_started=datetime.utcnow()) job = create_job(sample_template, processing_started=datetime.utcnow())
@@ -40,7 +42,7 @@ def test_get_uploads_for_service(sample_template):
other_template = create_template(service=other_service) other_template = create_template(service=other_service)
other_job = create_job(other_template, processing_started=datetime.utcnow()) other_job = create_job(other_template, processing_started=datetime.utcnow())
other_letter_template = create_uploaded_template(other_service) other_letter_template = create_uploaded_template(other_service)
other_letter = create_uploaded_letter(other_letter_template, other_service) create_uploaded_letter(other_letter_template, other_service)
uploads_from_db = dao_get_uploads_by_service_id(job.service_id).items uploads_from_db = dao_get_uploads_by_service_id(job.service_id).items
other_uploads_from_db = dao_get_uploads_by_service_id(other_job.service_id).items other_uploads_from_db = dao_get_uploads_by_service_id(other_job.service_id).items
@@ -48,17 +50,17 @@ def test_get_uploads_for_service(sample_template):
assert len(uploads_from_db) == 2 assert len(uploads_from_db) == 2
assert uploads_from_db[0] == ( assert uploads_from_db[0] == (
letter.id, None,
letter.client_reference, 'Uploaded letters',
1, 1,
'letter',
None, None,
7, letter.created_at.replace(hour=17, minute=30, second=0, microsecond=0),
letter.created_at, None,
letter.created_at.replace(hour=17, minute=30, second=0, microsecond=0),
None,
'letter_day',
None, None,
letter.created_at,
letter.status,
"letter",
"file-name",
) )
assert uploads_from_db[1] == ( assert uploads_from_db[1] == (
job.id, job.id,
@@ -75,17 +77,19 @@ def test_get_uploads_for_service(sample_template):
) )
assert len(other_uploads_from_db) == 2 assert len(other_uploads_from_db) == 2
assert other_uploads_from_db[0] == (other_letter.id, assert other_uploads_from_db[0] == (
other_letter.client_reference, None,
1, 'Uploaded letters',
None, 1,
7, 'letter',
other_letter.created_at, None,
None, letter.created_at.replace(hour=17, minute=30, second=0, microsecond=0),
other_letter.created_at, None,
other_letter.status, letter.created_at.replace(hour=17, minute=30, second=0, microsecond=0),
"letter", None,
"file-name") "letter_day",
None,
)
assert other_uploads_from_db[1] == (other_job.id, assert other_uploads_from_db[1] == (other_job.id,
other_job.original_file_name, other_job.original_file_name,
other_job.notification_count, other_job.notification_count,
@@ -98,10 +102,48 @@ def test_get_uploads_for_service(sample_template):
"job", "job",
None) None)
assert uploads_from_db[0] != other_uploads_from_db[0]
assert uploads_from_db[1] != other_uploads_from_db[1] assert uploads_from_db[1] != other_uploads_from_db[1]
@freeze_time("2020-02-02 18:00")
def test_get_uploads_for_service_groups_letters(sample_template):
    """Letters are grouped by print run, with the cutoff at 17:30.

    Five letters are created around the 17:30 boundary on two consecutive
    days; the DAO should return three groups, newest print run first.
    """
    service = sample_template.service
    letter_template = create_uploaded_template(service)

    upload_times = (
        # Just gets into yesterday's print run
        datetime(2020, 2, 1, 17, 29, 59),
        # Yesterday, but in today's print run
        datetime(2020, 2, 1, 17, 30),
        # First thing today
        datetime(2020, 2, 2, 0, 0),
        # Just before today's print deadline
        datetime(2020, 2, 2, 17, 29, 59),
        # Just missed today's print deadline
        datetime(2020, 2, 2, 17, 30),
    )
    for uploaded_at in upload_times:
        create_uploaded_letter(letter_template, service, created_at=uploaded_at)

    uploads_from_db = dao_get_uploads_by_service_id(sample_template.service_id).items

    counts_by_print_run = [
        (upload.notification_count, upload.created_at)
        for upload in uploads_from_db
    ]
    expected = [
        (1, datetime(2020, 2, 3, 17, 30)),
        (3, datetime(2020, 2, 2, 17, 30)),
        (1, datetime(2020, 2, 1, 17, 30)),
    ]
    assert counts_by_print_run == expected
def test_get_uploads_does_not_return_cancelled_jobs_or_letters(sample_template): def test_get_uploads_does_not_return_cancelled_jobs_or_letters(sample_template):
create_job(sample_template, job_status='scheduled') create_job(sample_template, job_status='scheduled')
create_job(sample_template, job_status='cancelled') create_job(sample_template, job_status='cancelled')
@@ -118,14 +160,17 @@ def test_get_uploads_orders_by_created_at_desc(sample_template):
job_status=JOB_STATUS_IN_PROGRESS) job_status=JOB_STATUS_IN_PROGRESS)
upload_2 = create_job(sample_template, processing_started=datetime.utcnow(), upload_2 = create_job(sample_template, processing_started=datetime.utcnow(),
job_status=JOB_STATUS_IN_PROGRESS) job_status=JOB_STATUS_IN_PROGRESS)
upload_3 = create_uploaded_letter(letter_template, sample_template.service, status='delivered') create_uploaded_letter(letter_template, sample_template.service, status='delivered')
results = dao_get_uploads_by_service_id(service_id=sample_template.service_id).items results = dao_get_uploads_by_service_id(service_id=sample_template.service_id).items
assert len(results) == 3 assert [
assert results[0].id == upload_3.id (result.id, result.upload_type) for result in results
assert results[1].id == upload_2.id ] == [
assert results[2].id == upload_1.id (None, 'letter_day'),
(upload_2.id, 'job'),
(upload_1.id, 'job'),
]
def test_get_uploads_orders_by_processing_started_desc(sample_template): def test_get_uploads_orders_by_processing_started_desc(sample_template):
@@ -148,25 +193,26 @@ def test_get_uploads_orders_by_processing_started_and_created_at_desc(sample_tem
letter_template = create_uploaded_template(sample_template.service) letter_template = create_uploaded_template(sample_template.service)
days_ago = datetime.utcnow() - timedelta(days=4) days_ago = datetime.utcnow() - timedelta(days=4)
upload_1 = create_uploaded_letter(letter_template, service=letter_template.service) create_uploaded_letter(letter_template, service=letter_template.service)
upload_2 = create_job(sample_template, processing_started=datetime.utcnow() - timedelta(days=1), upload_2 = create_job(sample_template, processing_started=datetime.utcnow() - timedelta(days=1),
created_at=days_ago, created_at=days_ago,
job_status=JOB_STATUS_IN_PROGRESS) job_status=JOB_STATUS_IN_PROGRESS)
upload_3 = create_job(sample_template, processing_started=datetime.utcnow() - timedelta(days=2), upload_3 = create_job(sample_template, processing_started=datetime.utcnow() - timedelta(days=2),
created_at=days_ago, created_at=days_ago,
job_status=JOB_STATUS_IN_PROGRESS) job_status=JOB_STATUS_IN_PROGRESS)
upload_4 = create_uploaded_letter(letter_template, service=letter_template.service, create_uploaded_letter(letter_template, service=letter_template.service,
created_at=datetime.utcnow() - timedelta(days=3)) created_at=datetime.utcnow() - timedelta(days=3))
results = dao_get_uploads_by_service_id(service_id=sample_template.service_id).items results = dao_get_uploads_by_service_id(service_id=sample_template.service_id).items
assert len(results) == 4 assert len(results) == 4
assert results[0].id == upload_1.id assert results[0].id is None
assert results[1].id == upload_2.id assert results[1].id == upload_2.id
assert results[2].id == upload_3.id assert results[2].id == upload_3.id
assert results[3].id == upload_4.id assert results[3].id is None
@freeze_time('2020-04-02 14:00') # Few days after the clocks go forward
def test_get_uploads_only_gets_uploads_within_service_retention_period(sample_template): def test_get_uploads_only_gets_uploads_within_service_retention_period(sample_template):
letter_template = create_uploaded_template(sample_template.service) letter_template = create_uploaded_template(sample_template.service)
create_service_data_retention(sample_template.service, 'sms', days_of_retention=3) create_service_data_retention(sample_template.service, 'sms', days_of_retention=3)
@@ -199,36 +245,58 @@ def test_get_uploads_only_gets_uploads_within_service_retention_period(sample_te
results = dao_get_uploads_by_service_id(service_id=sample_template.service_id).items results = dao_get_uploads_by_service_id(service_id=sample_template.service_id).items
assert len(results) == 4 assert len(results) == 4
assert results[0].id == upload_1.id
assert results[1].id == upload_2.id # Uploaded letters get their `created_at` shifted time of printing
assert results[2].id == upload_3.id # 17:30 BST == 16:30 UTC
assert results[3].id == upload_4.id assert results[0].created_at == upload_1.created_at.replace(hour=16, minute=30, second=0, microsecond=0)
# Jobs keep their original `created_at`
assert results[1].created_at == upload_2.created_at.replace(hour=14, minute=00, second=0, microsecond=0)
# Still in BST here…
assert results[2].created_at == upload_3.created_at.replace(hour=16, minute=30, second=0, microsecond=0)
# Now weve gone far enough back to be in GMT
# 17:30 GMT == 17:30 UTC
assert results[3].created_at == upload_4.created_at.replace(hour=17, minute=30, second=0, microsecond=0)
@freeze_time('2020-02-02 14:00')
def test_get_uploads_is_paginated(sample_template): def test_get_uploads_is_paginated(sample_template):
letter_template = create_uploaded_template(sample_template.service) letter_template = create_uploaded_template(sample_template.service)
upload_1 = create_uploaded_letter(letter_template, sample_template.service, status='delivered', create_uploaded_letter(
created_at=datetime.utcnow() - timedelta(minutes=3)) letter_template, sample_template.service, status='delivered',
upload_2 = create_job(sample_template, processing_started=datetime.utcnow() - timedelta(minutes=2), created_at=datetime.utcnow() - timedelta(minutes=3),
job_status=JOB_STATUS_IN_PROGRESS) )
upload_3 = create_uploaded_letter(letter_template, sample_template.service, status='delivered', create_job(
created_at=datetime.utcnow() - timedelta(minutes=1)) sample_template, processing_started=datetime.utcnow() - timedelta(minutes=2),
upload_4 = create_job(sample_template, processing_started=datetime.utcnow(), job_status=JOB_STATUS_IN_PROGRESS) job_status=JOB_STATUS_IN_PROGRESS,
)
create_uploaded_letter(
letter_template, sample_template.service, status='delivered',
created_at=datetime.utcnow() - timedelta(minutes=1),
)
create_job(
sample_template, processing_started=datetime.utcnow(),
job_status=JOB_STATUS_IN_PROGRESS,
)
results = dao_get_uploads_by_service_id(sample_template.service_id, page=1, page_size=2) results = dao_get_uploads_by_service_id(sample_template.service_id, page=1, page_size=1)
assert results.per_page == 2 assert results.per_page == 1
assert results.total == 4 assert results.total == 3
assert len(results.items) == 2 assert len(results.items) == 1
assert results.items[0].id == upload_4.id assert results.items[0].created_at == datetime.utcnow().replace(hour=17, minute=30, second=0, microsecond=0)
assert results.items[1].id == upload_3.id assert results.items[0].notification_count == 2
assert results.items[0].upload_type == 'letter_day'
results = dao_get_uploads_by_service_id(sample_template.service_id, page=2, page_size=2) results = dao_get_uploads_by_service_id(sample_template.service_id, page=2, page_size=1)
assert len(results.items) == 2 assert len(results.items) == 1
assert results.items[0].id == upload_2.id assert results.items[0].created_at == datetime.utcnow().replace(hour=14, minute=0, second=0, microsecond=0)
assert results.items[1].id == upload_1.id assert results.items[0].notification_count == 1
assert results.items[0].upload_type == 'job'
def test_get_uploads_returns_empty_list(sample_service): def test_get_uploads_returns_empty_list(sample_service):

View File

@@ -32,16 +32,17 @@ def create_precompiled_template(service):
) )
@freeze_time('2020-02-02 14:00')
def test_get_uploads(admin_request, sample_template): def test_get_uploads(admin_request, sample_template):
letter_template = create_precompiled_template(sample_template.service) letter_template = create_precompiled_template(sample_template.service)
upload_1 = create_uploaded_letter(letter_template, sample_template.service, status='delivered', create_uploaded_letter(letter_template, sample_template.service, status='delivered',
created_at=datetime.utcnow() - timedelta(minutes=4)) created_at=datetime.utcnow() - timedelta(minutes=4))
upload_2 = create_job(template=sample_template, upload_2 = create_job(template=sample_template,
processing_started=datetime.utcnow() - timedelta(minutes=3), processing_started=datetime.utcnow() - timedelta(minutes=3),
job_status=JOB_STATUS_FINISHED) job_status=JOB_STATUS_FINISHED)
upload_3 = create_uploaded_letter(letter_template, sample_template.service, status='delivered', create_uploaded_letter(letter_template, sample_template.service, status='delivered',
created_at=datetime.utcnow() - timedelta(minutes=2)) created_at=datetime.utcnow() - timedelta(minutes=2))
upload_4 = create_job(template=sample_template, upload_4 = create_job(template=sample_template,
processing_started=datetime.utcnow() - timedelta(minutes=1), processing_started=datetime.utcnow() - timedelta(minutes=1),
job_status=JOB_STATUS_FINISHED) job_status=JOB_STATUS_FINISHED)
@@ -52,7 +53,7 @@ def test_get_uploads(admin_request, sample_template):
resp_json = admin_request.get('upload.get_uploads_by_service', service_id=service_id) resp_json = admin_request.get('upload.get_uploads_by_service', service_id=service_id)
data = resp_json['data'] data = resp_json['data']
assert len(data) == 5 assert len(data) == 4
assert data[0] == {'id': str(upload_5.id), assert data[0] == {'id': str(upload_5.id),
'original_file_name': 'some.csv', 'original_file_name': 'some.csv',
'recipient': None, 'recipient': None,
@@ -61,23 +62,15 @@ def test_get_uploads(admin_request, sample_template):
'created_at': upload_5.created_at.strftime("%Y-%m-%d %H:%M:%S"), 'created_at': upload_5.created_at.strftime("%Y-%m-%d %H:%M:%S"),
'statistics': [], 'statistics': [],
'upload_type': 'job'} 'upload_type': 'job'}
assert data[1] == {'id': str(upload_4.id), assert data[1] == {'id': None,
'original_file_name': 'some.csv', 'original_file_name': 'Uploaded letters',
'recipient': None, 'recipient': None,
'notification_count': 1, 'notification_count': 2,
'template_type': 'sms', 'template_type': 'letter',
'created_at': upload_4.created_at.strftime( 'created_at': upload_4.created_at.replace(hour=17, minute=30).strftime(
"%Y-%m-%d %H:%M:%S"), "%Y-%m-%d %H:%M:%S"),
'statistics': [], 'statistics': [],
'upload_type': 'job'} 'upload_type': 'letter_day'}
assert data[2] == {'id': str(upload_3.id),
'original_file_name': "file-name",
'recipient': '742 Evergreen Terrace',
'notification_count': 1,
'template_type': None,
'created_at': upload_3.created_at.strftime("%Y-%m-%d %H:%M:%S"),
'statistics': [{'count': 1, 'status': 'delivered'}],
'upload_type': 'letter'}
assert data[3] == {'id': str(upload_2.id), assert data[3] == {'id': str(upload_2.id),
'original_file_name': "some.csv", 'original_file_name': "some.csv",
'recipient': None, 'recipient': None,
@@ -87,14 +80,6 @@ def test_get_uploads(admin_request, sample_template):
"%Y-%m-%d %H:%M:%S"), "%Y-%m-%d %H:%M:%S"),
'statistics': [], 'statistics': [],
'upload_type': 'job'} 'upload_type': 'job'}
assert data[4] == {'id': str(upload_1.id),
'original_file_name': "file-name",
'recipient': '742 Evergreen Terrace',
'notification_count': 1,
'template_type': None,
'created_at': upload_1.created_at.strftime("%Y-%m-%d %H:%M:%S"),
'statistics': [{'count': 1, 'status': 'delivered'}],
'upload_type': 'letter'}
def test_get_uploads_should_return_statistics(admin_request, sample_template): def test_get_uploads_should_return_statistics(admin_request, sample_template):
@@ -110,8 +95,8 @@ def test_get_uploads_should_return_statistics(admin_request, sample_template):
create_notification(template=sample_template, job=job_3, status='sending') create_notification(template=sample_template, job=job_3, status='sending')
letter_template = create_precompiled_template(sample_template.service) letter_template = create_precompiled_template(sample_template.service)
letter_1 = create_uploaded_letter(letter_template, sample_template.service, status='delivered', create_uploaded_letter(letter_template, sample_template.service, status='delivered',
created_at=datetime.utcnow() - timedelta(days=3)) created_at=datetime.utcnow() - timedelta(days=3))
resp_json = admin_request.get('upload.get_uploads_by_service', service_id=sample_template.service_id)['data'] resp_json = admin_request.get('upload.get_uploads_by_service', service_id=sample_template.service_id)['data']
assert len(resp_json) == 4 assert len(resp_json) == 4
@@ -121,8 +106,8 @@ def test_get_uploads_should_return_statistics(admin_request, sample_template):
assert resp_json[1]['statistics'] == [{'status': 'sending', 'count': 4}] assert resp_json[1]['statistics'] == [{'status': 'sending', 'count': 4}]
assert resp_json[2]['id'] == str(job_2.id) assert resp_json[2]['id'] == str(job_2.id)
assert resp_json[2]['statistics'] == [{'status': 'created', 'count': 3}] assert resp_json[2]['statistics'] == [{'status': 'created', 'count': 3}]
assert resp_json[3]['id'] == str(letter_1.id) assert resp_json[3]['id'] is None
assert resp_json[3]['statistics'] == [{'status': 'delivered', 'count': 1}] assert resp_json[3]['statistics'] == []
def test_get_uploads_should_paginate(admin_request, sample_template): def test_get_uploads_should_paginate(admin_request, sample_template):