diff --git a/app/aws/s3.py b/app/aws/s3.py index edb464ff1..5c91bf8fc 100644 --- a/app/aws/s3.py +++ b/app/aws/s3.py @@ -19,6 +19,12 @@ def get_s3_object(bucket_name, file_location): return s3.Object(bucket_name, file_location) +def head_s3_object(bucket_name, file_location): + # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.head_object + boto_client = client('s3', current_app.config['AWS_REGION']) + return boto_client.head_object(Bucket=bucket_name, Key=file_location) + + def file_exists(bucket_name, file_location): try: # try and access metadata of object diff --git a/app/celery/letters_pdf_tasks.py b/app/celery/letters_pdf_tasks.py index 6f31d2f91..a4649ce6c 100644 --- a/app/celery/letters_pdf_tasks.py +++ b/app/celery/letters_pdf_tasks.py @@ -1,6 +1,6 @@ import math import base64 -from datetime import datetime +from datetime import datetime, timedelta from uuid import UUID from hashlib import sha512 from base64 import urlsafe_b64encode @@ -14,6 +14,8 @@ from requests import ( from celery.exceptions import MaxRetriesExceededError from notifications_utils.statsd_decorators import statsd from notifications_utils.s3 import s3upload +from notifications_utils.letter_timings import LETTER_PROCESSING_DEADLINE +from notifications_utils.timezones import convert_utc_to_bst from app import encryption, notify_celery from app.aws import s3 @@ -25,7 +27,9 @@ from app.dao.notifications_dao import ( dao_get_notification_by_reference, dao_get_notifications_by_references, dao_update_notifications_by_reference, + dao_get_letters_to_be_printed, ) +from app.letters.utils import get_letter_pdf_filename from app.errors import VirusScanError from app.exceptions import NotificationTechnicalFailureException from app.letters.utils import ( @@ -117,26 +121,31 @@ def get_letters_pdf(template, contact_block, filename, values): @notify_celery.task(name='collate-letter-pdfs-for-day') @cronitor("collate-letter-pdfs-for-day") -def collate_letter_pdfs_for_day(date=None): - if not date: - # Using the truncated date is ok because UTC to BST does not make a difference to the date, - # since it is triggered mid afternoon. - date = datetime.utcnow().strftime("%Y-%m-%d") +def collate_letter_pdfs_for_day(): + """ + Finds all letters which are still waiting to be sent to DVLA for printing - letter_pdfs = sorted( - s3.get_s3_bucket_objects( - current_app.config['LETTERS_PDF_BUCKET_NAME'], - subfolder=date - ), - key=lambda letter: letter['Key'] - ) - for i, letters in enumerate(group_letters(letter_pdfs)): + This would usually be run at 5.50pm and collect up letters created between before 5:30pm today + that have not yet been sent. + If run after midnight, it will collect up letters created before 5:30pm the day before. + """ + date = convert_utc_to_bst(datetime.utcnow()) + if date.time() < LETTER_PROCESSING_DEADLINE: + date = date - timedelta(days=1) + + # Using the truncated date is ok because UTC to BST does not make a difference to the date, + # since it is triggered mid afternoon. + print_run_date = date.strftime("%Y-%m-%d") + + letters_to_print = get_key_and_size_of_letters_to_be_sent_to_print(print_run_date) + + for i, letters in enumerate(group_letters(letters_to_print)): filenames = [letter['Key'] for letter in letters] hash = urlsafe_b64encode(sha512(''.join(filenames).encode()).digest())[:20].decode() # eg NOTIFY.2018-12-31.001.Wjrui5nAvObjPd-3GEL-.ZIP dvla_filename = 'NOTIFY.{date}.{num:03}.{hash}.ZIP'.format( - date=date, + date=print_run_date, num=i + 1, hash=hash ) @@ -159,6 +168,28 @@ def collate_letter_pdfs_for_day(date=None): ) +def get_key_and_size_of_letters_to_be_sent_to_print(print_run_date): + letters_awaiting_sending = dao_get_letters_to_be_printed(print_run_date) + + letter_pdfs = [] + for letter in letters_awaiting_sending: + letter_file_name = get_letter_pdf_filename( + reference=letter.reference, + crown=letter.service.crown, + sending_date=letter.created_at, + postage=letter.postage + ) + + current_app.logger.info( + f'Found notification {letter.id} to send to DVLA to print: {letter_file_name}' + ) + + letter_head = s3.head_s3_object(current_app.config['LETTERS_PDF_BUCKET_NAME'], letter_file_name) + letter_pdfs.append({"Key": letter_file_name, "Size": letter_head['ContentLength']}) + + return letter_pdfs + + def group_letters(letter_pdfs): """ Group letters in chunks of MAX_LETTER_PDF_ZIP_FILESIZE. Will add files to lists, never going over that size. diff --git a/app/dao/notifications_dao.py b/app/dao/notifications_dao.py index 5aa8d72a1..89b2bd1c1 100644 --- a/app/dao/notifications_dao.py +++ b/app/dao/notifications_dao.py @@ -731,6 +731,24 @@ def notifications_not_yet_sent(should_be_sending_after_seconds, notification_typ return notifications +def dao_get_letters_to_be_printed(print_run_date): + """ + Given a date for a print run, Return all letters created before 5:30pm that day that have not yet been sent + """ + last_processing_deadline = datetime.strptime(print_run_date, "%Y-%m-%d").replace( + hour=17, minute=30, second=0, microsecond=0 + ) + + notifications = Notification.query.filter( + Notification.created_at < convert_bst_to_utc(last_processing_deadline), + Notification.notification_type == LETTER_TYPE, + Notification.status == NOTIFICATION_CREATED + ).order_by( + Notification.created_at + ).all() + return notifications + + def dao_old_letters_with_created_status(): yesterday_bst = convert_utc_to_bst(datetime.utcnow()) - timedelta(days=1) last_processing_deadline = yesterday_bst.replace(hour=17, minute=30, second=0, microsecond=0) diff --git a/tests/app/celery/test_letters_pdf_tasks.py b/tests/app/celery/test_letters_pdf_tasks.py index 58568d52e..efdab98db 100644 --- a/tests/app/celery/test_letters_pdf_tasks.py +++ b/tests/app/celery/test_letters_pdf_tasks.py @@ -2,7 +2,7 @@ from unittest.mock import Mock, call, ANY import base64 import boto3 -from datetime import datetime +from datetime import datetime, timedelta from moto import mock_s3 from flask import current_app from freezegun import freeze_time @@ -20,6 +20,7 @@ from app.celery.letters_pdf_tasks import ( create_letters_pdf, get_letters_pdf, collate_letter_pdfs_for_day, + get_key_and_size_of_letters_to_be_sent_to_print, group_letters, letter_in_created_state, process_sanitised_letter, @@ -242,27 +243,113 @@ def test_create_letters_gets_the_right_logo_when_service_has_letter_branding_log ) -def test_collate_letter_pdfs_for_day(notify_api, mocker): - mock_s3 = mocker.patch('app.celery.tasks.s3.get_s3_bucket_objects', return_value=[ - {'Key': 'B.pDf', 'Size': 2}, - {'Key': 'A.PDF', 'Size': 1}, - {'Key': 'C.pdf', 'Size': 3} +@freeze_time('2020-02-17 18:00:00') +def test_get_key_and_size_of_letters_to_be_sent_to_print(notify_api, mocker, sample_letter_template): + create_notification( + template=sample_letter_template, + status='created', + reference='ref0', + created_at=(datetime.now() - timedelta(hours=2)).isoformat() + ) + + create_notification( + template=sample_letter_template, + status='created', + reference='ref1', + created_at=(datetime.now() - timedelta(hours=3)).isoformat() + ) + + create_notification( + template=sample_letter_template, + status='created', + reference='ref2', + created_at=(datetime.now() - timedelta(days=2)).isoformat() + ) + + # notifications we don't expect to get sent to print as they are in the wrong status + for status in ['delivered', 'validation-failed', 'cancelled', 'sending']: + create_notification( + template=sample_letter_template, + status=status, + reference='ref3', + created_at=(datetime.now() - timedelta(days=2)).isoformat() + ) + + # notification we don't expect to get sent as instead will make into this evenings print run + create_notification( + template=sample_letter_template, + status='created', + reference='ref4', + created_at=(datetime.now() - timedelta(minutes=1)).isoformat() + ) + + mock_s3 = mocker.patch('app.celery.tasks.s3.head_s3_object', side_effect=[ + {'ContentLength': 2}, + {'ContentLength': 1}, + {'ContentLength': 3}, ]) - mock_group_letters = mocker.patch('app.celery.letters_pdf_tasks.group_letters', return_value=[ - [{'Key': 'A.PDF', 'Size': 1}, {'Key': 'B.pDf', 'Size': 2}], - [{'Key': 'C.pdf', 'Size': 3}] + + results = get_key_and_size_of_letters_to_be_sent_to_print('2020-02-17') + + assert mock_s3.call_count == 3 + mock_s3.assert_has_calls( + [ + call(current_app.config['LETTERS_PDF_BUCKET_NAME'], '2020-02-16/NOTIFY.REF2.D.2.C.C.20200215180000.PDF'), + call(current_app.config['LETTERS_PDF_BUCKET_NAME'], '2020-02-17/NOTIFY.REF1.D.2.C.C.20200217150000.PDF'), + call(current_app.config['LETTERS_PDF_BUCKET_NAME'], '2020-02-17/NOTIFY.REF0.D.2.C.C.20200217160000.PDF'), + ] + ) + + assert len(results) == 3 + assert results == [ + {'Key': '2020-02-16/NOTIFY.REF2.D.2.C.C.20200215180000.PDF', 'Size': 2}, + {'Key': '2020-02-17/NOTIFY.REF1.D.2.C.C.20200217150000.PDF', 'Size': 1}, + {'Key': '2020-02-17/NOTIFY.REF0.D.2.C.C.20200217160000.PDF', 'Size': 3}, + ] + + +@freeze_time('2020-02-17 18:00:00') +def test_collate_letter_pdfs_for_day(notify_api, sample_letter_template, mocker): + create_notification( + template=sample_letter_template, + status='created', + reference='ref0', + created_at=(datetime.now() - timedelta(hours=2)).isoformat() + ) + + create_notification( + template=sample_letter_template, + status='created', + reference='ref1', + created_at=(datetime.now() - timedelta(hours=3)).isoformat() + ) + + create_notification( + template=sample_letter_template, + status='created', + reference='ref2', + created_at=(datetime.now() - timedelta(days=2)).isoformat() + ) + + mocker.patch('app.celery.tasks.s3.head_s3_object', side_effect=[ + {'ContentLength': 2}, + {'ContentLength': 1}, + {'ContentLength': 3}, + ]) + mocker.patch('app.celery.letters_pdf_tasks.group_letters', return_value=[ + [{'Key': '2020-02-16/A.PDF', 'Size': 1}, {'Key': '2020-02-17/B.pDf', 'Size': 2}], + [{'Key': '2020-02-17/C.pdf', 'Size': 3}] ]) mock_celery = mocker.patch('app.celery.letters_pdf_tasks.notify_celery.send_task') - collate_letter_pdfs_for_day('2017-01-02') + collate_letter_pdfs_for_day() - mock_s3.assert_called_once_with('test-letters-pdf', subfolder='2017-01-02') - mock_group_letters.assert_called_once_with(sorted(mock_s3.return_value, key=lambda x: x['Key'])) + assert len(mock_celery.call_args_list) == 2 assert mock_celery.call_args_list[0] == call( name='zip-and-send-letter-pdfs', kwargs={ - 'filenames_to_zip': ['A.PDF', 'B.pDf'], - 'upload_filename': 'NOTIFY.2017-01-02.001.oqdjIM2-NAUU9Sm5Slmi.ZIP' + 'filenames_to_zip': ['2020-02-16/A.PDF', '2020-02-17/B.pDf'], + 'upload_filename': 'NOTIFY.2020-02-17.001.fGRjtoP1V9XUrng24Zzq.ZIP' }, queue='process-ftp-tasks', compression='zlib' @@ -270,20 +357,70 @@ def test_collate_letter_pdfs_for_day(notify_api, mocker): assert mock_celery.call_args_list[1] == call( name='zip-and-send-letter-pdfs', kwargs={ - 'filenames_to_zip': ['C.pdf'], - 'upload_filename': 'NOTIFY.2017-01-02.002.tdr7hcdPieiqjkVoS4kU.ZIP' + 'filenames_to_zip': ['2020-02-17/C.pdf'], + 'upload_filename': 'NOTIFY.2020-02-17.002.Ay8mP80pe12NH46clM08.ZIP' }, queue='process-ftp-tasks', compression='zlib' ) -@freeze_time('2018-09-12 17:50:00') -def test_collate_letter_pdfs_for_day_works_without_date_param(notify_api, mocker): - mock_s3 = mocker.patch('app.celery.tasks.s3.get_s3_bucket_objects') +@freeze_time('2020-02-18 02:00:00') +def test_collate_letter_pdfs_for_day_when_run_after_midnight(notify_api, sample_letter_template, mocker): + # created_at times for notifications choosen to match times in above test test_collate_letter_pdfs_for_day + create_notification( + template=sample_letter_template, + status='created', + reference='ref0', + created_at=(datetime.now() - timedelta(hours=10)).isoformat() + ) + + create_notification( + template=sample_letter_template, + status='created', + reference='ref1', + created_at=(datetime.now() - timedelta(hours=11)).isoformat() + ) + + create_notification( + template=sample_letter_template, + status='created', + reference='ref2', + created_at=(datetime.now() - timedelta(days=2, hours=8)).isoformat() + ) + + mocker.patch('app.celery.tasks.s3.head_s3_object', side_effect=[ + {'ContentLength': 2}, + {'ContentLength': 1}, + {'ContentLength': 3}, + ]) + mocker.patch('app.celery.letters_pdf_tasks.group_letters', return_value=[ + [{'Key': '2020-02-16/A.PDF', 'Size': 1}, {'Key': '2020-02-17/B.pDf', 'Size': 2}], + [{'Key': '2020-02-17/C.pdf', 'Size': 3}] + ]) + mock_celery = mocker.patch('app.celery.letters_pdf_tasks.notify_celery.send_task') + collate_letter_pdfs_for_day() - expected_date = '2018-09-12' - mock_s3.assert_called_once_with('test-letters-pdf', subfolder=expected_date) + + assert len(mock_celery.call_args_list) == 2 + assert mock_celery.call_args_list[0] == call( + name='zip-and-send-letter-pdfs', + kwargs={ + 'filenames_to_zip': ['2020-02-16/A.PDF', '2020-02-17/B.pDf'], + 'upload_filename': 'NOTIFY.2020-02-17.001.fGRjtoP1V9XUrng24Zzq.ZIP' + }, + queue='process-ftp-tasks', + compression='zlib' + ) + assert mock_celery.call_args_list[1] == call( + name='zip-and-send-letter-pdfs', + kwargs={ + 'filenames_to_zip': ['2020-02-17/C.pdf'], + 'upload_filename': 'NOTIFY.2020-02-17.002.Ay8mP80pe12NH46clM08.ZIP' + }, + queue='process-ftp-tasks', + compression='zlib' + ) def test_group_letters_splits_on_file_size(notify_api, mocker):