diff --git a/app/aws/s3.py b/app/aws/s3.py
index 01fa04ac3..5c91bf8fc 100644
--- a/app/aws/s3.py
+++ b/app/aws/s3.py
@@ -19,6 +19,12 @@ def get_s3_object(bucket_name, file_location):
     return s3.Object(bucket_name, file_location)
 
 
+def head_s3_object(bucket_name, file_location):
+    # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.head_object
+    boto_client = client('s3', current_app.config['AWS_REGION'])
+    return boto_client.head_object(Bucket=bucket_name, Key=file_location)
+
+
 def file_exists(bucket_name, file_location):
     try:
         # try and access metadata of object
@@ -56,7 +62,7 @@ def remove_job_from_s3(service_id, job_id):
     return remove_s3_object(*get_job_location(service_id, job_id))
 
 
-def get_s3_bucket_objects(bucket_name, subfolder='', older_than=7, limit_days=2):
+def get_s3_bucket_objects(bucket_name, subfolder=''):
     boto_client = client('s3', current_app.config['AWS_REGION'])
     paginator = boto_client.get_paginator('list_objects_v2')
     page_iterator = paginator.paginate(
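The new `head_s3_object` helper is a thin wrapper around boto3's `head_object`, which returns an object's metadata (including `ContentLength`) without downloading the body. A minimal, self-contained sketch of the behaviour the rest of this change relies on, using moto as the test module below already does; the bucket name, key and body here are invented for illustration:

```python
# Not part of the diff: a sketch of what head_s3_object wraps.
import boto3
from moto import mock_s3


@mock_s3
def head_object_demo():
    s3_client = boto3.client('s3', region_name='eu-west-1')
    s3_client.create_bucket(
        Bucket='test-letters-pdf',
        CreateBucketConfiguration={'LocationConstraint': 'eu-west-1'},
    )
    body = b'%PDF-1.4 fake letter'
    s3_client.put_object(Bucket='test-letters-pdf', Key='2020-02-17/NOTIFY.REF0.PDF', Body=body)

    # head_object fetches metadata only, no body, so calling it once per letter is cheap
    head = s3_client.head_object(Bucket='test-letters-pdf', Key='2020-02-17/NOTIFY.REF0.PDF')
    return head['ContentLength'], len(body)


if __name__ == '__main__':
    content_length, expected = head_object_demo()
    assert content_length == expected
```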
diff --git a/app/celery/letters_pdf_tasks.py b/app/celery/letters_pdf_tasks.py
index 6f31d2f91..b78028596 100644
--- a/app/celery/letters_pdf_tasks.py
+++ b/app/celery/letters_pdf_tasks.py
@@ -1,6 +1,6 @@
 import math
 import base64
-from datetime import datetime
+from datetime import datetime, timedelta
 from uuid import UUID
 from hashlib import sha512
 from base64 import urlsafe_b64encode
@@ -14,6 +14,8 @@ from requests import (
 from celery.exceptions import MaxRetriesExceededError
 from notifications_utils.statsd_decorators import statsd
 from notifications_utils.s3 import s3upload
+from notifications_utils.letter_timings import LETTER_PROCESSING_DEADLINE
+from notifications_utils.timezones import convert_utc_to_bst
 
 from app import encryption, notify_celery
 from app.aws import s3
@@ -23,9 +25,10 @@ from app.dao.notifications_dao import (
     update_notification_status_by_id,
     dao_update_notification,
     dao_get_notification_by_reference,
-    dao_get_notifications_by_references,
     dao_update_notifications_by_reference,
+    dao_get_letters_to_be_printed,
 )
+from app.letters.utils import get_letter_pdf_filename
 from app.errors import VirusScanError
 from app.exceptions import NotificationTechnicalFailureException
 from app.letters.utils import (
@@ -115,28 +118,33 @@ def get_letters_pdf(template, contact_block, filename, values):
     return resp.content, billable_units
 
 
-@notify_celery.task(name='collate-letter-pdfs-for-day')
-@cronitor("collate-letter-pdfs-for-day")
-def collate_letter_pdfs_for_day(date=None):
-    if not date:
-        # Using the truncated date is ok because UTC to BST does not make a difference to the date,
-        # since it is triggered mid afternoon.
-        date = datetime.utcnow().strftime("%Y-%m-%d")
+@notify_celery.task(name='collate-letter-pdfs-to-be-sent')
+@cronitor("collate-letter-pdfs-to-be-sent")
+def collate_letter_pdfs_to_be_sent():
+    """
+    Finds all letters which are still waiting to be sent to DVLA for printing
 
-    letter_pdfs = sorted(
-        s3.get_s3_bucket_objects(
-            current_app.config['LETTERS_PDF_BUCKET_NAME'],
-            subfolder=date
-        ),
-        key=lambda letter: letter['Key']
+    This would usually be run at 5.50pm and collect up letters created before 5:30pm today
+    that have not yet been sent.
+    If run after midnight, it will collect up letters created before 5:30pm the day before.
+    """
+    print_run_date = convert_utc_to_bst(datetime.utcnow())
+    if print_run_date.time() < LETTER_PROCESSING_DEADLINE:
+        print_run_date = print_run_date - timedelta(days=1)
+
+    print_run_deadline = print_run_date.replace(
+        hour=17, minute=30, second=0, microsecond=0
     )
-    for i, letters in enumerate(group_letters(letter_pdfs)):
+
+    letters_to_print = get_key_and_size_of_letters_to_be_sent_to_print(print_run_deadline)
+
+    for i, letters in enumerate(group_letters(letters_to_print)):
         filenames = [letter['Key'] for letter in letters]
 
         hash = urlsafe_b64encode(sha512(''.join(filenames).encode()).digest())[:20].decode()
         # eg NOTIFY.2018-12-31.001.Wjrui5nAvObjPd-3GEL-.ZIP
         dvla_filename = 'NOTIFY.{date}.{num:03}.{hash}.ZIP'.format(
-            date=date,
+            date=print_run_deadline.strftime("%Y-%m-%d"),
             num=i + 1,
             hash=hash
         )
@@ -159,6 +167,24 @@ def collate_letter_pdfs_for_day(date=None):
         )
 
 
+def get_key_and_size_of_letters_to_be_sent_to_print(print_run_deadline):
+    letters_awaiting_sending = dao_get_letters_to_be_printed(print_run_deadline)
+
+    letter_pdfs = []
+    for letter in letters_awaiting_sending:
+        letter_file_name = get_letter_pdf_filename(
+            reference=letter.reference,
+            crown=letter.service.crown,
+            sending_date=letter.created_at,
+            postage=letter.postage
+        )
+
+        letter_head = s3.head_s3_object(current_app.config['LETTERS_PDF_BUCKET_NAME'], letter_file_name)
+        letter_pdfs.append({"Key": letter_file_name, "Size": letter_head['ContentLength']})
+
+    return letter_pdfs
+
+
 def group_letters(letter_pdfs):
     """
     Group letters in chunks of MAX_LETTER_PDF_ZIP_FILESIZE. Will add files to lists, never going over that size.
@@ -168,7 +194,7 @@ def group_letters(letter_pdfs):
     running_filesize = 0
     list_of_files = []
     for letter in letter_pdfs:
-        if letter['Key'].lower().endswith('.pdf') and letter_in_created_state(letter['Key']):
+        if letter['Key'].lower().endswith('.pdf'):
             if (
                 running_filesize + letter['Size'] > current_app.config['MAX_LETTER_PDF_ZIP_FILESIZE'] or
                 len(list_of_files) >= current_app.config['MAX_LETTER_PDF_COUNT_PER_ZIP']
@@ -184,22 +210,6 @@ def group_letters(letter_pdfs):
         yield list_of_files
 
 
-def letter_in_created_state(filename):
-    # filename looks like '2018-01-13/NOTIFY.ABCDEF1234567890.D.2.C.C.20180113120000.PDF'
-    subfolder = filename.split('/')[0]
-    ref = get_reference_from_filename(filename)
-    notifications = dao_get_notifications_by_references([ref])
-    if notifications:
-        if notifications[0].status == NOTIFICATION_CREATED:
-            return True
-        current_app.logger.info('Collating letters for {} but notification with reference {} already in {}'.format(
-            subfolder,
-            ref,
-            notifications[0].status
-        ))
-    return False
-
-
 @notify_celery.task(bind=True, name='process-virus-scan-passed', max_retries=15, default_retry_delay=300)
 def process_virus_scan_passed(self, filename):
     reference = get_reference_from_filename(filename)
diff --git a/app/config.py b/app/config.py
index 602f57f2c..a4949a175 100644
--- a/app/config.py
+++ b/app/config.py
@@ -296,8 +296,8 @@ class Config(object):
         },
         # The collate-letter-pdf does assume it is called in an hour that BST does not make a
         # difference to the truncate date which translates to the filename to process
-        'collate-letter-pdfs-for-day': {
-            'task': 'collate-letter-pdfs-for-day',
+        'collate-letter-pdfs-to-be-sent': {
+            'task': 'collate-letter-pdfs-to-be-sent',
             'schedule': crontab(hour=17, minute=50),
             'options': {'queue': QueueNames.PERIODIC}
         },
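The interaction between the 17:50 beat schedule above and the task's roll-back to the previous day is the part worth convincing yourself of. A standalone sketch of that calculation, assuming `LETTER_PROCESSING_DEADLINE` is a `datetime.time` of 17:30 (which the `replace(hour=17, minute=30)` call implies; the real constant lives in `notifications_utils.letter_timings`):

```python
# Not part of the diff: the deadline roll-back from collate_letter_pdfs_to_be_sent,
# with LETTER_PROCESSING_DEADLINE assumed to be 17:30 local time for illustration.
from datetime import datetime, time, timedelta

LETTER_PROCESSING_DEADLINE = time(17, 30)  # assumed value


def print_run_deadline_for(local_now):
    print_run_date = local_now
    if print_run_date.time() < LETTER_PROCESSING_DEADLINE:
        # before today's deadline, so we are still finishing off yesterday's print run
        print_run_date = print_run_date - timedelta(days=1)
    return print_run_date.replace(hour=17, minute=30, second=0, microsecond=0)


# Run on schedule at 17:50, the deadline is 17:30 the same day...
assert print_run_deadline_for(datetime(2020, 2, 17, 17, 50)) == datetime(2020, 2, 17, 17, 30)
# ...and a re-run at 02:00 the next morning resolves to the same deadline,
# so it collects the same set of letters rather than a different day's.
assert print_run_deadline_for(datetime(2020, 2, 18, 2, 0)) == datetime(2020, 2, 17, 17, 30)
```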
diff --git a/app/dao/notifications_dao.py b/app/dao/notifications_dao.py
index 5aa8d72a1..645bf9090 100644
--- a/app/dao/notifications_dao.py
+++ b/app/dao/notifications_dao.py
@@ -731,6 +731,21 @@ def notifications_not_yet_sent(should_be_sending_after_seconds, notification_typ
     return notifications
 
 
+def dao_get_letters_to_be_printed(print_run_deadline):
+    """
+    Return all letters created before the print run deadline that have not yet been sent
+    """
+    notifications = Notification.query.filter(
+        Notification.created_at < convert_bst_to_utc(print_run_deadline),
+        Notification.notification_type == LETTER_TYPE,
+        Notification.status == NOTIFICATION_CREATED,
+        Notification.key_type == KEY_TYPE_NORMAL
+    ).order_by(
+        Notification.created_at
+    ).all()
+    return notifications
+
+
 def dao_old_letters_with_created_status():
     yesterday_bst = convert_utc_to_bst(datetime.utcnow()) - timedelta(days=1)
     last_processing_deadline = yesterday_bst.replace(hour=17, minute=30, second=0, microsecond=0)
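`dao_get_letters_to_be_printed` compares a local (BST) deadline against `Notification.created_at`, which is stored in UTC, hence the `convert_bst_to_utc` call (the counterpart of the `convert_utc_to_bst` import shown above). A small illustration of that boundary, using pytz directly rather than the real helper:

```python
# Not part of the diff: why the deadline is converted to UTC before the created_at comparison.
from datetime import datetime

import pytz

LONDON = pytz.timezone('Europe/London')


def bst_to_utc(naive_local):
    # naive local London time -> naive UTC, mirroring what convert_bst_to_utc is used for here
    return LONDON.localize(naive_local).astimezone(pytz.utc).replace(tzinfo=None)


# During British Summer Time a 17:30 local deadline is 16:30 UTC, so a letter with
# created_at 16:29 UTC (17:29 local) is included, while one at 16:31 UTC waits for the next run.
assert bst_to_utc(datetime(2020, 6, 1, 17, 30)) == datetime(2020, 6, 1, 16, 30)
# In winter the two timescales coincide.
assert bst_to_utc(datetime(2020, 2, 17, 17, 30)) == datetime(2020, 2, 17, 17, 30)
```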
diff --git a/tests/app/celery/test_letters_pdf_tasks.py b/tests/app/celery/test_letters_pdf_tasks.py
index 58568d52e..ef95a17b0 100644
--- a/tests/app/celery/test_letters_pdf_tasks.py
+++ b/tests/app/celery/test_letters_pdf_tasks.py
@@ -2,7 +2,7 @@ from unittest.mock import Mock, call, ANY
 import base64
 
 import boto3
-from datetime import datetime
+from datetime import datetime, timedelta
 from moto import mock_s3
 from flask import current_app
 from freezegun import freeze_time
@@ -19,9 +19,9 @@ from app.exceptions import NotificationTechnicalFailureException
 from app.celery.letters_pdf_tasks import (
     create_letters_pdf,
     get_letters_pdf,
-    collate_letter_pdfs_for_day,
+    collate_letter_pdfs_to_be_sent,
+    get_key_and_size_of_letters_to_be_sent_to_print,
     group_letters,
-    letter_in_created_state,
     process_sanitised_letter,
     process_virus_scan_passed,
     process_virus_scan_failed,
@@ -40,7 +40,6 @@ from app.models import (
     NOTIFICATION_CREATED,
     NOTIFICATION_DELIVERED,
     NOTIFICATION_PENDING_VIRUS_CHECK,
-    NOTIFICATION_SENDING,
     NOTIFICATION_TECHNICAL_FAILURE,
     NOTIFICATION_VALIDATION_FAILED,
     NOTIFICATION_VIRUS_SCAN_FAILED,
@@ -53,7 +52,7 @@ from tests.conftest import set_config_values
 
 def test_should_have_decorated_tasks_functions():
     assert create_letters_pdf.__wrapped__.__name__ == 'create_letters_pdf'
-    assert collate_letter_pdfs_for_day.__wrapped__.__name__ == 'collate_letter_pdfs_for_day'
+    assert collate_letter_pdfs_to_be_sent.__wrapped__.__name__ == 'collate_letter_pdfs_to_be_sent'
     assert process_virus_scan_passed.__wrapped__.__name__ == 'process_virus_scan_passed'
     assert process_virus_scan_failed.__wrapped__.__name__ == 'process_virus_scan_failed'
     assert process_virus_scan_error.__wrapped__.__name__ == 'process_virus_scan_error'
@@ -242,27 +241,128 @@ def test_create_letters_gets_the_right_logo_when_service_has_letter_branding_log
     )
 
 
-def test_collate_letter_pdfs_for_day(notify_api, mocker):
-    mock_s3 = mocker.patch('app.celery.tasks.s3.get_s3_bucket_objects', return_value=[
-        {'Key': 'B.pDf', 'Size': 2},
-        {'Key': 'A.PDF', 'Size': 1},
-        {'Key': 'C.pdf', 'Size': 3}
+@freeze_time('2020-02-17 18:00:00')
+def test_get_key_and_size_of_letters_to_be_sent_to_print(notify_api, mocker, sample_letter_template):
+    create_notification(
+        template=sample_letter_template,
+        status='created',
+        reference='ref0',
+        created_at=(datetime.now() - timedelta(hours=2))
+    )
+
+    create_notification(
+        template=sample_letter_template,
+        status='created',
+        reference='ref1',
+        created_at=(datetime.now() - timedelta(hours=3))
+    )
+
+    create_notification(
+        template=sample_letter_template,
+        status='created',
+        reference='ref2',
+        created_at=(datetime.now() - timedelta(days=2))
+    )
+
+    # notifications we don't expect to get sent to print as they are in the wrong status
+    for status in ['delivered', 'validation-failed', 'cancelled', 'sending']:
+        create_notification(
+            template=sample_letter_template,
+            status=status,
+            reference='ref3',
+            created_at=(datetime.now() - timedelta(days=2))
+        )
+
+    # notification we don't expect to get sent as it will instead make it into this evening's print run
+    create_notification(
+        template=sample_letter_template,
+        status='created',
+        reference='ref4',
+        created_at=(datetime.now() - timedelta(minutes=1))
+    )
+
+    # test notification we don't expect to get sent
+    create_notification(
+        template=sample_letter_template,
+        status='created',
+        reference='ref4',
+        created_at=(datetime.now() - timedelta(days=1)),
+        key_type=KEY_TYPE_TEST
+    )
+
+    mock_s3 = mocker.patch('app.celery.tasks.s3.head_s3_object', side_effect=[
+        {'ContentLength': 2},
+        {'ContentLength': 1},
+        {'ContentLength': 3},
     ])
-    mock_group_letters = mocker.patch('app.celery.letters_pdf_tasks.group_letters', return_value=[
-        [{'Key': 'A.PDF', 'Size': 1}, {'Key': 'B.pDf', 'Size': 2}],
-        [{'Key': 'C.pdf', 'Size': 3}]
+
+    results = get_key_and_size_of_letters_to_be_sent_to_print(datetime.now() - timedelta(minutes=30))
+
+    assert mock_s3.call_count == 3
+    mock_s3.assert_has_calls(
+        [
+            call(current_app.config['LETTERS_PDF_BUCKET_NAME'], '2020-02-16/NOTIFY.REF2.D.2.C.C.20200215180000.PDF'),
+            call(current_app.config['LETTERS_PDF_BUCKET_NAME'], '2020-02-17/NOTIFY.REF1.D.2.C.C.20200217150000.PDF'),
+            call(current_app.config['LETTERS_PDF_BUCKET_NAME'], '2020-02-17/NOTIFY.REF0.D.2.C.C.20200217160000.PDF'),
+        ]
+    )
+
+    assert len(results) == 3
+    assert results == [
+        {'Key': '2020-02-16/NOTIFY.REF2.D.2.C.C.20200215180000.PDF', 'Size': 2},
+        {'Key': '2020-02-17/NOTIFY.REF1.D.2.C.C.20200217150000.PDF', 'Size': 1},
+        {'Key': '2020-02-17/NOTIFY.REF0.D.2.C.C.20200217160000.PDF', 'Size': 3},
+    ]
+
+
+@pytest.mark.parametrize('time_to_run_task', [
+    "2020-02-17 18:00:00",  # after 5:30pm
+    "2020-02-18 02:00:00",  # after midnight the next day, before 5:30pm - we expect the same results
+])
+def test_collate_letter_pdfs_to_be_sent(notify_api, sample_letter_template, mocker, time_to_run_task):
+    with freeze_time("2020-02-17 18:00:00"):
+        create_notification(
+            template=sample_letter_template,
+            status='created',
+            reference='ref0',
+            created_at=(datetime.now() - timedelta(hours=2))
+        )
+
+        create_notification(
+            template=sample_letter_template,
+            status='created',
+            reference='ref1',
+            created_at=(datetime.now() - timedelta(hours=3))
+        )
+
+        create_notification(
+            template=sample_letter_template,
+            status='created',
+            reference='ref2',
+            created_at=(datetime.now() - timedelta(days=2))
+        )
+
+    mocker.patch('app.celery.tasks.s3.head_s3_object', side_effect=[
+        {'ContentLength': 2},
+        {'ContentLength': 1},
+        {'ContentLength': 3},
     ])
+
     mock_celery = mocker.patch('app.celery.letters_pdf_tasks.notify_celery.send_task')
 
-    collate_letter_pdfs_for_day('2017-01-02')
+    with set_config_values(notify_api, {'MAX_LETTER_PDF_COUNT_PER_ZIP': 2}):
+        with freeze_time(time_to_run_task):
+            collate_letter_pdfs_to_be_sent()
 
-    mock_s3.assert_called_once_with('test-letters-pdf', subfolder='2017-01-02')
-    mock_group_letters.assert_called_once_with(sorted(mock_s3.return_value, key=lambda x: x['Key']))
+    assert len(mock_celery.call_args_list) == 2
     assert mock_celery.call_args_list[0] == call(
         name='zip-and-send-letter-pdfs',
         kwargs={
-            'filenames_to_zip': ['A.PDF', 'B.pDf'],
-            'upload_filename': 'NOTIFY.2017-01-02.001.oqdjIM2-NAUU9Sm5Slmi.ZIP'
+            'filenames_to_zip': [
+                '2020-02-16/NOTIFY.REF2.D.2.C.C.20200215180000.PDF',
+                '2020-02-17/NOTIFY.REF1.D.2.C.C.20200217150000.PDF'
+            ],
+            'upload_filename': 'NOTIFY.2020-02-17.001.k3x_WqC5KhB6e2DWv9Ma.ZIP'
         },
         queue='process-ftp-tasks',
         compression='zlib'
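The remaining hunks drop the `letter_in_created_state` stubbing from the `group_letters` tests, since the status check now happens in the database query. As a reminder of the grouping rule those tests exercise, here is a standalone sketch of it; the real function reads `MAX_LETTER_PDF_ZIP_FILESIZE` and `MAX_LETTER_PDF_COUNT_PER_ZIP` from `current_app.config`, so the limits below are invented for the example:

```python
# Not part of the diff: the group_letters chunking rule, written without a Flask app.
def group_letters_standalone(letter_pdfs, max_zip_filesize, max_count_per_zip):
    running_filesize = 0
    list_of_files = []
    for letter in letter_pdfs:
        if not letter['Key'].lower().endswith('.pdf'):
            continue  # non-PDF keys are skipped entirely
        if (
            running_filesize + letter['Size'] > max_zip_filesize or
            len(list_of_files) >= max_count_per_zip
        ):
            yield list_of_files
            running_filesize = 0
            list_of_files = []
        running_filesize += letter['Size']
        list_of_files.append(letter)
    if list_of_files:
        yield list_of_files


letters = [{'Key': 'A.pdf', 'Size': 3}, {'Key': 'B.pdf', 'Size': 3}, {'Key': 'C.pdf', 'Size': 1}]
# A zip is cut as soon as adding the next PDF would breach either limit.
assert list(group_letters_standalone(letters, max_zip_filesize=5, max_count_per_zip=2)) == [
    [{'Key': 'A.pdf', 'Size': 3}],
    [{'Key': 'B.pdf', 'Size': 3}, {'Key': 'C.pdf', 'Size': 1}],
]
```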
@@ -270,24 +370,17 @@ def test_collate_letter_pdfs_for_day(notify_api, mocker):
     assert mock_celery.call_args_list[1] == call(
         name='zip-and-send-letter-pdfs',
         kwargs={
-            'filenames_to_zip': ['C.pdf'],
-            'upload_filename': 'NOTIFY.2017-01-02.002.tdr7hcdPieiqjkVoS4kU.ZIP'
+            'filenames_to_zip': [
+                '2020-02-17/NOTIFY.REF0.D.2.C.C.20200217160000.PDF'
+            ],
+            'upload_filename': 'NOTIFY.2020-02-17.002.J85cUw-FWlKuAIOcwdLS.ZIP'
         },
         queue='process-ftp-tasks',
         compression='zlib'
     )
 
 
-@freeze_time('2018-09-12 17:50:00')
-def test_collate_letter_pdfs_for_day_works_without_date_param(notify_api, mocker):
-    mock_s3 = mocker.patch('app.celery.tasks.s3.get_s3_bucket_objects')
-    collate_letter_pdfs_for_day()
-    expected_date = '2018-09-12'
-    mock_s3.assert_called_once_with('test-letters-pdf', subfolder=expected_date)
-
-
-def test_group_letters_splits_on_file_size(notify_api, mocker):
-    mocker.patch('app.celery.letters_pdf_tasks.letter_in_created_state', return_value=True)
+def test_group_letters_splits_on_file_size(notify_api):
     letters = [
         # ends under max but next one is too big
         {'Key': 'A.pdf', 'Size': 1}, {'Key': 'B.pdf', 'Size': 2},
@@ -313,8 +406,7 @@
     assert next(x, None) is None
 
 
-def test_group_letters_splits_on_file_count(notify_api, mocker):
-    mocker.patch('app.celery.letters_pdf_tasks.letter_in_created_state', return_value=True)
+def test_group_letters_splits_on_file_count(notify_api):
     letters = [
         {'Key': 'A.pdf', 'Size': 1},
         {'Key': 'B.pdf', 'Size': 2},
@@ -337,8 +429,7 @@
     assert next(x, None) is None
 
 
-def test_group_letters_splits_on_file_size_and_file_count(notify_api, mocker):
-    mocker.patch('app.celery.letters_pdf_tasks.letter_in_created_state', return_value=True)
+def test_group_letters_splits_on_file_size_and_file_count(notify_api):
     letters = [
         # ends under max file size but next file is too big
         {'Key': 'A.pdf', 'Size': 1},
@@ -372,44 +463,22 @@
     assert next(x, None) is None
 
 
-def test_group_letters_ignores_non_pdfs(notify_api, mocker):
-    mocker.patch('app.celery.letters_pdf_tasks.letter_in_created_state', return_value=True)
-    letters = [{'Key': 'A.zip'}]
+@pytest.mark.parametrize('key', ["A.ZIP", "B.zip"])
+def test_group_letters_ignores_non_pdfs(key):
+    letters = [{'Key': key, 'Size': 1}]
     assert list(group_letters(letters)) == []
 
 
-def test_group_letters_ignores_notifications_already_sent(notify_api, mocker):
-    mock = mocker.patch('app.celery.letters_pdf_tasks.letter_in_created_state', return_value=False)
-    letters = [{'Key': 'A.pdf'}]
-    assert list(group_letters(letters)) == []
-    mock.assert_called_once_with('A.pdf')
+@pytest.mark.parametrize('key', ["A.PDF", "B.pdf", "C.PdF"])
+def test_group_letters_includes_pdf_files(key):
+    letters = [{'Key': key, 'Size': 1}]
+    assert list(group_letters(letters)) == [[{'Key': key, 'Size': 1}]]
 
 
-def test_group_letters_with_no_letters(notify_api, mocker):
-    mocker.patch('app.celery.letters_pdf_tasks.letter_in_created_state', return_value=True)
+def test_group_letters_with_no_letters():
     assert list(group_letters([])) == []
 
 
-def test_letter_in_created_state(sample_notification):
-    sample_notification.reference = 'ABCDEF1234567890'
-    filename = '2018-01-13/NOTIFY.ABCDEF1234567890.D.2.C.C.20180113120000.PDF'
-
-    assert letter_in_created_state(filename) is True
-
-
-def test_letter_in_created_state_fails_if_notification_not_in_created(sample_notification):
-    sample_notification.reference = 'ABCDEF1234567890'
-    sample_notification.status = NOTIFICATION_SENDING
-    filename = '2018-01-13/NOTIFY.ABCDEF1234567890.D.2.C.C.20180113120000.PDF'
-    assert letter_in_created_state(filename) is False
-
-
-def test_letter_in_created_state_fails_if_notification_doesnt_exist(sample_notification):
-    sample_notification.reference = 'QWERTY1234567890'
-    filename = '2018-01-13/NOTIFY.ABCDEF1234567890.D.2.C.C.20180113120000.PDF'
-    assert letter_in_created_state(filename) is False
-
-
 @freeze_time('2018-01-01 18:00')
 @mock_s3
 @pytest.mark.parametrize('key_type,noti_status,bucket_config_name,destination_folder', [