diff --git a/app/celery/letters_pdf_tasks.py b/app/celery/letters_pdf_tasks.py
index d70265791..ec08666b4 100644
--- a/app/celery/letters_pdf_tasks.py
+++ b/app/celery/letters_pdf_tasks.py
@@ -1,15 +1,15 @@
-from flask import current_app
 import math
+
+from flask import current_app
 from requests import (
     post as requests_post,
     RequestException
 )
-
 from botocore.exceptions import ClientError as BotoClientError
 
 from app import notify_celery
 from app.aws import s3
-from app.config import QueueNames
+from app.config import QueueNames, TaskNames
 from app.dao.notifications_dao import (
     get_notification_by_id,
     update_notification_status_by_id,
@@ -79,3 +79,53 @@ def get_letters_pdf(template, contact_block, org_id, values):
     billable_units = math.ceil(int(resp.headers.get("X-pdf-page-count", 0)) / pages_per_sheet)
 
     return resp.content, billable_units
+
+
+@notify_celery.task(name='collate-letter-pdfs-for-day')
+def collate_letter_pdfs_for_day(date):
+    """
+    Fetch the objects in the `date` subfolder of the letters pdf bucket, split them into groups with group_letters
+    and send one zip-and-send-letter-pdfs task (with the group's filenames) per group to the PROCESS_FTP queue.
+    """
+    letter_pdfs = s3.get_s3_bucket_objects(
+        current_app.config['LETTERS_PDF_BUCKET_NAME'],
+        subfolder=date
+    )
+    for letters in group_letters(letter_pdfs):
+        filenames = [letter['Key'] for letter in letters]
+        current_app.logger.info(
+            'Calling task zip-and-send-letter-pdfs for {} pdfs of total size {:,} bytes'.format(
+                len(filenames),
+                sum(letter['Size'] for letter in letters)
+            )
+        )
+        notify_celery.send_task(
+            name=TaskNames.ZIP_AND_SEND_LETTER_PDFS,
+            kwargs={'filenames_to_zip': filenames},
+            queue=QueueNames.PROCESS_FTP
+        )
+
+
+def group_letters(letter_pdfs):
+    """
+    Group letters in chunks of MAX_LETTER_PDF_ZIP_FILESIZE. Will add files to lists, never going over that size.
+    If a single file is (somehow) larger than MAX_LETTER_PDF_ZIP_FILESIZE that'll be in a list on its own.
+    If there are no files, will just exit (rather than yielding an empty list).
+    """
+    max_filesize = current_app.config['MAX_LETTER_PDF_ZIP_FILESIZE']
+    running_filesize = 0
+    list_of_files = []
+    for letter in letter_pdfs:
+        if letter['Key'].lower().endswith('.pdf'):
+            # only close the current group if it has files in it, so that an oversized first file
+            # doesn't cause an empty list to be yielded
+            if list_of_files and running_filesize + letter['Size'] > max_filesize:
+                yield list_of_files
+                running_filesize = 0
+                list_of_files = []
+
+            running_filesize += letter['Size']
+            list_of_files.append(letter)
+
+    if list_of_files:
+        yield list_of_files
diff --git a/app/config.py b/app/config.py
index 36a977110..e42a08c3b 100644
--- a/app/config.py
+++ b/app/config.py
@@ -57,6 +57,7 @@ class TaskNames(object):
     DVLA_JOBS = 'send-jobs-to-dvla'
     DVLA_NOTIFICATIONS = 'send-api-notifications-to-dvla'
     PROCESS_INCOMPLETE_JOBS = 'process-incomplete-jobs'
+    ZIP_AND_SEND_LETTER_PDFS = 'zip-and-send-letter-pdfs'
 
 
 class Config(object):
@@ -127,6 +128,8 @@ class Config(object):
     ONE_OFF_MESSAGE_FILENAME = 'Report'
     MAX_VERIFY_CODE_COUNT = 10
 
+    MAX_LETTER_PDF_ZIP_FILESIZE = 500 * 1024 * 1024  # 500mb
+
     CHECK_PROXY_HEADER = False
 
     NOTIFY_SERVICE_ID = 'd6aa2c68-a2d9-4437-ab19-3ae8eb202553'
diff --git a/tests/app/celery/test_letters_pdf_tasks.py b/tests/app/celery/test_letters_pdf_tasks.py
index a414fa2db..78a58306c 100644
--- a/tests/app/celery/test_letters_pdf_tasks.py
+++ b/tests/app/celery/test_letters_pdf_tasks.py
@@ -1,6 +1,7 @@
+from unittest.mock import call
+
 import pytest
 import requests_mock
-
 from botocore.exceptions import ClientError
 from celery.exceptions import MaxRetriesExceededError
 from requests import RequestException
@@ -9,6 +10,8 @@ from sqlalchemy.orm.exc import NoResultFound
 from app.celery.letters_pdf_tasks import (
     create_letters_pdf,
     get_letters_pdf,
+    collate_letter_pdfs_for_day,
+    group_letters
 )
 
 from app.models import Notification
@@ -135,3 +138,69 @@ def test_create_letters_pdf_sets_technical_failure_max_retries(mocker, sample_le
     assert mock_retry.called
     assert mock_update_noti.called
     mock_update_noti.assert_called_once_with(sample_letter_notification.id, 'technical-failure')
+
+
+def test_collate_letter_pdfs_for_day(notify_api, mocker):
+    mock_s3 = mocker.patch('app.celery.letters_pdf_tasks.s3.get_s3_bucket_objects')
+    mock_group_letters = mocker.patch('app.celery.letters_pdf_tasks.group_letters', return_value=[
+        [{'Key': 'A.PDF', 'Size': 1}, {'Key': 'B.pDf', 'Size': 2}],
+        [{'Key': 'C.pdf', 'Size': 3}]
+    ])
+    mock_celery = mocker.patch('app.celery.letters_pdf_tasks.notify_celery.send_task')
+
+    collate_letter_pdfs_for_day('2017-01-02')
+
+    mock_s3.assert_called_once_with('test-letters-pdf', subfolder='2017-01-02')
+    mock_group_letters.assert_called_once_with(mock_s3.return_value)
+    assert mock_celery.call_args_list[0] == call(
+        name='zip-and-send-letter-pdfs',
+        kwargs={'filenames_to_zip': ['A.PDF', 'B.pDf']},
+        queue='process-ftp-tasks'
+    )
+    assert mock_celery.call_args_list[1] == call(
+        name='zip-and-send-letter-pdfs',
+        kwargs={'filenames_to_zip': ['C.pdf']},
+        queue='process-ftp-tasks'
+    )
+
+
+def test_group_letters(notify_api):
+    letters = [
+        # ends under max but next one is too big
+        {'Key': 'A.pdf', 'Size': 1}, {'Key': 'B.pdf', 'Size': 2},
+        # ends on exactly max
+        {'Key': 'C.pdf', 'Size': 3}, {'Key': 'D.pdf', 'Size': 1}, {'Key': 'E.pdf', 'Size': 1},
+        # exactly max goes in next file
+        {'Key': 'F.pdf', 'Size': 5},
+        # if it's bigger than the max, still gets included
+        {'Key': 'G.pdf', 'Size': 6},
+        # whatever's left goes in last list
+        {'Key': 'H.pdf', 'Size': 1}, {'Key': 'I.pdf', 'Size': 1},
+    ]
+
+    with set_config_values(notify_api, {'MAX_LETTER_PDF_ZIP_FILESIZE': 5}):
+        x = group_letters(letters)
+
+        assert next(x) == [{'Key': 'A.pdf', 'Size': 1}, {'Key': 'B.pdf', 'Size': 2}]
+        assert next(x) == [{'Key': 'C.pdf', 'Size': 3}, {'Key': 'D.pdf', 'Size': 1}, {'Key': 'E.pdf', 'Size': 1}]
+        assert next(x) == [{'Key': 'F.pdf', 'Size': 5}]
+        assert next(x) == [{'Key': 'G.pdf', 'Size': 6}]
+        assert next(x) == [{'Key': 'H.pdf', 'Size': 1}, {'Key': 'I.pdf', 'Size': 1}]
+        # make sure iterator is exhausted
+        assert next(x, None) is None
+
+
+def test_group_letters_doesnt_yield_empty_list_if_first_file_is_too_big(notify_api):
+    letters = [{'Key': 'A.pdf', 'Size': 6}, {'Key': 'B.pdf', 'Size': 1}]
+
+    with set_config_values(notify_api, {'MAX_LETTER_PDF_ZIP_FILESIZE': 5}):
+        assert list(group_letters(letters)) == [[{'Key': 'A.pdf', 'Size': 6}], [{'Key': 'B.pdf', 'Size': 1}]]
+
+
+def test_group_letters_ignores_non_pdfs(notify_api):
+    letters = [{'Key': 'A.zip'}]
+    assert list(group_letters(letters)) == []
+
+
+def test_group_letters_with_no_letters(notify_api):
+    assert list(group_letters([])) == []