Merge pull request #1510 from alphagov/collate-letter-pdfs-for-day

add collate-letter-pdfs task
This commit is contained in:
kentsanggds
2018-01-02 10:56:18 +00:00
committed by GitHub
3 changed files with 112 additions and 4 deletions

View File

@@ -1,15 +1,15 @@
from flask import current_app
import math
from flask import current_app
from requests import (
post as requests_post,
RequestException
)
from botocore.exceptions import ClientError as BotoClientError
from app import notify_celery
from app.aws import s3
from app.config import QueueNames
from app.config import QueueNames, TaskNames
from app.dao.notifications_dao import (
get_notification_by_id,
update_notification_status_by_id,
@@ -79,3 +79,46 @@ def get_letters_pdf(template, contact_block, org_id, values):
billable_units = math.ceil(int(resp.headers.get("X-pdf-page-count", 0)) / pages_per_sheet)
return resp.content, billable_units
@notify_celery.task(name='collate-letter-pdfs-for-day')
def collate_letter_pdfs_for_day(date):
letter_pdfs = s3.get_s3_bucket_objects(
current_app.config['LETTERS_PDF_BUCKET_NAME'],
subfolder=date
)
for letters in group_letters(letter_pdfs):
filenames = [letter['Key'] for letter in letters]
current_app.logger.info(
'Calling task zip-and-send-letter-pdfs for {} pdfs of total size {:,} bytes'.format(
len(filenames),
sum(letter['Size'] for letter in letters)
)
)
notify_celery.send_task(
name=TaskNames.ZIP_AND_SEND_LETTER_PDFS,
kwargs={'filenames_to_zip': filenames},
queue=QueueNames.PROCESS_FTP
)
def group_letters(letter_pdfs):
"""
Group letters in chunks of MAX_LETTER_PDF_ZIP_FILESIZE. Will add files to lists, never going over that size.
If a single file is (somehow) larger than MAX_LETTER_PDF_ZIP_FILESIZE that'll be in a list on it's own.
If there are no files, will just exit (rather than yielding an empty list).
"""
running_filesize = 0
list_of_files = []
for letter in letter_pdfs:
if letter['Key'].lower().endswith('.pdf'):
if running_filesize + letter['Size'] > current_app.config['MAX_LETTER_PDF_ZIP_FILESIZE']:
yield list_of_files
running_filesize = 0
list_of_files = []
running_filesize += letter['Size']
list_of_files.append(letter)
if list_of_files:
yield list_of_files

View File

@@ -57,6 +57,7 @@ class TaskNames(object):
DVLA_JOBS = 'send-jobs-to-dvla'
DVLA_NOTIFICATIONS = 'send-api-notifications-to-dvla'
PROCESS_INCOMPLETE_JOBS = 'process-incomplete-jobs'
ZIP_AND_SEND_LETTER_PDFS = 'zip-and-send-letter-pdfs'
class Config(object):
@@ -127,6 +128,8 @@ class Config(object):
ONE_OFF_MESSAGE_FILENAME = 'Report'
MAX_VERIFY_CODE_COUNT = 10
MAX_LETTER_PDF_ZIP_FILESIZE = 500 * 1024 * 1024 # 500mb
CHECK_PROXY_HEADER = False
NOTIFY_SERVICE_ID = 'd6aa2c68-a2d9-4437-ab19-3ae8eb202553'

View File

@@ -1,6 +1,7 @@
from unittest.mock import call
import pytest
import requests_mock
from botocore.exceptions import ClientError
from celery.exceptions import MaxRetriesExceededError
from requests import RequestException
@@ -9,6 +10,8 @@ from sqlalchemy.orm.exc import NoResultFound
from app.celery.letters_pdf_tasks import (
create_letters_pdf,
get_letters_pdf,
collate_letter_pdfs_for_day,
group_letters
)
from app.models import Notification
@@ -135,3 +138,62 @@ def test_create_letters_pdf_sets_technical_failure_max_retries(mocker, sample_le
assert mock_retry.called
assert mock_update_noti.called
mock_update_noti.assert_called_once_with(sample_letter_notification.id, 'technical-failure')
def test_collate_letter_pdfs_for_day(notify_api, mocker):
mock_s3 = mocker.patch('app.celery.tasks.s3.get_s3_bucket_objects')
mock_group_letters = mocker.patch('app.celery.letters_pdf_tasks.group_letters', return_value=[
[{'Key': 'A.PDF', 'Size': 1}, {'Key': 'B.pDf', 'Size': 2}],
[{'Key': 'C.pdf', 'Size': 3}]
])
mock_celery = mocker.patch('app.celery.letters_pdf_tasks.notify_celery.send_task')
collate_letter_pdfs_for_day('2017-01-02')
mock_s3.assert_called_once_with('test-letters-pdf', subfolder='2017-01-02')
mock_group_letters.assert_called_once_with(mock_s3.return_value)
assert mock_celery.call_args_list[0] == call(
name='zip-and-send-letter-pdfs',
kwargs={'filenames_to_zip': ['A.PDF', 'B.pDf']},
queue='process-ftp-tasks'
)
assert mock_celery.call_args_list[1] == call(
name='zip-and-send-letter-pdfs',
kwargs={'filenames_to_zip': ['C.pdf']},
queue='process-ftp-tasks'
)
def test_group_letters(notify_api):
letters = [
# ends under max but next one is too big
{'Key': 'A.pdf', 'Size': 1}, {'Key': 'B.pdf', 'Size': 2},
# ends on exactly max
{'Key': 'C.pdf', 'Size': 3}, {'Key': 'D.pdf', 'Size': 1}, {'Key': 'E.pdf', 'Size': 1},
# exactly max goes in next file
{'Key': 'F.pdf', 'Size': 5},
# if it's bigger than the max, still gets included
{'Key': 'G.pdf', 'Size': 6},
# whatever's left goes in last list
{'Key': 'H.pdf', 'Size': 1}, {'Key': 'I.pdf', 'Size': 1},
]
with set_config_values(notify_api, {'MAX_LETTER_PDF_ZIP_FILESIZE': 5}):
x = group_letters(letters)
assert next(x) == [{'Key': 'A.pdf', 'Size': 1}, {'Key': 'B.pdf', 'Size': 2}]
assert next(x) == [{'Key': 'C.pdf', 'Size': 3}, {'Key': 'D.pdf', 'Size': 1}, {'Key': 'E.pdf', 'Size': 1}]
assert next(x) == [{'Key': 'F.pdf', 'Size': 5}]
assert next(x) == [{'Key': 'G.pdf', 'Size': 6}]
assert next(x) == [{'Key': 'H.pdf', 'Size': 1}, {'Key': 'I.pdf', 'Size': 1}]
# make sure iterator is exhausted
assert next(x, None) is None
def test_group_letters_ignores_non_pdfs(notify_api):
letters = [{'Key': 'A.zip'}]
assert list(group_letters(letters)) == []
def test_group_letters_with_no_letters(notify_api):
assert list(group_letters([])) == []