add collate-letter-pdfs task

add collate-letter-pdfs task (name pending). This retrieves a list of
letter pdf files (just the metadata, not the actual data) from s3, and
loops through them, calling the ftp task zip-and-send-letter-pdfs. It
groups them up by adding them to lists while counting the total
filesize; if it gets over a certain filesize (currently set to 500mb)
it breaks at that chunk, sends off that list of files to the ftp app,
and then starts building up a new list.

DVLA have a hard 2gb limit on how big the zip files we send can be -
however we're going to be limited by the amount of memory on the ftp
app well before we get around to handling 2gb of pdf data - so the
limit is 500mb for now. We'll adjust it after we see how ftp performs.
This commit is contained in:
Leo Hemsted
2017-12-19 14:18:05 +00:00
committed by Ken Tsang
parent 360fa787f9
commit 309b4d7d33
3 changed files with 106 additions and 4 deletions

View File

@@ -1,6 +1,7 @@
from unittest.mock import call
import pytest
import requests_mock
from botocore.exceptions import ClientError
from celery.exceptions import MaxRetriesExceededError
from requests import RequestException
@@ -9,6 +10,8 @@ from sqlalchemy.orm.exc import NoResultFound
from app.celery.letters_pdf_tasks import (
create_letters_pdf,
get_letters_pdf,
collate_letter_pdfs_for_day,
group_letters
)
from app.models import Notification
@@ -135,3 +138,57 @@ def test_create_letters_pdf_sets_technical_failure_max_retries(mocker, sample_le
assert mock_retry.called
assert mock_update_noti.called
mock_update_noti.assert_called_once_with(sample_letter_notification.id, 'technical-failure')
def test_collate_letter_pdfs_for_day(notify_api, mocker):
    """One zip-and-send-letter-pdfs task is sent per group returned by group_letters.

    The s3 listing and group_letters are both mocked, so this only covers how
    collate_letter_pdfs_for_day wires them together and what it sends to celery.
    """
    mock_s3 = mocker.patch('app.celery.tasks.s3.get_s3_bucket_objects')
    # two groups, so two tasks are expected: one for A+B and one for C
    mock_group_letters = mocker.patch('app.celery.letters_pdf_tasks.group_letters', return_value=[
        [{'Key': 'A', 'Size': 1}, {'Key': 'B', 'Size': 2}],
        [{'Key': 'C', 'Size': 3}]
    ])
    mock_celery = mocker.patch('app.celery.letters_pdf_tasks.notify_celery.send_task')

    collate_letter_pdfs_for_day('2017-01-02')

    mock_s3.assert_called_once_with('test-letters-pdf', subfolder='2017-01-02')
    # whatever the s3 listing returned must be handed to group_letters untouched
    mock_group_letters.assert_called_once_with(mock_s3.return_value)

    # compare the whole call list (not just indexes 0 and 1) so that an
    # unexpected extra task fails the test as well as a missing one
    assert mock_celery.call_args_list == [
        call(
            name='zip-and-send-letter-pdfs',
            kwargs={'filenames': ['A', 'B']},
            queue='process-ftp-tasks'
        ),
        call(
            name='zip-and-send-letter-pdfs',
            kwargs={'filenames': ['C']},
            queue='process-ftp-tasks'
        ),
    ]
def test_group_letters(notify_api):
    """Check the groups group_letters yields when MAX_LETTER_PDF_ZIP_FILESIZE is 5."""
    keys_and_sizes = (
        # ends under max but next one is too big
        ('A', 1), ('B', 2),
        # ends on exactly max
        ('C', 3), ('D', 1), ('E', 1),
        # exactly max goes in next file
        ('F', 5),
        # if it's bigger than the max, still gets included
        ('G', 6),
        # whatever's left goes in last list
        ('H', 1), ('I', 1),
    )
    letters = [{'Key': key, 'Size': size} for key, size in keys_and_sizes]

    # written out separately from the input so the comparison is against
    # independent literals rather than the same dict objects
    expected_groups = [
        [{'Key': 'A', 'Size': 1}, {'Key': 'B', 'Size': 2}],
        [{'Key': 'C', 'Size': 3}, {'Key': 'D', 'Size': 1}, {'Key': 'E', 'Size': 1}],
        [{'Key': 'F', 'Size': 5}],
        [{'Key': 'G', 'Size': 6}],
        [{'Key': 'H', 'Size': 1}, {'Key': 'I', 'Size': 1}],
    ]

    with set_config_values(notify_api, {'MAX_LETTER_PDF_ZIP_FILESIZE': 5}):
        groups = group_letters(letters)

        # pull the groups one at a time, in order
        for expected in expected_groups:
            assert next(groups) == expected

        # make sure iterator is exhausted
        assert next(groups, None) is None
def test_group_letters_with_no_letters(notify_api):
    """With nothing to group, group_letters yields no groups at all."""
    groups = list(group_letters([]))
    assert groups == []