mirror of
https://github.com/GSA/notifications-api.git
synced 2026-01-30 06:21:50 -05:00
add collate-letter-pdfs task
add collate-letter-pdfs task (name pending). This retrieves a list of letter pdf files (just the metadata, not the actual data) from s3 and loops through them, calling the ftp task zip-and-send-letter-pdfs. It groups them by adding them to lists while counting the total filesize; if a list would go over a certain filesize (currently set to 500mb) it breaks at that chunk, sends that list of files off to the ftp app, and then starts building up a new list. DVLA have a hard 2gb limit on the size of the zip files we can send - however, we're going to be limited by the amount of memory on the ftp app well before we get around to handling 2gb of pdf data - so the limit is 500mb for now. We'll adjust it after we see how ftp performs.
This commit is contained in:
@@ -1,15 +1,15 @@
|
|||||||
from flask import current_app
|
|
||||||
import math
|
import math
|
||||||
|
|
||||||
|
from flask import current_app
|
||||||
from requests import (
|
from requests import (
|
||||||
post as requests_post,
|
post as requests_post,
|
||||||
RequestException
|
RequestException
|
||||||
)
|
)
|
||||||
|
|
||||||
from botocore.exceptions import ClientError as BotoClientError
|
from botocore.exceptions import ClientError as BotoClientError
|
||||||
|
|
||||||
from app import notify_celery
|
from app import notify_celery
|
||||||
from app.aws import s3
|
from app.aws import s3
|
||||||
from app.config import QueueNames
|
from app.config import QueueNames, TaskNames
|
||||||
from app.dao.notifications_dao import (
|
from app.dao.notifications_dao import (
|
||||||
get_notification_by_id,
|
get_notification_by_id,
|
||||||
update_notification_status_by_id,
|
update_notification_status_by_id,
|
||||||
@@ -79,3 +79,45 @@ def get_letters_pdf(template, contact_block, org_id, values):
|
|||||||
billable_units = math.ceil(int(resp.headers.get("X-pdf-page-count", 0)) / pages_per_sheet)
|
billable_units = math.ceil(int(resp.headers.get("X-pdf-page-count", 0)) / pages_per_sheet)
|
||||||
|
|
||||||
return resp.content, billable_units
|
return resp.content, billable_units
|
||||||
|
|
||||||
|
|
||||||
|
@notify_celery.task(name='collate-letter-pdfs-for-day')
def collate_letter_pdfs_for_day(date):
    """
    Batch up the letter pdfs stored under the `date` subfolder and hand each batch to the ftp task.

    The objects listed from the letters pdf bucket are split into batches by group_letters; for every
    batch the keys are sent as `filenames` to the zip-and-send-letter-pdfs task on the ftp queue.

    :param date: passed straight through as the `subfolder` argument of s3.get_s3_bucket_objects
    """
    bucket_name = current_app.config['LETTERS_PDF_BUCKET_NAME']
    letter_pdfs = s3.get_s3_bucket_objects(bucket_name, subfolder=date)

    for batch in group_letters(letter_pdfs):
        filenames = [pdf['Key'] for pdf in batch]
        total_size = sum(pdf['Size'] for pdf in batch)

        log_message = 'Calling task zip-and-send-letter-pdfs for {} pdfs of total size {:,} bytes'.format(
            len(filenames),
            total_size
        )
        current_app.logger.info(log_message)

        notify_celery.send_task(
            name=TaskNames.ZIP_AND_SEND_LETTER_PDFS,
            kwargs={'filenames': filenames},
            queue=QueueNames.PROCESS_FTP
        )
|
||||||
|
|
||||||
|
|
||||||
|
def group_letters(letter_pdfs, max_filesize=None):
    """
    Group letters into chunks whose combined size does not exceed the maximum zip filesize.

    Files are added to a list, in the order given, until the next file would push the running total over
    the limit; that list is then yielded and a new one is started.
    If a single file is (somehow) larger than the limit, it'll be in a list on its own.
    An empty list is never yielded: if there are no files, this will just exit.

    :param letter_pdfs: iterable of dicts, each with a numeric 'Size' entry
    :param max_filesize: optional limit, in the same units as 'Size'. When None (the default) the limit is
        read from current_app.config['MAX_LETTER_PDF_ZIP_FILESIZE'] for each file.
    :return: generator yielding non-empty lists of the dicts from letter_pdfs
    """
    running_filesize = 0
    list_of_files = []
    for letter in letter_pdfs:
        limit = max_filesize
        if limit is None:
            limit = current_app.config['MAX_LETTER_PDF_ZIP_FILESIZE']

        # Only close off a chunk that actually has something in it - otherwise an oversized first file
        # would cause an empty list to be yielded before the file itself.
        if list_of_files and running_filesize + letter['Size'] > limit:
            yield list_of_files
            running_filesize = 0
            list_of_files = []

        running_filesize += letter['Size']
        list_of_files.append(letter)

    if list_of_files:
        yield list_of_files
|
||||||
|
|||||||
@@ -57,6 +57,7 @@ class TaskNames(object):
|
|||||||
DVLA_JOBS = 'send-jobs-to-dvla'
|
DVLA_JOBS = 'send-jobs-to-dvla'
|
||||||
DVLA_NOTIFICATIONS = 'send-api-notifications-to-dvla'
|
DVLA_NOTIFICATIONS = 'send-api-notifications-to-dvla'
|
||||||
PROCESS_INCOMPLETE_JOBS = 'process-incomplete-jobs'
|
PROCESS_INCOMPLETE_JOBS = 'process-incomplete-jobs'
|
||||||
|
ZIP_AND_SEND_LETTER_PDFS = 'zip-and-send-letter-pdfs'
|
||||||
|
|
||||||
|
|
||||||
class Config(object):
|
class Config(object):
|
||||||
@@ -127,6 +128,8 @@ class Config(object):
|
|||||||
ONE_OFF_MESSAGE_FILENAME = 'Report'
|
ONE_OFF_MESSAGE_FILENAME = 'Report'
|
||||||
MAX_VERIFY_CODE_COUNT = 10
|
MAX_VERIFY_CODE_COUNT = 10
|
||||||
|
|
||||||
|
MAX_LETTER_PDF_ZIP_FILESIZE = 500 * 1024 * 1024 # 500mb
|
||||||
|
|
||||||
CHECK_PROXY_HEADER = False
|
CHECK_PROXY_HEADER = False
|
||||||
|
|
||||||
NOTIFY_SERVICE_ID = 'd6aa2c68-a2d9-4437-ab19-3ae8eb202553'
|
NOTIFY_SERVICE_ID = 'd6aa2c68-a2d9-4437-ab19-3ae8eb202553'
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
|
from unittest.mock import call
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import requests_mock
|
import requests_mock
|
||||||
|
|
||||||
from botocore.exceptions import ClientError
|
from botocore.exceptions import ClientError
|
||||||
from celery.exceptions import MaxRetriesExceededError
|
from celery.exceptions import MaxRetriesExceededError
|
||||||
from requests import RequestException
|
from requests import RequestException
|
||||||
@@ -9,6 +10,8 @@ from sqlalchemy.orm.exc import NoResultFound
|
|||||||
from app.celery.letters_pdf_tasks import (
|
from app.celery.letters_pdf_tasks import (
|
||||||
create_letters_pdf,
|
create_letters_pdf,
|
||||||
get_letters_pdf,
|
get_letters_pdf,
|
||||||
|
collate_letter_pdfs_for_day,
|
||||||
|
group_letters
|
||||||
)
|
)
|
||||||
from app.models import Notification
|
from app.models import Notification
|
||||||
|
|
||||||
@@ -135,3 +138,57 @@ def test_create_letters_pdf_sets_technical_failure_max_retries(mocker, sample_le
|
|||||||
assert mock_retry.called
|
assert mock_retry.called
|
||||||
assert mock_update_noti.called
|
assert mock_update_noti.called
|
||||||
mock_update_noti.assert_called_once_with(sample_letter_notification.id, 'technical-failure')
|
mock_update_noti.assert_called_once_with(sample_letter_notification.id, 'technical-failure')
|
||||||
|
|
||||||
|
|
||||||
|
def test_collate_letter_pdfs_for_day(notify_api, mocker):
    """
    collate_letter_pdfs_for_day should list the day's pdfs from s3, pass them to group_letters, and send
    exactly one zip-and-send-letter-pdfs task per group with that group's keys as the filenames.
    """
    # patch s3 through the module under test, so the test doesn't rely on another module's imports
    mock_s3 = mocker.patch('app.celery.letters_pdf_tasks.s3.get_s3_bucket_objects')
    mock_group_letters = mocker.patch('app.celery.letters_pdf_tasks.group_letters', return_value=[
        [{'Key': 'A', 'Size': 1}, {'Key': 'B', 'Size': 2}],
        [{'Key': 'C', 'Size': 3}]
    ])
    mock_celery = mocker.patch('app.celery.letters_pdf_tasks.notify_celery.send_task')

    collate_letter_pdfs_for_day('2017-01-02')

    mock_s3.assert_called_once_with('test-letters-pdf', subfolder='2017-01-02')
    mock_group_letters.assert_called_once_with(mock_s3.return_value)
    # compare the whole call list so that any extra, unexpected task would fail the test
    assert mock_celery.call_args_list == [
        call(
            name='zip-and-send-letter-pdfs',
            kwargs={'filenames': ['A', 'B']},
            queue='process-ftp-tasks'
        ),
        call(
            name='zip-and-send-letter-pdfs',
            kwargs={'filenames': ['C']},
            queue='process-ftp-tasks'
        ),
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def test_group_letters(notify_api):
    """group_letters should split the files into the expected chunks when the max zip filesize is 5."""
    def pdf(key, size):
        return {'Key': key, 'Size': size}

    expected_groups = [
        # ends under max but next one is too big
        [pdf('A', 1), pdf('B', 2)],
        # ends on exactly max
        [pdf('C', 3), pdf('D', 1), pdf('E', 1)],
        # exactly max goes in next file
        [pdf('F', 5)],
        # if it's bigger than the max, still gets included
        [pdf('G', 6)],
        # whatever's left goes in last list
        [pdf('H', 1), pdf('I', 1)],
    ]
    # the input is simply every expected group flattened, in order
    letters = [letter for group in expected_groups for letter in group]

    with set_config_values(notify_api, {'MAX_LETTER_PDF_ZIP_FILESIZE': 5}):
        groups = group_letters(letters)

        for expected in expected_groups:
            assert next(groups) == expected
        # make sure iterator is exhausted
        assert next(groups, None) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_group_letters_with_no_letters(notify_api):
    """With no files to group, group_letters should yield nothing at all (not even an empty list)."""
    groups = list(group_letters([]))
    assert groups == []
|
||||||
|
|||||||
Reference in New Issue
Block a user