From 644b110a8ddcc8ab7a1dab83a16b8468e6629081 Mon Sep 17 00:00:00 2001 From: Katie Smith Date: Tue, 2 Jan 2018 17:18:01 +0000 Subject: [PATCH 1/2] Group letters into a max number of files for sending to DVLA Grouping the letters into a maximum number of files is necessary because the SQS task needs to be under a certain size. We also compress the task when sending. --- app/celery/letters_pdf_tasks.py | 8 ++++-- app/config.py | 1 + tests/app/celery/test_letters_pdf_tasks.py | 31 +++++++++++++++++++--- 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/app/celery/letters_pdf_tasks.py b/app/celery/letters_pdf_tasks.py index ec08666b4..7096f281a 100644 --- a/app/celery/letters_pdf_tasks.py +++ b/app/celery/letters_pdf_tasks.py @@ -98,7 +98,8 @@ def collate_letter_pdfs_for_day(date): notify_celery.send_task( name=TaskNames.ZIP_AND_SEND_LETTER_PDFS, kwargs={'filenames_to_zip': filenames}, - queue=QueueNames.PROCESS_FTP + queue=QueueNames.PROCESS_FTP, + compression='zlib' ) @@ -112,7 +113,10 @@ def group_letters(letter_pdfs): list_of_files = [] for letter in letter_pdfs: if letter['Key'].lower().endswith('.pdf'): - if running_filesize + letter['Size'] > current_app.config['MAX_LETTER_PDF_ZIP_FILESIZE']: + if ( + running_filesize + letter['Size'] > current_app.config['MAX_LETTER_PDF_ZIP_FILESIZE'] or + len(list_of_files) >= current_app.config['MAX_LETTER_PDF_COUNT_PER_ZIP'] + ): yield list_of_files running_filesize = 0 list_of_files = [] diff --git a/app/config.py b/app/config.py index e42a08c3b..ba24b4a5a 100644 --- a/app/config.py +++ b/app/config.py @@ -129,6 +129,7 @@ class Config(object): MAX_VERIFY_CODE_COUNT = 10 MAX_LETTER_PDF_ZIP_FILESIZE = 500 * 1024 * 1024 # 500mb + MAX_LETTER_PDF_COUNT_PER_ZIP = 5000 CHECK_PROXY_HEADER = False diff --git a/tests/app/celery/test_letters_pdf_tasks.py b/tests/app/celery/test_letters_pdf_tasks.py index 78a58306c..e387f6b27 100644 --- a/tests/app/celery/test_letters_pdf_tasks.py +++ b/tests/app/celery/test_letters_pdf_tasks.py @@ -155,16 +155,18 @@ def test_collate_letter_pdfs_for_day(notify_api, mocker): assert mock_celery.call_args_list[0] == call( name='zip-and-send-letter-pdfs', kwargs={'filenames_to_zip': ['A.PDF', 'B.pDf']}, - queue='process-ftp-tasks' + queue='process-ftp-tasks', + compression='zlib' ) assert mock_celery.call_args_list[1] == call( name='zip-and-send-letter-pdfs', kwargs={'filenames_to_zip': ['C.pdf']}, - queue='process-ftp-tasks' + queue='process-ftp-tasks', + compression='zlib' ) -def test_group_letters(notify_api): +def test_group_letters_splits_on_file_size(notify_api): letters = [ # ends under max but next one is too big {'Key': 'A.pdf', 'Size': 1}, {'Key': 'B.pdf', 'Size': 2}, @@ -190,6 +192,29 @@ def test_group_letters(notify_api): assert next(x, None) is None +def test_group_letters_splits_on_file_count(notify_api): + letters = [ + {'Key': 'A.pdf', 'Size': 1}, + {'Key': 'B.pdf', 'Size': 2}, + {'Key': 'C.pdf', 'Size': 3}, + {'Key': 'D.pdf', 'Size': 1}, + {'Key': 'E.pdf', 'Size': 1}, + {'Key': 'F.pdf', 'Size': 5}, + {'Key': 'G.pdf', 'Size': 6}, + {'Key': 'H.pdf', 'Size': 1}, + {'Key': 'I.pdf', 'Size': 1}, + ] + + with set_config_values(notify_api, {'MAX_LETTER_PDF_COUNT_PER_ZIP': 3}): + x = group_letters(letters) + + assert next(x) == [{'Key': 'A.pdf', 'Size': 1}, {'Key': 'B.pdf', 'Size': 2}, {'Key': 'C.pdf', 'Size': 3}] + assert next(x) == [{'Key': 'D.pdf', 'Size': 1}, {'Key': 'E.pdf', 'Size': 1}, {'Key': 'F.pdf', 'Size': 5}] + assert next(x) == [{'Key': 'G.pdf', 'Size': 6}, {'Key': 'H.pdf', 'Size': 1}, {'Key': 'I.pdf', 'Size': 1}] + # make sure iterator is exhausted + assert next(x, None) is None + + def test_group_letters_ignores_non_pdfs(notify_api): letters = [{'Key': 'A.zip'}] assert list(group_letters(letters)) == [] From 440012288df76cbad47b4c63a5bfef88883187e7 Mon Sep 17 00:00:00 2001 From: Katie Smith Date: Wed, 3 Jan 2018 10:42:37 +0000 Subject: [PATCH 2/2] Add test for max file size and file count when grouping letters --- tests/app/celery/test_letters_pdf_tasks.py | 34 ++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/app/celery/test_letters_pdf_tasks.py b/tests/app/celery/test_letters_pdf_tasks.py index e387f6b27..5c2532934 100644 --- a/tests/app/celery/test_letters_pdf_tasks.py +++ b/tests/app/celery/test_letters_pdf_tasks.py @@ -215,6 +215,40 @@ def test_group_letters_splits_on_file_count(notify_api): assert next(x, None) is None +def test_group_letters_splits_on_file_size_and_file_count(notify_api): + letters = [ + # ends under max file size but next file is too big + {'Key': 'A.pdf', 'Size': 1}, + {'Key': 'B.pdf', 'Size': 2}, + # ends on exactly max number of files and file size + {'Key': 'C.pdf', 'Size': 3}, + {'Key': 'D.pdf', 'Size': 1}, + {'Key': 'E.pdf', 'Size': 1}, + # exactly max file size goes in next file + {'Key': 'F.pdf', 'Size': 5}, + # file size is within max but number of files reaches limit + {'Key': 'G.pdf', 'Size': 1}, + {'Key': 'H.pdf', 'Size': 1}, + {'Key': 'I.pdf', 'Size': 1}, + # whatever's left goes in last list + {'Key': 'J.pdf', 'Size': 1}, + ] + + with set_config_values(notify_api, { + 'MAX_LETTER_PDF_ZIP_FILESIZE': 5, + 'MAX_LETTER_PDF_COUNT_PER_ZIP': 3 + }): + x = group_letters(letters) + + assert next(x) == [{'Key': 'A.pdf', 'Size': 1}, {'Key': 'B.pdf', 'Size': 2}] + assert next(x) == [{'Key': 'C.pdf', 'Size': 3}, {'Key': 'D.pdf', 'Size': 1}, {'Key': 'E.pdf', 'Size': 1}] + assert next(x) == [{'Key': 'F.pdf', 'Size': 5}] + assert next(x) == [{'Key': 'G.pdf', 'Size': 1}, {'Key': 'H.pdf', 'Size': 1}, {'Key': 'I.pdf', 'Size': 1}] + assert next(x) == [{'Key': 'J.pdf', 'Size': 1}] + # make sure iterator is exhausted + assert next(x, None) is None + + def test_group_letters_ignores_non_pdfs(notify_api): letters = [{'Key': 'A.zip'}] assert list(group_letters(letters)) == []