From 4b61060d326f6b8ff6c4b7ad9faa1fad87f96277 Mon Sep 17 00:00:00 2001 From: Leo Hemsted Date: Fri, 23 Oct 2020 10:21:52 +0100 Subject: [PATCH] stream notifications when collating zip files we had issues where we had 150k 2nd class notifications, and the collate task never ran properly, presumably because the volume of data being returned was too big. to try and help with this, we can switch to streaming rather than using `.all` and building up lists of data. This should help, though the initial query may be a problem still. --- app/celery/letters_pdf_tasks.py | 7 ++----- app/dao/notifications_dao.py | 2 +- tests/app/celery/test_letters_pdf_tasks.py | 8 ++++++-- tests/app/dao/notification_dao/test_notification_dao.py | 2 +- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/app/celery/letters_pdf_tasks.py b/app/celery/letters_pdf_tasks.py index a2431fbd1..5f715c97d 100644 --- a/app/celery/letters_pdf_tasks.py +++ b/app/celery/letters_pdf_tasks.py @@ -178,7 +178,6 @@ def collate_letter_pdfs_to_be_sent(): def get_key_and_size_of_letters_to_be_sent_to_print(print_run_deadline, postage): letters_awaiting_sending = dao_get_letters_to_be_printed(print_run_deadline, postage) - letter_pdfs = [] for letter in letters_awaiting_sending: try: letter_file_name = get_letter_pdf_filename( @@ -188,17 +187,15 @@ def get_key_and_size_of_letters_to_be_sent_to_print(print_run_deadline, postage) postage=letter.postage ) letter_head = s3.head_s3_object(current_app.config['LETTERS_PDF_BUCKET_NAME'], letter_file_name) - letter_pdfs.append({ + yield { "Key": letter_file_name, "Size": letter_head['ContentLength'], "ServiceId": str(letter.service.id) - }) + } except BotoClientError as e: current_app.logger.exception( f"Error getting letter from bucket for notification: {letter.id} with reference: {letter.reference}", e) - return letter_pdfs - def group_letters(letter_pdfs): """ diff --git a/app/dao/notifications_dao.py b/app/dao/notifications_dao.py index b8570a451..cf9f143d4 100644 --- a/app/dao/notifications_dao.py +++ b/app/dao/notifications_dao.py @@ -730,7 +730,7 @@ def dao_get_letters_to_be_printed(print_run_deadline, postage): ).order_by( Notification.service_id, Notification.created_at - ).all() + ) return notifications diff --git a/tests/app/celery/test_letters_pdf_tasks.py b/tests/app/celery/test_letters_pdf_tasks.py index 6e1664fef..6badc3bce 100644 --- a/tests/app/celery/test_letters_pdf_tasks.py +++ b/tests/app/celery/test_letters_pdf_tasks.py @@ -223,7 +223,9 @@ def test_get_key_and_size_of_letters_to_be_sent_to_print(notify_api, mocker, sam {'ContentLength': 3}, ]) - results = get_key_and_size_of_letters_to_be_sent_to_print(datetime.now() - timedelta(minutes=30), postage='second') + results = list( + get_key_and_size_of_letters_to_be_sent_to_print(datetime.now() - timedelta(minutes=30), postage='second') + ) assert mock_s3.call_count == 3 mock_s3.assert_has_calls( @@ -284,7 +286,9 @@ def test_get_key_and_size_of_letters_to_be_sent_to_print_catches_exception( ClientError(error_response, "File not found") ]) - results = get_key_and_size_of_letters_to_be_sent_to_print(datetime.now() - timedelta(minutes=30), postage='second') + results = list( + get_key_and_size_of_letters_to_be_sent_to_print(datetime.now() - timedelta(minutes=30), postage='second') + ) assert mock_head_s3_object.call_count == 2 mock_head_s3_object.assert_has_calls( diff --git a/tests/app/dao/notification_dao/test_notification_dao.py b/tests/app/dao/notification_dao/test_notification_dao.py index 375bfafd6..8b8717740 100644 --- a/tests/app/dao/notification_dao/test_notification_dao.py +++ b/tests/app/dao/notification_dao/test_notification_dao.py @@ -1692,6 +1692,6 @@ def test_letters_to_be_printed_sort_by_service(notify_db_session): notification_2 = create_notification(template=first_template, created_at=datetime(2020, 12, 1, 12, 30)) notification_3 = create_notification(template=second_template, created_at=datetime(2020, 12, 1, 8, 30)) - results = dao_get_letters_to_be_printed(print_run_deadline=datetime(2020, 12, 1, 17, 30), postage='second') + results = list(dao_get_letters_to_be_printed(print_run_deadline=datetime(2020, 12, 1, 17, 30), postage='second')) assert len(results) == 3 assert results == [notification_1, notification_2, notification_3]