From c76e789f1eadc4803a86d632773ecc5594f6ecea Mon Sep 17 00:00:00 2001 From: Ben Thorner Date: Tue, 16 Mar 2021 11:57:33 +0000 Subject: [PATCH] Reduce extra S3 ops when working with letter PDFs Previously we did some unnecessary work: - Collate task. This had one S3 request to get a summary of the object, which was then used in another request to get the full object. We only need the size of the object, which is included in the summary [1]. - Archive task. This had one S3 request to get a summary of the object, which was then used to make another request to delete it. We still need both requests, but we can remove the S3.Object in the middle. [1]: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#objectsummary --- app/celery/letters_pdf_tasks.py | 9 ++++----- app/dao/notifications_dao.py | 8 +++----- app/letters/utils.py | 15 ++------------- ...st_notification_dao_delete_notifications.py | 18 ++++++++---------- tests/app/letters/test_letter_utils.py | 10 +++++----- 5 files changed, 22 insertions(+), 38 deletions(-) diff --git a/app/celery/letters_pdf_tasks.py b/app/celery/letters_pdf_tasks.py index 43cd55fe4..a4c7e2628 100644 --- a/app/celery/letters_pdf_tasks.py +++ b/app/celery/letters_pdf_tasks.py @@ -28,7 +28,7 @@ from app.exceptions import NotificationTechnicalFailureException from app.letters.utils import ( LetterPDFNotFound, ScanErrorType, - find_letter_pdf_filename, + find_letter_pdf_in_s3, generate_letter_pdf_filename, get_billable_units_for_letter_page_count, get_file_names_from_error_bucket, @@ -237,11 +237,10 @@ def get_key_and_size_of_letters_to_be_sent_to_print(print_run_deadline, postage) letters_awaiting_sending = dao_get_letters_to_be_printed(print_run_deadline, postage) for letter in letters_awaiting_sending: try: - letter_file_name = find_letter_pdf_filename(letter) - letter_head = s3.head_s3_object(current_app.config['LETTERS_PDF_BUCKET_NAME'], letter_file_name) + letter_pdf = find_letter_pdf_in_s3(letter) yield { - "Key": letter_file_name, - "Size": letter_head['ContentLength'], + "Key": letter_pdf.key, + "Size": letter_pdf.size, "ServiceId": str(letter.service_id) } except (BotoClientError, LetterPDFNotFound) as e: diff --git a/app/dao/notifications_dao.py b/app/dao/notifications_dao.py index 95e5ad1d1..6388eddca 100644 --- a/app/dao/notifications_dao.py +++ b/app/dao/notifications_dao.py @@ -22,12 +22,11 @@ from sqlalchemy.sql.expression import case from werkzeug.datastructures import MultiDict from app import create_uuid, db -from app.aws.s3 import remove_s3_object from app.clients.sms.firetext import ( get_message_status_and_reason_from_firetext_code, ) from app.dao.dao_utils import transactional -from app.letters.utils import LetterPDFNotFound, find_letter_pdf_filename +from app.letters.utils import LetterPDFNotFound, find_letter_pdf_in_s3 from app.models import ( EMAIL_TYPE, KEY_TYPE_NORMAL, @@ -440,7 +439,6 @@ def _move_notifications_to_notification_history(notification_type, service_id, d def _delete_letters_from_s3( notification_type, service_id, date_to_delete_from, query_limit ): - bucket_name = current_app.config['LETTERS_PDF_BUCKET_NAME'] letters_to_delete_from_s3 = db.session.query( Notification ).filter( @@ -454,8 +452,8 @@ def _delete_letters_from_s3( ).limit(query_limit).all() for letter in letters_to_delete_from_s3: try: - filename = find_letter_pdf_filename(letter) - remove_s3_object(bucket_name, filename) + letter_pdf = find_letter_pdf_in_s3(letter) + letter_pdf.delete() except (ClientError, LetterPDFNotFound): current_app.logger.exception( "Could not delete S3 object for letter: {}".format(letter.id)) diff --git a/app/letters/utils.py b/app/letters/utils.py index 623cac84a..1c3df7b1f 100644 --- a/app/letters/utils.py +++ b/app/letters/utils.py @@ -41,12 +41,7 @@ class LetterPDFNotFound(Exception): pass -def find_letter_pdf_filename(notification): - """ - Retrieve the filename of a letter from s3 by searching for it based on a prefix. - - Use this when retrieving existing pdfs, so that we can be more resilient if the naming convention changes. - """ +def find_letter_pdf_in_s3(notification): bucket_name, prefix = get_bucket_name_and_prefix_for_notification(notification) s3 = boto3.resource('s3') @@ -55,16 +50,10 @@ def find_letter_pdf_filename(notification): item = next(x for x in bucket.objects.filter(Prefix=prefix)) except StopIteration: raise LetterPDFNotFound(f'File not found in bucket {bucket_name} with prefix {prefix}', ) - return item.key + return item def generate_letter_pdf_filename(reference, crown, created_at, ignore_folder=False, postage=SECOND_CLASS): - """ - Generate a filename for putting a letter into s3 or sending to dvla. - - We should only use this function when uploading data. If you need to get a letter or its metadata from s3 - then use `find_letter_pdf_filename` instead. - """ upload_file_name = LETTERS_PDF_FILE_LOCATION_STRUCTURE.format( folder='' if ignore_folder else get_folder_name(created_at), reference=reference, diff --git a/tests/app/dao/notification_dao/test_notification_dao_delete_notifications.py b/tests/app/dao/notification_dao/test_notification_dao_delete_notifications.py index 1ac06f6e2..88c4f8717 100644 --- a/tests/app/dao/notification_dao/test_notification_dao_delete_notifications.py +++ b/tests/app/dao/notification_dao/test_notification_dao_delete_notifications.py @@ -72,8 +72,7 @@ def test_should_delete_notifications_by_type_after_seven_days( expected_email_count, expected_letter_count ): - mocker.patch("app.dao.notifications_dao.find_letter_pdf_filename") - mocker.patch("app.dao.notifications_dao.remove_s3_object") + mocker.patch("app.dao.notifications_dao.find_letter_pdf_in_s3") email_template, letter_template, sms_template = _create_templates(sample_service) # create one notification a day between 1st and 10th from 11:00 to 19:00 of each type for i in range(1, 11): @@ -120,17 +119,16 @@ def test_should_not_delete_notification_history(sample_service, mocker): @pytest.mark.parametrize('notification_type', ['sms', 'email', 'letter']) def test_delete_notifications_for_days_of_retention(sample_service, notification_type, mocker): - mocker.patch('app.dao.notifications_dao.find_letter_pdf_filename') - mock_remove_s3 = mocker.patch("app.dao.notifications_dao.remove_s3_object") + mock_s3_object = mocker.patch('app.dao.notifications_dao.find_letter_pdf_in_s3').return_value create_test_data(notification_type, sample_service) assert Notification.query.count() == 9 delete_notifications_older_than_retention_by_type(notification_type) assert Notification.query.count() == 7 assert Notification.query.filter_by(notification_type=notification_type).count() == 1 if notification_type == 'letter': - assert mock_remove_s3.call_count == 2 + assert mock_s3_object.delete.call_count == 2 else: - mock_remove_s3.assert_not_called() + mock_s3_object.delete.assert_not_called() @mock_s3 @@ -227,7 +225,7 @@ def test_delete_notifications_delete_notification_type_for_default_time_if_no_da def test_delete_notifications_deletes_letters_not_sent_and_in_final_state_from_table_but_not_s3( sample_service, mocker, notification_status ): - mock_remove_s3 = mocker.patch("app.dao.notifications_dao.remove_s3_object") + mock_s3_object = mocker.patch("app.dao.notifications_dao.find_letter_pdf_in_s3").return_value letter_template = create_template(service=sample_service, template_type='letter') create_notification( template=letter_template, @@ -242,7 +240,7 @@ def test_delete_notifications_deletes_letters_not_sent_and_in_final_state_from_t assert Notification.query.count() == 0 assert NotificationHistory.query.count() == 1 - mock_remove_s3.assert_not_called() + mock_s3_object.assert_not_called() @mock_s3 @@ -289,7 +287,7 @@ def test_delete_notifications_deletes_letters_sent_and_in_final_state_from_table def test_delete_notifications_does_not_delete_letters_not_yet_in_final_state( sample_service, mocker, notification_status ): - mock_remove_s3 = mocker.patch("app.dao.notifications_dao.remove_s3_object") + mock_s3_object = mocker.patch("app.dao.notifications_dao.find_letter_pdf_in_s3").return_value letter_template = create_template(service=sample_service, template_type='letter') create_notification( template=letter_template, @@ -304,7 +302,7 @@ def test_delete_notifications_does_not_delete_letters_not_yet_in_final_state( assert Notification.query.count() == 1 assert NotificationHistory.query.count() == 0 - mock_remove_s3.assert_not_called() + mock_s3_object.assert_not_called() @freeze_time('2020-03-25 00:01') diff --git a/tests/app/letters/test_letter_utils.py b/tests/app/letters/test_letter_utils.py index 4d99f7c0a..fb17feb84 100644 --- a/tests/app/letters/test_letter_utils.py +++ b/tests/app/letters/test_letter_utils.py @@ -10,7 +10,7 @@ from moto import mock_s3 from app.letters.utils import ( LetterPDFNotFound, ScanErrorType, - find_letter_pdf_filename, + find_letter_pdf_in_s3, generate_letter_pdf_filename, get_bucket_name_and_prefix_for_notification, get_folder_name, @@ -49,7 +49,7 @@ def _sample_precompiled_letter_notification_using_test_key(sample_precompiled_le @mock_s3 -def test_find_letter_pdf_filename_returns_filename(sample_notification): +def test_find_letter_pdf_in_s3_returns_object(sample_notification): bucket_name = current_app.config['LETTERS_PDF_BUCKET_NAME'] s3 = boto3.client('s3', region_name='eu-west-1') s3.create_bucket( @@ -60,11 +60,11 @@ def test_find_letter_pdf_filename_returns_filename(sample_notification): _, prefix = get_bucket_name_and_prefix_for_notification(sample_notification) s3.put_object(Bucket=bucket_name, Key=f'{prefix}-and-then-some', Body=b'f') - assert find_letter_pdf_filename(sample_notification) == f'{prefix}-and-then-some' + assert find_letter_pdf_in_s3(sample_notification).key == f'{prefix}-and-then-some' @mock_s3 -def test_find_letter_pdf_filename_raises_if_not_found(sample_notification): +def test_find_letter_pdf_in_s3_raises_if_not_found(sample_notification): bucket_name = current_app.config['LETTERS_PDF_BUCKET_NAME'] s3 = boto3.client('s3', region_name='eu-west-1') s3.create_bucket( @@ -73,7 +73,7 @@ def test_find_letter_pdf_filename_raises_if_not_found(sample_notification): ) with pytest.raises(LetterPDFNotFound): - find_letter_pdf_filename(sample_notification) + find_letter_pdf_in_s3(sample_notification) @pytest.mark.parametrize('created_at,folder', [