Reduce extra S3 ops when working with letter PDFs

Previously we did some unnecessary work:

- Collate task. This had one S3 request to get a summary of the object,
which was then used in another request to get the full object. We only
need the size of the object, which is included in the summary [1].

- Archive task. This had one S3 request to get a summary of the object,
which was then used to make another request to delete it. We still need
both requests, but we can remove the S3.Object in the middle.

[1]: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#objectsummary
This commit is contained in:
Ben Thorner
2021-03-16 11:57:33 +00:00
parent ff7eebc90a
commit c76e789f1e
5 changed files with 22 additions and 38 deletions

View File

@@ -28,7 +28,7 @@ from app.exceptions import NotificationTechnicalFailureException
from app.letters.utils import (
LetterPDFNotFound,
ScanErrorType,
find_letter_pdf_filename,
find_letter_pdf_in_s3,
generate_letter_pdf_filename,
get_billable_units_for_letter_page_count,
get_file_names_from_error_bucket,
@@ -237,11 +237,10 @@ def get_key_and_size_of_letters_to_be_sent_to_print(print_run_deadline, postage)
letters_awaiting_sending = dao_get_letters_to_be_printed(print_run_deadline, postage)
for letter in letters_awaiting_sending:
try:
letter_file_name = find_letter_pdf_filename(letter)
letter_head = s3.head_s3_object(current_app.config['LETTERS_PDF_BUCKET_NAME'], letter_file_name)
letter_pdf = find_letter_pdf_in_s3(letter)
yield {
"Key": letter_file_name,
"Size": letter_head['ContentLength'],
"Key": letter_pdf.key,
"Size": letter_pdf.size,
"ServiceId": str(letter.service_id)
}
except (BotoClientError, LetterPDFNotFound) as e:

View File

@@ -22,12 +22,11 @@ from sqlalchemy.sql.expression import case
from werkzeug.datastructures import MultiDict
from app import create_uuid, db
from app.aws.s3 import remove_s3_object
from app.clients.sms.firetext import (
get_message_status_and_reason_from_firetext_code,
)
from app.dao.dao_utils import transactional
from app.letters.utils import LetterPDFNotFound, find_letter_pdf_filename
from app.letters.utils import LetterPDFNotFound, find_letter_pdf_in_s3
from app.models import (
EMAIL_TYPE,
KEY_TYPE_NORMAL,
@@ -440,7 +439,6 @@ def _move_notifications_to_notification_history(notification_type, service_id, d
def _delete_letters_from_s3(
notification_type, service_id, date_to_delete_from, query_limit
):
bucket_name = current_app.config['LETTERS_PDF_BUCKET_NAME']
letters_to_delete_from_s3 = db.session.query(
Notification
).filter(
@@ -454,8 +452,8 @@ def _delete_letters_from_s3(
).limit(query_limit).all()
for letter in letters_to_delete_from_s3:
try:
filename = find_letter_pdf_filename(letter)
remove_s3_object(bucket_name, filename)
letter_pdf = find_letter_pdf_in_s3(letter)
letter_pdf.delete()
except (ClientError, LetterPDFNotFound):
current_app.logger.exception(
"Could not delete S3 object for letter: {}".format(letter.id))

View File

@@ -41,12 +41,7 @@ class LetterPDFNotFound(Exception):
pass
def find_letter_pdf_filename(notification):
"""
Retrieve the filename of a letter from s3 by searching for it based on a prefix.
Use this when retrieving existing pdfs, so that we can be more resilient if the naming convention changes.
"""
def find_letter_pdf_in_s3(notification):
bucket_name, prefix = get_bucket_name_and_prefix_for_notification(notification)
s3 = boto3.resource('s3')
@@ -55,16 +50,10 @@ def find_letter_pdf_filename(notification):
item = next(x for x in bucket.objects.filter(Prefix=prefix))
except StopIteration:
raise LetterPDFNotFound(f'File not found in bucket {bucket_name} with prefix {prefix}', )
return item.key
return item
def generate_letter_pdf_filename(reference, crown, created_at, ignore_folder=False, postage=SECOND_CLASS):
"""
Generate a filename for putting a letter into s3 or sending to dvla.
We should only use this function when uploading data. If you need to get a letter or its metadata from s3
then use `find_letter_pdf_filename` instead.
"""
upload_file_name = LETTERS_PDF_FILE_LOCATION_STRUCTURE.format(
folder='' if ignore_folder else get_folder_name(created_at),
reference=reference,

View File

@@ -72,8 +72,7 @@ def test_should_delete_notifications_by_type_after_seven_days(
expected_email_count,
expected_letter_count
):
mocker.patch("app.dao.notifications_dao.find_letter_pdf_filename")
mocker.patch("app.dao.notifications_dao.remove_s3_object")
mocker.patch("app.dao.notifications_dao.find_letter_pdf_in_s3")
email_template, letter_template, sms_template = _create_templates(sample_service)
# create one notification a day between 1st and 10th from 11:00 to 19:00 of each type
for i in range(1, 11):
@@ -120,17 +119,16 @@ def test_should_not_delete_notification_history(sample_service, mocker):
@pytest.mark.parametrize('notification_type', ['sms', 'email', 'letter'])
def test_delete_notifications_for_days_of_retention(sample_service, notification_type, mocker):
mocker.patch('app.dao.notifications_dao.find_letter_pdf_filename')
mock_remove_s3 = mocker.patch("app.dao.notifications_dao.remove_s3_object")
mock_s3_object = mocker.patch('app.dao.notifications_dao.find_letter_pdf_in_s3').return_value
create_test_data(notification_type, sample_service)
assert Notification.query.count() == 9
delete_notifications_older_than_retention_by_type(notification_type)
assert Notification.query.count() == 7
assert Notification.query.filter_by(notification_type=notification_type).count() == 1
if notification_type == 'letter':
assert mock_remove_s3.call_count == 2
assert mock_s3_object.delete.call_count == 2
else:
mock_remove_s3.assert_not_called()
mock_s3_object.delete.assert_not_called()
@mock_s3
@@ -227,7 +225,7 @@ def test_delete_notifications_delete_notification_type_for_default_time_if_no_da
def test_delete_notifications_deletes_letters_not_sent_and_in_final_state_from_table_but_not_s3(
sample_service, mocker, notification_status
):
mock_remove_s3 = mocker.patch("app.dao.notifications_dao.remove_s3_object")
mock_s3_object = mocker.patch("app.dao.notifications_dao.find_letter_pdf_in_s3").return_value
letter_template = create_template(service=sample_service, template_type='letter')
create_notification(
template=letter_template,
@@ -242,7 +240,7 @@ def test_delete_notifications_deletes_letters_not_sent_and_in_final_state_from_t
assert Notification.query.count() == 0
assert NotificationHistory.query.count() == 1
mock_remove_s3.assert_not_called()
mock_s3_object.assert_not_called()
@mock_s3
@@ -289,7 +287,7 @@ def test_delete_notifications_deletes_letters_sent_and_in_final_state_from_table
def test_delete_notifications_does_not_delete_letters_not_yet_in_final_state(
sample_service, mocker, notification_status
):
mock_remove_s3 = mocker.patch("app.dao.notifications_dao.remove_s3_object")
mock_s3_object = mocker.patch("app.dao.notifications_dao.find_letter_pdf_in_s3").return_value
letter_template = create_template(service=sample_service, template_type='letter')
create_notification(
template=letter_template,
@@ -304,7 +302,7 @@ def test_delete_notifications_does_not_delete_letters_not_yet_in_final_state(
assert Notification.query.count() == 1
assert NotificationHistory.query.count() == 0
mock_remove_s3.assert_not_called()
mock_s3_object.assert_not_called()
@freeze_time('2020-03-25 00:01')

View File

@@ -10,7 +10,7 @@ from moto import mock_s3
from app.letters.utils import (
LetterPDFNotFound,
ScanErrorType,
find_letter_pdf_filename,
find_letter_pdf_in_s3,
generate_letter_pdf_filename,
get_bucket_name_and_prefix_for_notification,
get_folder_name,
@@ -49,7 +49,7 @@ def _sample_precompiled_letter_notification_using_test_key(sample_precompiled_le
@mock_s3
def test_find_letter_pdf_filename_returns_filename(sample_notification):
def test_find_letter_pdf_in_s3_returns_object(sample_notification):
bucket_name = current_app.config['LETTERS_PDF_BUCKET_NAME']
s3 = boto3.client('s3', region_name='eu-west-1')
s3.create_bucket(
@@ -60,11 +60,11 @@ def test_find_letter_pdf_filename_returns_filename(sample_notification):
_, prefix = get_bucket_name_and_prefix_for_notification(sample_notification)
s3.put_object(Bucket=bucket_name, Key=f'{prefix}-and-then-some', Body=b'f')
assert find_letter_pdf_filename(sample_notification) == f'{prefix}-and-then-some'
assert find_letter_pdf_in_s3(sample_notification).key == f'{prefix}-and-then-some'
@mock_s3
def test_find_letter_pdf_filename_raises_if_not_found(sample_notification):
def test_find_letter_pdf_in_s3_raises_if_not_found(sample_notification):
bucket_name = current_app.config['LETTERS_PDF_BUCKET_NAME']
s3 = boto3.client('s3', region_name='eu-west-1')
s3.create_bucket(
@@ -73,7 +73,7 @@ def test_find_letter_pdf_filename_raises_if_not_found(sample_notification):
)
with pytest.raises(LetterPDFNotFound):
find_letter_pdf_filename(sample_notification)
find_letter_pdf_in_s3(sample_notification)
@pytest.mark.parametrize('created_at,folder', [