Reduce extra S3 ops when working with letter PDFs

Previously we did some unnecessary work (see the sketch below):

- Collate task. This had one S3 request to get a summary of the object,
which was then used in another request to get the full object. We only
need the size of the object, which is included in the summary [1].

- Archive task. This had one S3 request to get a summary of the object,
which was then used to make another request to delete it. We still need
both requests, but we can remove the S3.Object in the middle.

[1]: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#objectsummary
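
As a rough sketch of both cases (not the actual task code; the bucket
name and prefix below are placeholders), the ObjectSummary yielded by
bucket.objects.filter() already carries the size and can be deleted
directly, so no intermediate S3.Object or extra request is needed:

    import boto3

    bucket = boto3.resource('s3').Bucket('example-letters-bucket')

    # One ListObjects request; each ObjectSummary already has .key and .size.
    summary = next(x for x in bucket.objects.filter(Prefix='2021-03-16/REF'))

    # Collate: read the size straight off the summary. Going via
    # summary.Object().content_length would trigger an extra HeadObject call.
    pdf_size = summary.size

    # Archive: delete via the summary itself (one DeleteObject request),
    # without constructing an s3.Object in between.
    summary.delete()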
Ben Thorner
2021-03-16 11:57:33 +00:00
parent ff7eebc90a
commit c76e789f1e
5 changed files with 22 additions and 38 deletions

@@ -41,12 +41,7 @@ class LetterPDFNotFound(Exception):
     pass


-def find_letter_pdf_filename(notification):
-    """
-    Retrieve the filename of a letter from s3 by searching for it based on a prefix.
-
-    Use this when retrieving existing pdfs, so that we can be more resilient if the naming convention changes.
-    """
+def find_letter_pdf_in_s3(notification):
     bucket_name, prefix = get_bucket_name_and_prefix_for_notification(notification)

     s3 = boto3.resource('s3')
@@ -55,16 +50,10 @@ def find_letter_pdf_filename(notification):
         item = next(x for x in bucket.objects.filter(Prefix=prefix))
     except StopIteration:
         raise LetterPDFNotFound(f'File not found in bucket {bucket_name} with prefix {prefix}', )
-    return item.key
+    return item


 def generate_letter_pdf_filename(reference, crown, created_at, ignore_folder=False, postage=SECOND_CLASS):
     """
     Generate a filename for putting a letter into s3 or sending to dvla.
-    We should only use this function when uploading data. If you need to get a letter or its metadata from s3
-    then use `find_letter_pdf_filename` instead.
     """
     upload_file_name = LETTERS_PDF_FILE_LOCATION_STRUCTURE.format(
         folder='' if ignore_folder else get_folder_name(created_at),
         reference=reference,