mirror of
https://github.com/GSA/notifications-api.git
synced 2026-01-31 23:26:23 -05:00
Relax lookup of letter PDFs in S3 buckets
Previously we generated the filename we expected a letter PDF to be
stored at in S3, and used that to retrieve it. However, the generated
filename can change over the course of a notification's lifetime e.g.
if the service changes from crown ('.C.') to non-crown ('.N.').
The prefix of the filename is stable: it's based on properties of the
notification - reference and creation - that don't change. This commit
changes the way we interact with letter PDFs in S3:
- Uploading uses the original method to generate the full file name.
The method is renamed to 'generate_' to distinguish it from the new one.
- Downloading uses a new 'find_' method to get the filename using just
its prefix, which makes it agnostic to changes in the filename suffix.
Making this change helps to decouple our code from the requirements DVLA
have on the filenames. While it means more traffic to S3, we rely on S3
in any case to download the files. From experience, we know S3 is highly
reliable and performant, so don't anticipate any issues.
In the tests we favour using moto to mock S3, so that the behaviour is
realistic. There are a couple of places where we just mock the method,
since what it returns isn't important for the test.
Note that, since the new method requires a notification object, we need
to change a query in one place, the columns of which were only selected
to appease the original method to generate a filename.
This commit is contained in:
@@ -27,9 +27,10 @@ from app.errors import VirusScanError
|
||||
from app.exceptions import NotificationTechnicalFailureException
|
||||
from app.letters.utils import (
|
||||
ScanErrorType,
|
||||
find_letter_pdf_filename,
|
||||
generate_letter_pdf_filename,
|
||||
get_billable_units_for_letter_page_count,
|
||||
get_file_names_from_error_bucket,
|
||||
get_letter_pdf_filename,
|
||||
get_reference_from_filename,
|
||||
move_error_pdf_to_scan_bucket,
|
||||
move_failed_pdf,
|
||||
@@ -58,7 +59,7 @@ from app.models import (
|
||||
def get_pdf_for_templated_letter(self, notification_id):
|
||||
try:
|
||||
notification = get_notification_by_id(notification_id, _raise=True)
|
||||
letter_filename = get_letter_pdf_filename(
|
||||
letter_filename = generate_letter_pdf_filename(
|
||||
reference=notification.reference,
|
||||
crown=notification.service.crown,
|
||||
created_at=notification.created_at,
|
||||
@@ -235,12 +236,7 @@ def get_key_and_size_of_letters_to_be_sent_to_print(print_run_deadline, postage)
|
||||
letters_awaiting_sending = dao_get_letters_to_be_printed(print_run_deadline, postage)
|
||||
for letter in letters_awaiting_sending:
|
||||
try:
|
||||
letter_file_name = get_letter_pdf_filename(
|
||||
reference=letter.reference,
|
||||
crown=letter.crown,
|
||||
created_at=letter.created_at,
|
||||
postage=postage
|
||||
)
|
||||
letter_file_name = find_letter_pdf_filename(letter)
|
||||
letter_head = s3.head_s3_object(current_app.config['LETTERS_PDF_BUCKET_NAME'], letter_file_name)
|
||||
yield {
|
||||
"Key": letter_file_name,
|
||||
@@ -375,7 +371,7 @@ def process_sanitised_letter(self, sanitise_data):
|
||||
|
||||
# The original filename could be wrong because we didn't know the postage.
|
||||
# Now we know if the letter is international, we can check what the filename should be.
|
||||
upload_file_name = get_letter_pdf_filename(
|
||||
upload_file_name = generate_letter_pdf_filename(
|
||||
reference=notification.reference,
|
||||
crown=notification.service.crown,
|
||||
created_at=notification.created_at,
|
||||
|
||||
@@ -27,7 +27,7 @@ from app.clients.sms.firetext import (
|
||||
get_message_status_and_reason_from_firetext_code,
|
||||
)
|
||||
from app.dao.dao_utils import transactional
|
||||
from app.letters.utils import get_letter_pdf_filename
|
||||
from app.letters.utils import find_letter_pdf_filename
|
||||
from app.models import (
|
||||
EMAIL_TYPE,
|
||||
KEY_TYPE_NORMAL,
|
||||
@@ -453,11 +453,7 @@ def _delete_letters_from_s3(
|
||||
Notification.status.in_(NOTIFICATION_STATUS_TYPES_COMPLETED)
|
||||
).limit(query_limit).all()
|
||||
for letter in letters_to_delete_from_s3:
|
||||
prefix = get_letter_pdf_filename(reference=letter.reference,
|
||||
crown=letter.service.crown,
|
||||
created_at=letter.created_at,
|
||||
ignore_folder=letter.key_type == KEY_TYPE_TEST,
|
||||
postage=letter.postage)
|
||||
prefix = find_letter_pdf_filename(letter)
|
||||
s3_objects = get_s3_bucket_objects(bucket_name=bucket_name, subfolder=prefix)
|
||||
for s3_object in s3_objects:
|
||||
try:
|
||||
@@ -758,13 +754,7 @@ def dao_get_letters_to_be_printed(print_run_deadline, postage, query_limit=10000
|
||||
https://docs.sqlalchemy.org/en/13/orm/query.html?highlight=yield_per#sqlalchemy.orm.query.Query.yield_per
|
||||
https://www.mail-archive.com/sqlalchemy@googlegroups.com/msg12443.html
|
||||
"""
|
||||
notifications = db.session.query(
|
||||
Notification.id,
|
||||
Notification.created_at,
|
||||
Notification.reference,
|
||||
Notification.service_id,
|
||||
Service.crown,
|
||||
).join(
|
||||
notifications = Notification.query.join(
|
||||
Notification.service
|
||||
).filter(
|
||||
Notification.created_at < convert_bst_to_utc(print_run_deadline),
|
||||
|
||||
@@ -37,7 +37,27 @@ def get_folder_name(created_at):
|
||||
return '{}/'.format(print_datetime.date())
|
||||
|
||||
|
||||
def get_letter_pdf_filename(reference, crown, created_at, ignore_folder=False, postage=SECOND_CLASS):
|
||||
def find_letter_pdf_filename(notification):
|
||||
"""
|
||||
Retrieve the filename of a letter from s3 by searching for it based on a prefix.
|
||||
|
||||
Use this when retrieving existing pdfs, so that we can be more resilient if the naming convention changes.
|
||||
"""
|
||||
bucket_name, prefix = get_bucket_name_and_prefix_for_notification(notification)
|
||||
|
||||
s3 = boto3.resource('s3')
|
||||
bucket = s3.Bucket(bucket_name)
|
||||
item = next(x for x in bucket.objects.filter(Prefix=prefix))
|
||||
return item.key
|
||||
|
||||
|
||||
def generate_letter_pdf_filename(reference, crown, created_at, ignore_folder=False, postage=SECOND_CLASS):
|
||||
"""
|
||||
Generate a filename for putting a letter into s3 or sending to dvla.
|
||||
|
||||
We should only use this function when uploading data. If you need to get a letter or its metadata from s3
|
||||
then use `find_letter_pdf_filename` instead.
|
||||
"""
|
||||
upload_file_name = LETTERS_PDF_FILE_LOCATION_STRUCTURE.format(
|
||||
folder='' if ignore_folder else get_folder_name(created_at),
|
||||
reference=reference,
|
||||
@@ -78,7 +98,7 @@ def upload_letter_pdf(notification, pdf_data, precompiled=False):
|
||||
current_app.logger.info("PDF Letter {} reference {} created at {}, {} bytes".format(
|
||||
notification.id, notification.reference, notification.created_at, len(pdf_data)))
|
||||
|
||||
upload_file_name = get_letter_pdf_filename(
|
||||
upload_file_name = generate_letter_pdf_filename(
|
||||
reference=notification.reference,
|
||||
crown=notification.service.crown,
|
||||
created_at=notification.created_at,
|
||||
|
||||
@@ -17,8 +17,8 @@ from app.dao.templates_dao import (
|
||||
)
|
||||
from app.dao.users_dao import get_user_by_id
|
||||
from app.letters.utils import (
|
||||
generate_letter_pdf_filename,
|
||||
get_billable_units_for_letter_page_count,
|
||||
get_letter_pdf_filename,
|
||||
get_page_count,
|
||||
move_uploaded_pdf_to_letters_bucket,
|
||||
)
|
||||
@@ -197,7 +197,7 @@ def send_pdf_letter_notification(service_id, post_data):
|
||||
postage=post_data['postage'] or template.postage,
|
||||
)
|
||||
|
||||
upload_filename = get_letter_pdf_filename(
|
||||
upload_filename = generate_letter_pdf_filename(
|
||||
reference=notification.reference,
|
||||
crown=notification.service.crown,
|
||||
created_at=notification.created_at,
|
||||
|
||||
Reference in New Issue
Block a user