Files
notifications-admin/app/s3_client/s3_letter_upload_client.py
Ben Thorner b3f48c1a84 Backup original precompiled uploads to S3
This continues the work from Template Preview [1], so that we have
a complete store of original PDFs to use for testing changes to
Template Preview.

Previously we did store some originals, but these were only invalid
PDFs that had failed sanitisation; for valid PDFs, the "transient"
bucket only contains the sanitised versions, which the API deletes
/ moves when the notification is sent [2].

Since the notification is only created at a later stage [3], there's
no easy way to get the final name of the PDF we send to DVLA. Instead,
we use the "upload_id", which eventually becomes the notification ID
[4]. This should be enough to trace the file for specific debugging.

Note that we only want to store original PDFs if they're valid (and
virus-free!), since there's no point testing changes with bad data.

[1]: https://github.com/alphagov/notifications-template-preview/pull/545
[2]: c44ec57c17/app/service/send_notification.py (L212)
[3]: 7930a53a58/app/main/views/uploads.py (L362)
[4]: 7930a53a58/app/main/views/uploads.py (L373)
2021-06-24 12:21:05 +01:00

88 lines
2.4 KiB
Python

import json
import urllib
import urllib.parse

from boto3 import resource
from flask import current_app
from notifications_utils.s3 import s3upload as utils_s3upload
def get_transient_letter_file_location(service_id, upload_id):
    """Return the object key used for an uploaded letter PDF.

    The key has the form ``service-<service_id>/<upload_id>.pdf``.
    """
    key_template = 'service-{}/{}.pdf'
    return key_template.format(service_id, upload_id)
def backup_original_letter_to_s3(
    data,
    upload_id,
):
    """Store the original uploaded PDF in the precompiled-originals backup bucket.

    The object is written under the key ``<upload_id>.pdf`` in the bucket named
    by the ``PRECOMPILED_ORIGINALS_BACKUP_LETTERS`` config value, in the region
    given by ``AWS_REGION``.

    NOTE(review): the change description for this function says the upload ID
    later becomes the notification ID - confirm against the calling view.
    """
    config = current_app.config
    utils_s3upload(
        filedata=data,
        region=config['AWS_REGION'],
        bucket_name=config['PRECOMPILED_ORIGINALS_BACKUP_LETTERS'],
        file_location=f'{upload_id}.pdf',
    )
def upload_letter_to_s3(
    data,
    *,
    file_location,
    status,
    page_count,
    filename,
    message=None,
    invalid_pages=None,
    recipient=None
):
    """Upload a letter PDF, together with descriptive metadata, to S3.

    The file is written to the bucket named by the ``TRANSIENT_UPLOADED_LETTERS``
    config value, in the region given by ``AWS_REGION``.

    Args:
        data: File contents, passed through unchanged as ``filedata``.
        file_location: Key to store the object under.
        status: Stored as the ``status`` metadata value.
        page_count: Stored as the ``page_count`` metadata value after ``str()``.
        filename: Percent-encoded and stored as the ``filename`` metadata value.
        message: Optional; stored as-is under ``message`` when truthy.
        invalid_pages: Optional; stored JSON-encoded under ``invalid_pages``
            when truthy.
        recipient: Optional; percent-encoded and stored under ``recipient``
            when truthy.
    """
    # S3 requires metadata to be ASCII, so free-text values are percent-encoded
    # with urllib.parse.quote. LetterMetadata takes care of decoding them again
    # when they are read back for display to users.
    metadata = {
        'status': status,
        'page_count': str(page_count),
        'filename': urllib.parse.quote(filename),
    }

    # Optional entries are omitted entirely (not stored empty) when the caller
    # passes nothing, or a falsy value, for them.
    if message:
        metadata['message'] = message
    if invalid_pages:
        metadata['invalid_pages'] = json.dumps(invalid_pages)
    if recipient:
        metadata['recipient'] = urllib.parse.quote(recipient)

    utils_s3upload(
        filedata=data,
        region=current_app.config['AWS_REGION'],
        bucket_name=current_app.config['TRANSIENT_UPLOADED_LETTERS'],
        file_location=file_location,
        metadata=metadata,
    )
class LetterMetadata:
    """Read-only view over a letter's stored metadata.

    Values held under the keys listed in ``KEYS_TO_DECODE`` are percent-decoded
    on access; every other value is returned exactly as the wrapped mapping
    holds it.
    """

    # Metadata keys whose values get() passes through urllib.parse.unquote.
    KEYS_TO_DECODE = ["filename", "recipient"]

    def __init__(self, metadata):
        """Wrap ``metadata``, an object providing ``.get(key, default)``."""
        self._metadata = metadata

    def get(self, key, default=None):
        """Return the value the wrapped mapping's ``get(key, default)`` yields.

        A truthy result for a key in ``KEYS_TO_DECODE`` is percent-decoded
        first. The decoding is applied to whatever the lookup returned, so a
        truthy ``default`` supplied for one of those keys is decoded as well.
        Falsy results (``None``, ``''``) are returned untouched.
        """
        value = self._metadata.get(key, default)
        if value and key in self.KEYS_TO_DECODE:
            value = urllib.parse.unquote(value)
        return value
def get_letter_pdf_and_metadata(service_id, file_id):
    """Fetch an uploaded letter from the transient letters bucket.

    Returns a ``(pdf, metadata)`` tuple: the result of reading the object's
    ``Body``, and its ``Metadata`` wrapped in a ``LetterMetadata``.
    """
    key = get_transient_letter_file_location(service_id, file_id)
    letter_object = resource('s3').Object(
        current_app.config['TRANSIENT_UPLOADED_LETTERS'],
        key,
    )
    response = letter_object.get()
    contents = response['Body'].read()
    return contents, LetterMetadata(response['Metadata'])
def get_letter_metadata(service_id, file_id):
    """Fetch only the metadata of an uploaded letter in the transient bucket.

    Returns the object's ``Metadata`` wrapped in a ``LetterMetadata``; the
    object's ``Body`` is not read here.
    """
    key = get_transient_letter_file_location(service_id, file_id)
    letter_object = resource('s3').Object(
        current_app.config['TRANSIENT_UPLOADED_LETTERS'],
        key,
    )
    response = letter_object.get()
    return LetterMetadata(response['Metadata'])