From b3f48c1a84fc2ea3f13c95ee66515897c2bd273c Mon Sep 17 00:00:00 2001 From: Ben Thorner Date: Tue, 22 Jun 2021 12:28:46 +0100 Subject: [PATCH] Backup original precompiled uploads to S3 This continues the work from Template Preview [1], so that we have a complete store of original PDFs to use for testing changes to it. Previously we did store some originals, but these were only invalid PDFs that had failed sanitisation; for valid PDFs, the "transient" bucket only contains the sanitised versions, which the API deletes / moves when the notification is sent [2]. Since the notification is only created at a later stage [3], there's no easy way to get the final name of the PDF we send to DVLA. Instead, we use the "upload_id", which eventually becomes the notification ID [4]. This should be enough to trace the file for specific debugging. Note that we only want to store original PDFs if they're valid (and virus free!), since there's no point testing changes with bad data. [1]: https://github.com/alphagov/notifications-template-preview/pull/545 [2]: https://github.com/alphagov/notifications-api/blob/c44ec57c1784084d92ab54903034241c9612253d/app/service/send_notification.py#L212 [3]: https://github.com/alphagov/notifications-admin/blob/7930a53a58bd86f6fe4f979db5253b889fd10c06/app/main/views/uploads.py#L362 [4]: https://github.com/alphagov/notifications-admin/blob/7930a53a58bd86f6fe4f979db5253b889fd10c06/app/main/views/uploads.py#L373 --- app/config.py | 5 +++++ app/main/views/uploads.py | 6 +++++ app/s3_client/s3_letter_upload_client.py | 12 ++++++++++ .../main/views/uploads/test_upload_letter.py | 22 +++++++++++++++---- .../s3_client/test_s3_letter_upload_client.py | 19 ++++++++++++++++ 5 files changed, 60 insertions(+), 4 deletions(-) diff --git a/app/config.py b/app/config.py index 11e03eac4..f92347d9d 100644 --- a/app/config.py +++ b/app/config.py @@ -89,6 +89,7 @@ class Development(Config): LOGO_UPLOAD_BUCKET_NAME = 'public-logos-tools' MOU_BUCKET_NAME = 'notify.tools-mou' TRANSIENT_UPLOADED_LETTERS = 'development-transient-uploaded-letters' + PRECOMPILED_ORIGINALS_BACKUP_LETTERS = 'development-letters-precompiled-originals-backup' ADMIN_CLIENT_SECRET = 'dev-notify-secret-key' API_HOST_NAME = 'http://localhost:6011' @@ -112,6 +113,7 @@ class Test(Development): LOGO_UPLOAD_BUCKET_NAME = 'public-logos-test' MOU_BUCKET_NAME = 'test-mou' TRANSIENT_UPLOADED_LETTERS = 'test-transient-uploaded-letters' + PRECOMPILED_ORIGINALS_BACKUP_LETTERS = 'test-letters-precompiled-originals-backup' NOTIFY_ENVIRONMENT = 'test' API_HOST_NAME = 'http://you-forgot-to-mock-an-api-call-to' TEMPLATE_PREVIEW_API_HOST = 'http://localhost:9999' @@ -131,6 +133,7 @@ class Preview(Config): LOGO_UPLOAD_BUCKET_NAME = 'public-logos-preview' MOU_BUCKET_NAME = 'notify.works-mou' TRANSIENT_UPLOADED_LETTERS = 'preview-transient-uploaded-letters' + PRECOMPILED_ORIGINALS_BACKUP_LETTERS = 'preview-letters-precompiled-originals-backup' NOTIFY_ENVIRONMENT = 'preview' CHECK_PROXY_HEADER = False ASSET_DOMAIN = 'static.notify.works' @@ -148,6 +151,7 @@ class Staging(Config): LOGO_UPLOAD_BUCKET_NAME = 'public-logos-staging' MOU_BUCKET_NAME = 'staging-notify.works-mou' TRANSIENT_UPLOADED_LETTERS = 'staging-transient-uploaded-letters' + PRECOMPILED_ORIGINALS_BACKUP_LETTERS = 'staging-letters-precompiled-originals-backup' NOTIFY_ENVIRONMENT = 'staging' CHECK_PROXY_HEADER = False ASSET_DOMAIN = 'static.staging-notify.works' @@ -162,6 +166,7 @@ class Live(Config): LOGO_UPLOAD_BUCKET_NAME = 'public-logos-production' MOU_BUCKET_NAME = 'notifications.service.gov.uk-mou' TRANSIENT_UPLOADED_LETTERS = 'production-transient-uploaded-letters' + PRECOMPILED_ORIGINALS_BACKUP_LETTERS = 'production-letters-precompiled-originals-backup' NOTIFY_ENVIRONMENT = 'live' CHECK_PROXY_HEADER = False ASSET_DOMAIN = 'static.notifications.service.gov.uk' diff --git a/app/main/views/uploads.py b/app/main/views/uploads.py index 3ef0666b1..9a563c470 100644 --- a/app/main/views/uploads.py +++ b/app/main/views/uploads.py @@ -39,6 +39,7 @@ from app.main import main from app.main.forms import CsvUploadForm, LetterUploadPostageForm, PDFUploadForm from app.models.contact_list import ContactList from app.s3_client.s3_letter_upload_client import ( + backup_original_letter_to_s3, get_letter_metadata, get_letter_pdf_and_metadata, get_transient_letter_file_location, @@ -216,6 +217,11 @@ def upload_letter(service_id): filename=original_filename, recipient=recipient) + backup_original_letter_to_s3( + pdf_file_bytes, + upload_id=upload_id, + ) + return redirect( url_for( 'main.uploaded_letter_preview', diff --git a/app/s3_client/s3_letter_upload_client.py b/app/s3_client/s3_letter_upload_client.py index 2e3ff08cb..861a62604 100644 --- a/app/s3_client/s3_letter_upload_client.py +++ b/app/s3_client/s3_letter_upload_client.py @@ -10,6 +10,18 @@ def get_transient_letter_file_location(service_id, upload_id): return 'service-{}/{}.pdf'.format(service_id, upload_id) +def backup_original_letter_to_s3( + data, + upload_id, +): + utils_s3upload( + filedata=data, + region=current_app.config['AWS_REGION'], + bucket_name=current_app.config['PRECOMPILED_ORIGINALS_BACKUP_LETTERS'], + file_location=f'{upload_id}.pdf', + ) + + def upload_letter_to_s3( data, *, diff --git a/tests/app/main/views/uploads/test_upload_letter.py b/tests/app/main/views/uploads/test_upload_letter.py index ebd206d9e..8e994ab3c 100644 --- a/tests/app/main/views/uploads/test_upload_letter.py +++ b/tests/app/main/views/uploads/test_upload_letter.py @@ -42,7 +42,8 @@ def test_post_upload_letter_redirects_for_valid_file( json=lambda: {'file': 'VGhlIHNhbml0aXNlZCBjb250ZW50', 'recipient_address': 'The Queen'} ) ) - mock_s3 = mocker.patch('app.main.views.uploads.upload_letter_to_s3') + mock_s3_upload = mocker.patch('app.main.views.uploads.upload_letter_to_s3') + mock_s3_backup = mocker.patch('app.main.views.uploads.backup_original_letter_to_s3') mocker.patch('app.main.views.uploads.get_letter_metadata', return_value=LetterMetadata({ 'filename': 'tests/test_pdf_files/one_page_pdf.pdf', 'page_count': '1', @@ -64,7 +65,7 @@ def test_post_upload_letter_redirects_for_valid_file( ) assert antivirus_mock.called - mock_s3.assert_called_once_with( + mock_s3_upload.assert_called_once_with( b'The sanitised content', file_location='service-{}/{}.pdf'.format(SERVICE_ONE_ID, fake_uuid), status='valid', @@ -72,6 +73,13 @@ def test_post_upload_letter_redirects_for_valid_file( filename='tests/test_pdf_files/one_page_pdf.pdf', recipient='The Queen', ) + + with open('tests/test_pdf_files/one_page_pdf.pdf', 'rb') as file: + mock_s3_backup.assert_called_once_with( + file.read(), + upload_id=ANY, + ) + mock_sanitise.assert_called_once_with( ANY, allow_international_letters=expected_allow_international, @@ -108,6 +116,7 @@ def test_post_upload_letter_shows_letter_preview_for_valid_file( ) ) mocker.patch('app.main.views.uploads.upload_letter_to_s3') + mocker.patch('app.main.views.uploads.backup_original_letter_to_s3') mocker.patch('app.main.views.uploads.pdf_page_count', return_value=3) mocker.patch('app.main.views.uploads.get_letter_metadata', return_value=LetterMetadata({ 'filename': 'tests/test_pdf_files/one_page_pdf.pdf', @@ -167,6 +176,7 @@ def test_upload_international_letter_shows_preview_with_no_choice_of_postage( json=lambda: {'file': 'VGhlIHNhbml0aXNlZCBjb250ZW50', 'recipient_address': 'The Queen'} )) mocker.patch('app.main.views.uploads.upload_letter_to_s3') + mocker.patch('app.main.views.uploads.backup_original_letter_to_s3') mocker.patch('app.main.views.uploads.pdf_page_count', return_value=3) mocker.patch('app.main.views.uploads.get_letter_metadata', return_value=LetterMetadata({ 'filename': 'tests/test_pdf_files/one_page_pdf.pdf', @@ -230,6 +240,7 @@ def test_post_upload_letter_shows_error_when_no_file_uploaded(client_request): def test_post_upload_letter_shows_error_when_file_contains_virus(mocker, client_request): mocker.patch('app.main.views.uploads.antivirus_client.scan', return_value=False) + mock_s3_backup = mocker.patch('app.main.views.uploads.backup_original_letter_to_s3') with open('tests/test_pdf_files/one_page_pdf.pdf', 'rb') as file: page = client_request.post( @@ -240,6 +251,7 @@ def test_post_upload_letter_shows_error_when_file_contains_virus(mocker, client_ ) assert page.find('div', class_='banner-dangerous').find('h1').text == 'Your file contains a virus' assert normalize_spaces(page.find('label', class_='file-upload-button').text) == 'Upload your file again' + mock_s3_backup.assert_not_called() def test_post_choose_upload_file_when_file_is_too_big(mocker, client_request): @@ -277,7 +289,8 @@ def test_post_choose_upload_file_when_file_is_malformed(mocker, client_request): def test_post_upload_letter_with_invalid_file(mocker, client_request, fake_uuid): mocker.patch('uuid.uuid4', return_value=fake_uuid) mocker.patch('app.main.views.uploads.antivirus_client.scan', return_value=True) - mock_s3 = mocker.patch('app.main.views.uploads.upload_letter_to_s3') + mock_s3_upload = mocker.patch('app.main.views.uploads.upload_letter_to_s3') + mock_s3_backup = mocker.patch('app.main.views.uploads.backup_original_letter_to_s3') mock_sanitise_response = Mock() mock_sanitise_response.raise_for_status.side_effect = RequestException(response=Mock(status_code=400)) @@ -302,7 +315,7 @@ def test_post_upload_letter_with_invalid_file(mocker, client_request, fake_uuid) _follow_redirects=True ) - mock_s3.assert_called_once_with( + mock_s3_upload.assert_called_once_with( file_contents, file_location='service-{}/{}.pdf'.format(SERVICE_ONE_ID, fake_uuid), status='invalid', @@ -312,6 +325,7 @@ def test_post_upload_letter_with_invalid_file(mocker, client_request, fake_uuid) message='content-outside-printable-area' ) + mock_s3_backup.assert_not_called() assert page.find('div', class_='banner-dangerous').find('h1', {"data-error-type": 'content-outside-printable-area'}) assert not page.find('button', {'class': 'page-footer__button', 'type': 'submit'}) diff --git a/tests/app/s3_client/test_s3_letter_upload_client.py b/tests/app/s3_client/test_s3_letter_upload_client.py index 900e8b31e..163b28ca6 100644 --- a/tests/app/s3_client/test_s3_letter_upload_client.py +++ b/tests/app/s3_client/test_s3_letter_upload_client.py @@ -1,13 +1,32 @@ import urllib +import uuid from flask import current_app from app.s3_client.s3_letter_upload_client import ( LetterMetadata, + backup_original_letter_to_s3, upload_letter_to_s3, ) +def test_backup_original_letter_to_s3(mocker, notify_admin): + s3_mock = mocker.patch('app.s3_client.s3_letter_upload_client.utils_s3upload') + upload_id = uuid.uuid4() + + backup_original_letter_to_s3( + 'pdf_data', + upload_id=upload_id, + ) + + s3_mock.assert_called_once_with( + bucket_name=current_app.config['PRECOMPILED_ORIGINALS_BACKUP_LETTERS'], + file_location=f'{str(upload_id)}.pdf', + filedata='pdf_data', + region=current_app.config['AWS_REGION'] + ) + + def test_upload_letter_to_s3(mocker): s3_mock = mocker.patch('app.s3_client.s3_letter_upload_client.utils_s3upload')