From 8a322b844b358470b8459a20666a56a996c0c4de Mon Sep 17 00:00:00 2001 From: Katie Smith Date: Fri, 6 Sep 2019 17:10:48 +0100 Subject: [PATCH] Sanitise uploaded letters and store in S3 This sanitises uploaded letters and stores the sanitised result in S3 with if it passes validation or the original PDF in S3 if validation fails. A metadata value of 'status' is set to either 'valid' or 'invalid'. --- app/config.py | 6 ++ app/main/views/uploads.py | 24 +++++- app/s3_client/s3_letter_upload_client.py | 16 ++++ app/template_previews.py | 8 ++ app/templates/views/uploads/preview.html | 5 ++ tests/app/main/views/test_uploads.py | 78 ++++++++++++++++--- .../s3_client/test_s3_letter_upload_client.py | 17 ++++ tests/app/test_template_previews.py | 18 ++++- 8 files changed, 161 insertions(+), 11 deletions(-) create mode 100644 app/s3_client/s3_letter_upload_client.py create mode 100644 tests/app/s3_client/test_s3_letter_upload_client.py diff --git a/app/config.py b/app/config.py index ce022374f..aa4f0eaeb 100644 --- a/app/config.py +++ b/app/config.py @@ -72,6 +72,7 @@ class Config(object): NOTIFY_ENVIRONMENT = 'development' LOGO_UPLOAD_BUCKET_NAME = 'public-logos-local' MOU_BUCKET_NAME = 'local-mou' + TRANSIENT_UPLOADED_LETTERS = 'local-transient-uploaded-letters' ROUTE_SECRET_KEY_1 = os.environ.get('ROUTE_SECRET_KEY_1', '') ROUTE_SECRET_KEY_2 = os.environ.get('ROUTE_SECRET_KEY_2', '') CHECK_PROXY_HEADER = False @@ -94,6 +95,7 @@ class Development(Config): CSV_UPLOAD_BUCKET_NAME = 'development-notifications-csv-upload' LOGO_UPLOAD_BUCKET_NAME = 'public-logos-tools' MOU_BUCKET_NAME = 'notify.tools-mou' + TRANSIENT_UPLOADED_LETTERS = 'development-transient-uploaded-letters' ADMIN_CLIENT_SECRET = 'dev-notify-secret-key' API_HOST_NAME = 'http://localhost:6011' @@ -115,6 +117,7 @@ class Test(Development): CSV_UPLOAD_BUCKET_NAME = 'test-notifications-csv-upload' LOGO_UPLOAD_BUCKET_NAME = 'public-logos-test' MOU_BUCKET_NAME = 'test-mou' + TRANSIENT_UPLOADED_LETTERS = 'test-transient-uploaded-letters' NOTIFY_ENVIRONMENT = 'test' API_HOST_NAME = 'http://you-forgot-to-mock-an-api-call-to' TEMPLATE_PREVIEW_API_HOST = 'http://localhost:9999' @@ -132,6 +135,7 @@ class Preview(Config): CSV_UPLOAD_BUCKET_NAME = 'preview-notifications-csv-upload' LOGO_UPLOAD_BUCKET_NAME = 'public-logos-preview' MOU_BUCKET_NAME = 'notify.works-mou' + TRANSIENT_UPLOADED_LETTERS = 'preview-transient-uploaded-letters' NOTIFY_ENVIRONMENT = 'preview' CHECK_PROXY_HEADER = False ASSET_DOMAIN = 'static.notify.works' @@ -146,6 +150,7 @@ class Staging(Config): CSV_UPLOAD_BUCKET_NAME = 'staging-notifications-csv-upload' LOGO_UPLOAD_BUCKET_NAME = 'public-logos-staging' MOU_BUCKET_NAME = 'staging-notify.works-mou' + TRANSIENT_UPLOADED_LETTERS = 'staging-transient-uploaded-letters' NOTIFY_ENVIRONMENT = 'staging' CHECK_PROXY_HEADER = False ASSET_DOMAIN = 'static.staging-notify.works' @@ -160,6 +165,7 @@ class Live(Config): CSV_UPLOAD_BUCKET_NAME = 'live-notifications-csv-upload' LOGO_UPLOAD_BUCKET_NAME = 'public-logos-production' MOU_BUCKET_NAME = 'notifications.service.gov.uk-mou' + TRANSIENT_UPLOADED_LETTERS = 'production-transient-uploaded-letters' NOTIFY_ENVIRONMENT = 'live' CHECK_PROXY_HEADER = False ASSET_DOMAIN = 'static.notifications.service.gov.uk' diff --git a/app/main/views/uploads.py b/app/main/views/uploads.py index 6a2315a90..ef47b61da 100644 --- a/app/main/views/uploads.py +++ b/app/main/views/uploads.py @@ -11,11 +11,17 @@ from flask import ( ) from notifications_utils.pdf import pdf_page_count from PyPDF2.utils import PdfReadError +from requests import RequestException from app import current_service from app.extensions import antivirus_client from app.main import main from app.main.forms import PDFUploadForm +from app.s3_client.s3_letter_upload_client import ( + get_transient_letter_file_location, + upload_letter_to_s3, +) +from app.template_previews import sanitise_letter from app.utils import user_has_permissions MAX_FILE_UPLOAD_SIZE = 2 * 1024 * 1024 # 2MB @@ -49,6 +55,20 @@ def upload_letter(service_id): return invalid_upload_error('Your file must be a valid PDF') upload_id = uuid.uuid4() + file_location = get_transient_letter_file_location(service_id, upload_id) + + try: + response = sanitise_letter(BytesIO(pdf_file_bytes)) + response.raise_for_status() + except RequestException as ex: + if ex.response is not None and ex.response.status_code == 400: + status = 'invalid' + upload_letter_to_s3(pdf_file_bytes, file_location, status) + else: + raise ex + else: + status = 'valid' + upload_letter_to_s3(response.content, file_location, status) return redirect( url_for( @@ -56,6 +76,7 @@ def upload_letter(service_id): service_id=current_service.id, file_id=upload_id, original_filename=form.file.data.filename, + status=status, ) ) @@ -71,5 +92,6 @@ def invalid_upload_error(message): @user_has_permissions('send_messages') def uploaded_letter_preview(service_id, file_id): original_filename = request.args.get('original_filename') + status = request.args.get('status') - return render_template('views/uploads/preview.html', original_filename=original_filename) + return render_template('views/uploads/preview.html', original_filename=original_filename, status=status) diff --git a/app/s3_client/s3_letter_upload_client.py b/app/s3_client/s3_letter_upload_client.py new file mode 100644 index 000000000..e73266c79 --- /dev/null +++ b/app/s3_client/s3_letter_upload_client.py @@ -0,0 +1,16 @@ +from flask import current_app +from notifications_utils.s3 import s3upload as utils_s3upload + + +def get_transient_letter_file_location(service_id, upload_id): + return 'service-{}/{}.pdf'.format(service_id, upload_id) + + +def upload_letter_to_s3(data, file_location, status): + utils_s3upload( + filedata=data, + region=current_app.config['AWS_REGION'], + bucket_name=current_app.config['TRANSIENT_UPLOADED_LETTERS'], + file_location=file_location, + metadata={'status': status} + ) diff --git a/app/template_previews.py b/app/template_previews.py index 2dbcba122..14a7ece98 100644 --- a/app/template_previews.py +++ b/app/template_previews.py @@ -66,3 +66,11 @@ def validate_letter(pdf_file): data=pdf_file, headers={'Authorization': 'Token {}'.format(current_app.config['TEMPLATE_PREVIEW_API_KEY'])} ) + + +def sanitise_letter(pdf_file): + return requests.post( + '{}/precompiled/sanitise'.format(current_app.config['TEMPLATE_PREVIEW_API_HOST']), + data=pdf_file, + headers={'Authorization': 'Token {}'.format(current_app.config['TEMPLATE_PREVIEW_API_KEY'])} + ) diff --git a/app/templates/views/uploads/preview.html b/app/templates/views/uploads/preview.html index fe14c5bb6..2597b1c0e 100644 --- a/app/templates/views/uploads/preview.html +++ b/app/templates/views/uploads/preview.html @@ -11,4 +11,9 @@ back_link=url_for('main.upload_letter', service_id=current_service.id) ) }} + {% if status == 'invalid' %} +

+ Validation failed +

+ {% endif %} {% endblock %} diff --git a/tests/app/main/views/test_uploads.py b/tests/app/main/views/test_uploads.py index d8daf1398..ee98153a2 100644 --- a/tests/app/main/views/test_uploads.py +++ b/tests/app/main/views/test_uploads.py @@ -1,4 +1,8 @@ +from unittest.mock import Mock + +import pytest from flask import url_for +from requests import RequestException from app.utils import normalize_spaces from tests.conftest import SERVICE_ONE_ID @@ -24,21 +28,26 @@ def test_get_upload_letter(client_request): def test_post_upload_letter_redirects_for_valid_file(mocker, client_request): mocker.patch('uuid.uuid4', return_value='fake-uuid') antivirus_mock = mocker.patch('app.main.views.uploads.antivirus_client.scan', return_value=True) + mocker.patch('app.main.views.uploads.sanitise_letter', return_value=Mock(content='The sanitised content')) + mock_s3 = mocker.patch('app.main.views.uploads.upload_letter_to_s3') with open('tests/test_pdf_files/one_page_pdf.pdf', 'rb') as file: - client_request.post( + page = client_request.post( 'main.upload_letter', service_id=SERVICE_ONE_ID, _data={'file': file}, - _expected_redirect=url_for( - 'main.uploaded_letter_preview', - service_id=SERVICE_ONE_ID, - file_id='fake-uuid', - original_filename='tests/test_pdf_files/one_page_pdf.pdf', - _external=True - ) + _follow_redirects=True, ) - assert antivirus_mock.called + assert antivirus_mock.called + + mock_s3.assert_called_once_with( + 'The sanitised content', + 'service-{}/fake-uuid.pdf'.format(SERVICE_ONE_ID), + 'valid', + ) + + assert page.find('h1').text == 'tests/test_pdf_files/one_page_pdf.pdf' + assert not page.find(id='validation-error-message') def test_post_upload_letter_shows_error_when_file_is_not_a_pdf(client_request): @@ -104,6 +113,57 @@ def test_post_choose_upload_file_when_file_is_malformed(mocker, client_request): assert normalize_spaces(page.select('.banner-dangerous')[0].text) == 'Your file must be a valid PDF' +def test_post_upload_letter_with_invalid_file(mocker, client_request): + mocker.patch('uuid.uuid4', return_value='fake-uuid') + mocker.patch('app.main.views.uploads.antivirus_client.scan', return_value=True) + mock_s3 = mocker.patch('app.main.views.uploads.upload_letter_to_s3') + + mock_sanitise_response = Mock() + mock_sanitise_response.raise_for_status.side_effect = RequestException(response=Mock(status_code=400)) + mocker.patch('app.main.views.uploads.sanitise_letter', return_value=mock_sanitise_response) + + with open('tests/test_pdf_files/one_page_pdf.pdf', 'rb') as file: + file_contents = file.read() + file.seek(0) + + page = client_request.post( + 'main.upload_letter', + service_id=SERVICE_ONE_ID, + _data={'file': file}, + _follow_redirects=True + ) + + mock_s3.assert_called_once_with( + file_contents, + 'service-{}/fake-uuid.pdf'.format(SERVICE_ONE_ID), + 'invalid', + ) + + assert page.find('h1').text == 'tests/test_pdf_files/one_page_pdf.pdf' + assert normalize_spaces( + page.find(id='validation-error-message').text + ) == 'Validation failed' + + +def test_post_upload_letter_does_not_upload_to_s3_if_template_preview_raises_unknown_error(mocker, client_request): + mocker.patch('uuid.uuid4', return_value='fake-uuid') + mocker.patch('app.main.views.uploads.antivirus_client.scan', return_value=True) + mock_s3 = mocker.patch('app.main.views.uploads.upload_letter_to_s3') + + mocker.patch('app.main.views.uploads.sanitise_letter', side_effect=RequestException()) + + with pytest.raises(RequestException): + with open('tests/test_pdf_files/one_page_pdf.pdf', 'rb') as file: + client_request.post( + 'main.upload_letter', + service_id=SERVICE_ONE_ID, + _data={'file': file}, + _follow_redirects=True + ) + + assert not mock_s3.called + + def test_uploaded_letter_preview(client_request): page = client_request.get( 'main.uploaded_letter_preview', diff --git a/tests/app/s3_client/test_s3_letter_upload_client.py b/tests/app/s3_client/test_s3_letter_upload_client.py new file mode 100644 index 000000000..c30ac61b5 --- /dev/null +++ b/tests/app/s3_client/test_s3_letter_upload_client.py @@ -0,0 +1,17 @@ +from flask import current_app + +from app.s3_client.s3_letter_upload_client import upload_letter_to_s3 + + +def test_upload_letter_to_s3(mocker): + s3_mock = mocker.patch('app.s3_client.s3_letter_upload_client.utils_s3upload') + + upload_letter_to_s3('pdf_data', 'service_id/upload_id.pdf', 'valid') + + s3_mock.assert_called_once_with( + bucket_name=current_app.config['TRANSIENT_UPLOADED_LETTERS'], + file_location='service_id/upload_id.pdf', + filedata='pdf_data', + metadata={'status': 'valid'}, + region=current_app.config['AWS_REGION'] + ) diff --git a/tests/app/test_template_previews.py b/tests/app/test_template_previews.py index dbfcbd159..f37464e8d 100644 --- a/tests/app/test_template_previews.py +++ b/tests/app/test_template_previews.py @@ -4,7 +4,11 @@ from unittest.mock import Mock import pytest from notifications_utils.template import LetterPreviewTemplate -from app.template_previews import TemplatePreview, get_page_count_for_letter +from app.template_previews import ( + TemplatePreview, + get_page_count_for_letter, + sanitise_letter, +) @pytest.mark.parametrize('partial_call, expected_page_argument', [ @@ -119,3 +123,15 @@ def test_from_example_template_makes_request(mocker): 'filename': filename, 'letter_contact_block': None} ) + + +def test_sanitise_letter_calls_template_preview_sanitise_endoint_with_file(mocker): + request_mock = mocker.patch('app.template_previews.requests.post') + + sanitise_letter('pdf_data') + + request_mock.assert_called_once_with( + 'http://localhost:9999/precompiled/sanitise', + headers={'Authorization': 'Token my-secret-key'}, + data='pdf_data' + )