From 8f144be29c011689f82a27f69f011cde07bd6390 Mon Sep 17 00:00:00 2001 From: Katie Smith Date: Wed, 4 Dec 2019 15:59:51 +0000 Subject: [PATCH 1/2] Add config for new template preview task Added the queue and task names for the new template preview task to the config. Also added the new bucket name that template preview will use for the sanitised letters to the config for all environments. --- app/config.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/app/config.py b/app/config.py index 49136a50c..ccb94c7de 100644 --- a/app/config.py +++ b/app/config.py @@ -29,6 +29,7 @@ class QueueNames(object): CALLBACKS = 'service-callbacks' LETTERS = 'letter-tasks' ANTIVIRUS = 'antivirus-tasks' + SANITISE_LETTERS = 'sanitise-letter-tasks' @staticmethod def all_queues(): @@ -53,6 +54,7 @@ class TaskNames(object): PROCESS_INCOMPLETE_JOBS = 'process-incomplete-jobs' ZIP_AND_SEND_LETTER_PDFS = 'zip-and-send-letter-pdfs' SCAN_FILE = 'scan-file' + SANITISE_LETTER = 'sanitise-and-upload-letter' class Config(object): @@ -375,6 +377,7 @@ class Development(Config): LETTERS_SCAN_BUCKET_NAME = 'development-letters-scan' INVALID_PDF_BUCKET_NAME = 'development-letters-invalid-pdf' TRANSIENT_UPLOADED_LETTERS = 'development-transient-uploaded-letters' + LETTER_SANITISE_BUCKET_NAME = 'development-letters-sanitise' ADMIN_CLIENT_SECRET = 'dev-notify-secret-key' SECRET_KEY = 'dev-notify-secret-key' @@ -415,6 +418,7 @@ class Test(Development): LETTERS_SCAN_BUCKET_NAME = 'test-letters-scan' INVALID_PDF_BUCKET_NAME = 'test-letters-invalid-pdf' TRANSIENT_UPLOADED_LETTERS = 'test-transient-uploaded-letters' + LETTER_SANITISE_BUCKET_NAME = 'test-letters-sanitise' # this is overriden in jenkins and on cloudfoundry SQLALCHEMY_DATABASE_URI = os.getenv('SQLALCHEMY_DATABASE_URI', 'postgresql://localhost/test_notification_api') @@ -449,6 +453,7 @@ class Preview(Config): LETTERS_SCAN_BUCKET_NAME = 'preview-letters-scan' INVALID_PDF_BUCKET_NAME = 'preview-letters-invalid-pdf' TRANSIENT_UPLOADED_LETTERS = 'preview-transient-uploaded-letters' + LETTER_SANITISE_BUCKET_NAME = 'preview-letters-sanitise' FROM_NUMBER = 'preview' API_RATE_LIMIT_ENABLED = True CHECK_PROXY_HEADER = False @@ -464,6 +469,7 @@ class Staging(Config): LETTERS_SCAN_BUCKET_NAME = 'staging-letters-scan' INVALID_PDF_BUCKET_NAME = 'staging-letters-invalid-pdf' TRANSIENT_UPLOADED_LETTERS = 'staging-transient-uploaded-letters' + LETTER_SANITISE_BUCKET_NAME = 'staging-letters-sanitise' FROM_NUMBER = 'stage' API_RATE_LIMIT_ENABLED = True CHECK_PROXY_HEADER = True @@ -480,6 +486,7 @@ class Live(Config): LETTERS_SCAN_BUCKET_NAME = 'production-letters-scan' INVALID_PDF_BUCKET_NAME = 'production-letters-invalid-pdf' TRANSIENT_UPLOADED_LETTERS = 'production-transient-uploaded-letters' + LETTER_SANITISE_BUCKET_NAME = 'production-letters-sanitise' FROM_NUMBER = 'GOVUK' PERFORMANCE_PLATFORM_ENABLED = True API_RATE_LIMIT_ENABLED = True From cc2191c19f2db8c370b6994da6c85f4c9fcf1717 Mon Sep 17 00:00:00 2001 From: Katie Smith Date: Wed, 4 Dec 2019 16:02:46 +0000 Subject: [PATCH 2/2] Add new tasks for sanitising precompiled letters Added a task, `sanitise-letter`, that will be called from antivirus when a letter has passed virus scan. This calls a new task in template-preview which will sanitise the PDF. A second new task, `process-sanitised-letter`, will be called from the template preview task and deals with updating the notification and moving it to the relevant bucket. --- app/celery/letters_pdf_tasks.py | 98 ++++++++++ app/letters/utils.py | 13 ++ tests/app/celery/test_letters_pdf_tasks.py | 205 ++++++++++++++++++++- tests/app/letters/test_letter_utils.py | 47 +++++ 4 files changed, 358 insertions(+), 5 deletions(-) diff --git a/app/celery/letters_pdf_tasks.py b/app/celery/letters_pdf_tasks.py index 761a97c78..79fa40db2 100644 --- a/app/celery/letters_pdf_tasks.py +++ b/app/celery/letters_pdf_tasks.py @@ -27,6 +27,7 @@ from app.dao.notifications_dao import ( dao_update_notifications_by_reference, ) from app.errors import VirusScanError +from app.exceptions import NotificationTechnicalFailureException from app.letters.utils import ( copy_redaction_failed_pdf, get_billable_units_for_letter_page_count, @@ -35,6 +36,7 @@ from app.letters.utils import ( upload_letter_pdf, ScanErrorType, move_failed_pdf, + move_sanitised_letter_to_test_or_live_pdf_bucket, move_scan_to_invalid_pdf_bucket, move_error_pdf_to_scan_bucket, get_file_names_from_error_bucket, @@ -43,6 +45,7 @@ from app.models import ( KEY_TYPE_TEST, NOTIFICATION_CREATED, NOTIFICATION_DELIVERED, + NOTIFICATION_PENDING_VIRUS_CHECK, NOTIFICATION_TECHNICAL_FAILURE, NOTIFICATION_VALIDATION_FAILED, NOTIFICATION_VIRUS_SCAN_FAILED, @@ -262,6 +265,100 @@ def process_virus_scan_passed(self, filename): update_notification_status_by_id(notification.id, NOTIFICATION_TECHNICAL_FAILURE) +@notify_celery.task(bind=True, name='sanitise-letter', max_retries=15, default_retry_delay=300) +def sanitise_letter(self, filename): + try: + reference = get_reference_from_filename(filename) + notification = dao_get_notification_by_reference(reference) + + current_app.logger.info('Notification ID {} Virus scan passed: {}'.format(notification.id, filename)) + + if notification.status != NOTIFICATION_PENDING_VIRUS_CHECK: + current_app.logger.info('Sanitise letter called for notification {} which has is in {} state'.format( + notification.id, notification.status)) + return + + notify_celery.send_task( + name=TaskNames.SANITISE_LETTER, + kwargs={ + 'notification_id': str(notification.id), + 'filename': filename, + }, + queue=QueueNames.SANITISE_LETTERS, + ) + except Exception: + try: + current_app.logger.exception( + "RETRY: calling sanitise_letter task for notification {} failed".format(notification.id) + ) + self.retry(queue=QueueNames.RETRY) + except self.MaxRetriesExceededError: + message = "RETRY FAILED: Max retries reached. " \ + "The task sanitise_letter failed for notification {}. " \ + "Notification has been updated to technical-failure".format(notification.id) + update_notification_status_by_id(notification.id, NOTIFICATION_TECHNICAL_FAILURE) + raise NotificationTechnicalFailureException(message) + + +@notify_celery.task(name='process-sanitised-letter') +def process_sanitised_letter( + page_count, + message, + invalid_pages, + validation_status, + filename, + notification_id, +): + current_app.logger.info('Processing sanitised letter with id {}'.format(notification_id)) + notification = get_notification_by_id(notification_id, _raise=True) + + if notification.status != NOTIFICATION_PENDING_VIRUS_CHECK: + current_app.logger.info( + 'process-sanitised-letter task called for notification {} which has is in {} state'.format( + notification.id, notification.status) + ) + return + + try: + original_pdf_object = s3.get_s3_object(current_app.config['LETTERS_SCAN_BUCKET_NAME'], filename) + + if validation_status == 'failed': + current_app.logger.info('Processing invalid precompiled pdf with id {} (file {})'.format( + notification_id, filename)) + + _move_invalid_letter_and_update_status( + notification=notification, + filename=filename, + scan_pdf_object=original_pdf_object, + message=message, + invalid_pages=invalid_pages, + page_count=page_count, + ) + return + + current_app.logger.info('Processing valid precompiled pdf with id {} (file {})'.format( + notification_id, filename)) + + billable_units = get_billable_units_for_letter_page_count(page_count) + is_test_key = notification.key_type == KEY_TYPE_TEST + + move_sanitised_letter_to_test_or_live_pdf_bucket(filename, is_test_key, notification.created_at) + # We've moved the sanitised PDF from the sanitise bucket, but still need to delete the original file: + original_pdf_object.delete() + update_letter_pdf_status( + reference=notification.reference, + status=NOTIFICATION_DELIVERED if is_test_key else NOTIFICATION_CREATED, + billable_units=billable_units + ) + + except BotoClientError: + current_app.logger.exception( + "Boto error when processing sanitised letter for notification {}".format(filename, notification.id) + ) + update_notification_status_by_id(notification.id, NOTIFICATION_TECHNICAL_FAILURE) + raise NotificationTechnicalFailureException + + def _move_invalid_letter_and_update_status( *, notification, filename, scan_pdf_object, message=None, invalid_pages=None, page_count=None ): @@ -283,6 +380,7 @@ def _move_invalid_letter_and_update_status( "Error when moving letter with id {} to invalid PDF bucket".format(notification.id) ) update_notification_status_by_id(notification.id, NOTIFICATION_TECHNICAL_FAILURE) + raise NotificationTechnicalFailureException def _upload_pdf_to_test_or_live_pdf_bucket(pdf_data, filename, is_test_letter, created_at): diff --git a/app/letters/utils.py b/app/letters/utils.py index 272cd9f92..cd96a8d19 100644 --- a/app/letters/utils.py +++ b/app/letters/utils.py @@ -156,6 +156,19 @@ def move_uploaded_pdf_to_letters_bucket(source_filename, upload_filename): ) +def move_sanitised_letter_to_test_or_live_pdf_bucket(filename, is_test_letter, created_at): + target_bucket_config = 'TEST_LETTERS_BUCKET_NAME' if is_test_letter else 'LETTERS_PDF_BUCKET_NAME' + target_bucket_name = current_app.config[target_bucket_config] + target_filename = get_folder_name(created_at, dont_use_sending_date=is_test_letter) + filename + + _move_s3_object( + source_bucket=current_app.config['LETTER_SANITISE_BUCKET_NAME'], + source_filename=filename, + target_bucket=target_bucket_name, + target_filename=target_filename, + ) + + def get_file_names_from_error_bucket(): s3 = boto3.resource('s3') scan_bucket = current_app.config['LETTERS_SCAN_BUCKET_NAME'] diff --git a/tests/app/celery/test_letters_pdf_tasks.py b/tests/app/celery/test_letters_pdf_tasks.py index d40047c3f..44d10e865 100644 --- a/tests/app/celery/test_letters_pdf_tasks.py +++ b/tests/app/celery/test_letters_pdf_tasks.py @@ -2,6 +2,7 @@ from unittest.mock import Mock, call, ANY import base64 import boto3 +from datetime import datetime from moto import mock_s3 from flask import current_app from freezegun import freeze_time @@ -13,19 +14,23 @@ from requests import RequestException from sqlalchemy.orm.exc import NoResultFound from app.errors import VirusScanError +from app.exceptions import NotificationTechnicalFailureException from app.celery.letters_pdf_tasks import ( create_letters_pdf, get_letters_pdf, collate_letter_pdfs_for_day, group_letters, letter_in_created_state, + process_sanitised_letter, process_virus_scan_passed, process_virus_scan_failed, process_virus_scan_error, replay_letters_in_error, + sanitise_letter, _move_invalid_letter_and_update_status, _sanitise_precompiled_pdf ) +from app.config import QueueNames, TaskNames from app.letters.utils import ScanErrorType from app.models import ( KEY_TYPE_NORMAL, @@ -51,6 +56,8 @@ def test_should_have_decorated_tasks_functions(): assert process_virus_scan_passed.__wrapped__.__name__ == 'process_virus_scan_passed' assert process_virus_scan_failed.__wrapped__.__name__ == 'process_virus_scan_failed' assert process_virus_scan_error.__wrapped__.__name__ == 'process_virus_scan_error' + assert sanitise_letter.__wrapped__.__name__ == 'sanitise_letter' + assert process_sanitised_letter.__wrapped__.__name__ == 'process_sanitised_letter' @pytest.mark.parametrize('personalisation', [{'name': 'test'}, None]) @@ -666,11 +673,12 @@ def test_move_invalid_letter_and_update_status_logs_error_and_sets_tech_failure_ side_effect=ClientError(error_response, 'operation_name')) mock_logger = mocker.patch('app.celery.tasks.current_app.logger.exception') - _move_invalid_letter_and_update_status( - notification=sample_letter_notification, - filename='filename', - scan_pdf_object=mocker.Mock() - ) + with pytest.raises(NotificationTechnicalFailureException): + _move_invalid_letter_and_update_status( + notification=sample_letter_notification, + filename='filename', + scan_pdf_object=mocker.Mock() + ) assert sample_letter_notification.status == NOTIFICATION_TECHNICAL_FAILURE mock_logger.assert_called_once_with( @@ -678,6 +686,193 @@ def test_move_invalid_letter_and_update_status_logs_error_and_sets_tech_failure_ ) +def test_sanitise_letter_calls_template_preview_sanitise_task(mocker, sample_letter_notification): + mock_celery = mocker.patch('app.celery.letters_pdf_tasks.notify_celery.send_task') + filename = 'NOTIFY.{}'.format(sample_letter_notification.reference) + sample_letter_notification.status = NOTIFICATION_PENDING_VIRUS_CHECK + + sanitise_letter(filename) + + mock_celery.assert_called_once_with( + name=TaskNames.SANITISE_LETTER, + kwargs={'notification_id': str(sample_letter_notification.id), 'filename': filename}, + queue=QueueNames.SANITISE_LETTERS, + ) + + +def test_sanitise_letter_does_not_call_template_preview_sanitise_task_if_notification_in_wrong_state( + mocker, + sample_letter_notification, +): + mock_celery = mocker.patch('app.celery.letters_pdf_tasks.notify_celery.send_task') + filename = 'NOTIFY.{}'.format(sample_letter_notification.reference) + + sanitise_letter(filename) + + assert not mock_celery.called + + +def test_sanitise_letter_does_not_call_template_preview_sanitise_task_if_there_is_an_exception( + mocker, + sample_letter_notification, +): + mocker.patch('app.celery.letters_pdf_tasks.notify_celery.send_task', side_effect=Exception()) + mock_celery_retry = mocker.patch('app.celery.letters_pdf_tasks.sanitise_letter.retry') + + filename = 'NOTIFY.{}'.format(sample_letter_notification.reference) + sample_letter_notification.status = NOTIFICATION_PENDING_VIRUS_CHECK + + sanitise_letter(filename) + + mock_celery_retry.assert_called_once_with(queue='retry-tasks') + + +def test_sanitise_letter_puts_letter_into_technical_failure_if_max_retries_exceeded(sample_letter_notification, mocker): + mocker.patch('app.celery.letters_pdf_tasks.notify_celery.send_task', side_effect=Exception()) + mocker.patch('app.celery.letters_pdf_tasks.sanitise_letter.retry', side_effect=MaxRetriesExceededError()) + + filename = 'NOTIFY.{}'.format(sample_letter_notification.reference) + sample_letter_notification.status = NOTIFICATION_PENDING_VIRUS_CHECK + + with pytest.raises(NotificationTechnicalFailureException): + sanitise_letter(filename) + + assert sample_letter_notification.status == NOTIFICATION_TECHNICAL_FAILURE + + +@mock_s3 +@pytest.mark.parametrize('key_type, destination_bucket, expected_status, destination_filename', [ + (KEY_TYPE_NORMAL, 'LETTERS_PDF_BUCKET_NAME', NOTIFICATION_CREATED, '2018-07-01/NOTIFY.foo'), + (KEY_TYPE_TEST, 'TEST_LETTERS_BUCKET_NAME', NOTIFICATION_DELIVERED, 'NOTIFY.foo'), +]) +def test_process_sanitised_letter_with_valid_letter( + sample_letter_notification, + key_type, + destination_bucket, + expected_status, + destination_filename, +): + filename = 'NOTIFY.{}'.format(sample_letter_notification.reference) + + scan_bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME'] + template_preview_bucket_name = current_app.config['LETTER_SANITISE_BUCKET_NAME'] + destination_bucket_name = current_app.config[destination_bucket] + conn = boto3.resource('s3', region_name='eu-west-1') + + scan_bucket = conn.create_bucket(Bucket=scan_bucket_name) + template_preview_bucket = conn.create_bucket(Bucket=template_preview_bucket_name) + destination_bucket = conn.create_bucket(Bucket=destination_bucket_name) + + s3 = boto3.client('s3', region_name='eu-west-1') + s3.put_object(Bucket=scan_bucket_name, Key=filename, Body=b'original_pdf_content') + s3.put_object(Bucket=template_preview_bucket_name, Key=filename, Body=b'sanitised_pdf_content') + + sample_letter_notification.status = NOTIFICATION_PENDING_VIRUS_CHECK + sample_letter_notification.key_type = key_type + sample_letter_notification.billable_units = 1 + sample_letter_notification.created_at = datetime(2018, 7, 1, 12) + + process_sanitised_letter( + page_count=2, + message=None, + invalid_pages=None, + validation_status='passed', + filename=filename, + notification_id=str(sample_letter_notification.id) + ) + + assert sample_letter_notification.status == expected_status + assert sample_letter_notification.billable_units == 1 + + assert not [x for x in scan_bucket.objects.all()] + assert not [x for x in template_preview_bucket.objects.all()] + assert len([x for x in destination_bucket.objects.all()]) == 1 + + file_contents = conn.Object(destination_bucket_name, destination_filename).get()['Body'].read().decode('utf-8') + assert file_contents == 'sanitised_pdf_content' + + +@mock_s3 +@pytest.mark.parametrize('key_type', [KEY_TYPE_NORMAL, KEY_TYPE_TEST]) +def test_process_sanitised_letter_with_invalid_letter(sample_letter_notification, key_type): + filename = 'NOTIFY.{}'.format(sample_letter_notification.reference) + + scan_bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME'] + template_preview_bucket_name = current_app.config['LETTER_SANITISE_BUCKET_NAME'] + invalid_letter_bucket_name = current_app.config['INVALID_PDF_BUCKET_NAME'] + conn = boto3.resource('s3', region_name='eu-west-1') + + scan_bucket = conn.create_bucket(Bucket=scan_bucket_name) + template_preview_bucket = conn.create_bucket(Bucket=template_preview_bucket_name) + invalid_letter_bucket = conn.create_bucket(Bucket=invalid_letter_bucket_name) + + s3 = boto3.client('s3', region_name='eu-west-1') + s3.put_object(Bucket=scan_bucket_name, Key=filename, Body=b'original_pdf_content') + + sample_letter_notification.status = NOTIFICATION_PENDING_VIRUS_CHECK + sample_letter_notification.key_type = key_type + sample_letter_notification.billable_units = 1 + sample_letter_notification.created_at = datetime(2018, 7, 1, 12) + + process_sanitised_letter( + page_count=2, + message='content-outside-printable-area', + invalid_pages=[1], + validation_status='failed', + filename=filename, + notification_id=str(sample_letter_notification.id) + ) + + assert sample_letter_notification.status == NOTIFICATION_VALIDATION_FAILED + assert sample_letter_notification.billable_units == 0 + + assert not [x for x in scan_bucket.objects.all()] + assert not [x for x in template_preview_bucket.objects.all()] + assert len([x for x in invalid_letter_bucket.objects.all()]) == 1 + + file_contents = conn.Object(invalid_letter_bucket_name, filename).get()['Body'].read().decode('utf-8') + assert file_contents == 'original_pdf_content' + + +def test_process_sanitised_letter_when_letter_status_is_not_pending_virus_scan( + sample_letter_notification, + mocker, +): + mock_s3 = mocker.patch('app.celery.letters_pdf_tasks.s3') + sample_letter_notification.status = NOTIFICATION_CREATED + + process_sanitised_letter( + page_count=2, + message=None, + invalid_pages=None, + validation_status='passed', + filename='NOTIFY.{}'.format(sample_letter_notification.reference), + notification_id=str(sample_letter_notification.id) + ) + + assert not mock_s3.called + + +def test_process_sanitised_letter_puts_letter_into_tech_failure_for_boto_errors( + sample_letter_notification, + mocker, +): + mocker.patch('app.celery.letters_pdf_tasks.s3.get_s3_object', side_effect=ClientError({}, 'operation_name')) + sample_letter_notification.status = NOTIFICATION_PENDING_VIRUS_CHECK + + with pytest.raises(NotificationTechnicalFailureException): + process_sanitised_letter( + page_count=2, + message=None, + invalid_pages=None, + validation_status='passed', + filename='NOTIFY.{}'.format(sample_letter_notification.reference), + notification_id=str(sample_letter_notification.id) + ) + + assert sample_letter_notification.status == NOTIFICATION_TECHNICAL_FAILURE + + def test_process_letter_task_check_virus_scan_failed(sample_letter_notification, mocker): filename = 'NOTIFY.{}'.format(sample_letter_notification.reference) sample_letter_notification.status = NOTIFICATION_PENDING_VIRUS_CHECK diff --git a/tests/app/letters/test_letter_utils.py b/tests/app/letters/test_letter_utils.py index 99cc4fbe1..eaa950fcc 100644 --- a/tests/app/letters/test_letter_utils.py +++ b/tests/app/letters/test_letter_utils.py @@ -12,6 +12,7 @@ from app.letters.utils import ( get_letter_pdf_filename, get_letter_pdf_and_metadata, letter_print_day, + move_sanitised_letter_to_test_or_live_pdf_bucket, upload_letter_pdf, ScanErrorType, move_failed_pdf, get_folder_name ) @@ -338,6 +339,52 @@ def test_get_folder_name_returns_empty_string_for_test_letter(): assert '' == get_folder_name(datetime.utcnow(), dont_use_sending_date=True) +@mock_s3 +def test_move_sanitised_letter_to_live_pdf_bucket(notify_api, mocker): + filename = 'my_letter.pdf' + source_bucket_name = current_app.config['LETTER_SANITISE_BUCKET_NAME'] + target_bucket_name = current_app.config['LETTERS_PDF_BUCKET_NAME'] + + conn = boto3.resource('s3', region_name='eu-west-1') + source_bucket = conn.create_bucket(Bucket=source_bucket_name) + target_bucket = conn.create_bucket(Bucket=target_bucket_name) + + s3 = boto3.client('s3', region_name='eu-west-1') + s3.put_object(Bucket=source_bucket_name, Key=filename, Body=b'pdf_content') + + move_sanitised_letter_to_test_or_live_pdf_bucket( + filename=filename, + is_test_letter=False, + created_at=datetime.utcnow() + ) + + assert not [x for x in source_bucket.objects.all()] + assert len([x for x in target_bucket.objects.all()]) == 1 + + +@mock_s3 +def test_move_sanitised_letter_to_test_pdf_bucket(notify_api, mocker): + filename = 'my_letter.pdf' + source_bucket_name = current_app.config['LETTER_SANITISE_BUCKET_NAME'] + target_bucket_name = current_app.config['TEST_LETTERS_BUCKET_NAME'] + + conn = boto3.resource('s3', region_name='eu-west-1') + source_bucket = conn.create_bucket(Bucket=source_bucket_name) + target_bucket = conn.create_bucket(Bucket=target_bucket_name) + + s3 = boto3.client('s3', region_name='eu-west-1') + s3.put_object(Bucket=source_bucket_name, Key=filename, Body=b'pdf_content') + + move_sanitised_letter_to_test_or_live_pdf_bucket( + filename=filename, + is_test_letter=True, + created_at=datetime.utcnow() + ) + + assert not [x for x in source_bucket.objects.all()] + assert len([x for x in target_bucket.objects.all()]) == 1 + + @freeze_time('2017-07-07 20:00:00') def test_letter_print_day_returns_today_if_letter_was_printed_after_1730_yesterday(): created_at = datetime(2017, 7, 6, 17, 30)