diff --git a/app/celery/letters_pdf_tasks.py b/app/celery/letters_pdf_tasks.py index 761a97c78..79fa40db2 100644 --- a/app/celery/letters_pdf_tasks.py +++ b/app/celery/letters_pdf_tasks.py @@ -27,6 +27,7 @@ from app.dao.notifications_dao import ( dao_update_notifications_by_reference, ) from app.errors import VirusScanError +from app.exceptions import NotificationTechnicalFailureException from app.letters.utils import ( copy_redaction_failed_pdf, get_billable_units_for_letter_page_count, @@ -35,6 +36,7 @@ from app.letters.utils import ( upload_letter_pdf, ScanErrorType, move_failed_pdf, + move_sanitised_letter_to_test_or_live_pdf_bucket, move_scan_to_invalid_pdf_bucket, move_error_pdf_to_scan_bucket, get_file_names_from_error_bucket, @@ -43,6 +45,7 @@ from app.models import ( KEY_TYPE_TEST, NOTIFICATION_CREATED, NOTIFICATION_DELIVERED, + NOTIFICATION_PENDING_VIRUS_CHECK, NOTIFICATION_TECHNICAL_FAILURE, NOTIFICATION_VALIDATION_FAILED, NOTIFICATION_VIRUS_SCAN_FAILED, @@ -262,6 +265,100 @@ def process_virus_scan_passed(self, filename): update_notification_status_by_id(notification.id, NOTIFICATION_TECHNICAL_FAILURE) +@notify_celery.task(bind=True, name='sanitise-letter', max_retries=15, default_retry_delay=300) +def sanitise_letter(self, filename): + try: + reference = get_reference_from_filename(filename) + notification = dao_get_notification_by_reference(reference) + + current_app.logger.info('Notification ID {} Virus scan passed: {}'.format(notification.id, filename)) + + if notification.status != NOTIFICATION_PENDING_VIRUS_CHECK: + current_app.logger.info('Sanitise letter called for notification {} which has is in {} state'.format( + notification.id, notification.status)) + return + + notify_celery.send_task( + name=TaskNames.SANITISE_LETTER, + kwargs={ + 'notification_id': str(notification.id), + 'filename': filename, + }, + queue=QueueNames.SANITISE_LETTERS, + ) + except Exception: + try: + current_app.logger.exception( + "RETRY: calling sanitise_letter task for notification {} failed".format(notification.id) + ) + self.retry(queue=QueueNames.RETRY) + except self.MaxRetriesExceededError: + message = "RETRY FAILED: Max retries reached. " \ + "The task sanitise_letter failed for notification {}. " \ + "Notification has been updated to technical-failure".format(notification.id) + update_notification_status_by_id(notification.id, NOTIFICATION_TECHNICAL_FAILURE) + raise NotificationTechnicalFailureException(message) + + +@notify_celery.task(name='process-sanitised-letter') +def process_sanitised_letter( + page_count, + message, + invalid_pages, + validation_status, + filename, + notification_id, +): + current_app.logger.info('Processing sanitised letter with id {}'.format(notification_id)) + notification = get_notification_by_id(notification_id, _raise=True) + + if notification.status != NOTIFICATION_PENDING_VIRUS_CHECK: + current_app.logger.info( + 'process-sanitised-letter task called for notification {} which has is in {} state'.format( + notification.id, notification.status) + ) + return + + try: + original_pdf_object = s3.get_s3_object(current_app.config['LETTERS_SCAN_BUCKET_NAME'], filename) + + if validation_status == 'failed': + current_app.logger.info('Processing invalid precompiled pdf with id {} (file {})'.format( + notification_id, filename)) + + _move_invalid_letter_and_update_status( + notification=notification, + filename=filename, + scan_pdf_object=original_pdf_object, + message=message, + invalid_pages=invalid_pages, + page_count=page_count, + ) + return + + current_app.logger.info('Processing valid precompiled pdf with id {} (file {})'.format( + notification_id, filename)) + + billable_units = get_billable_units_for_letter_page_count(page_count) + is_test_key = notification.key_type == KEY_TYPE_TEST + + move_sanitised_letter_to_test_or_live_pdf_bucket(filename, is_test_key, notification.created_at) + # We've moved the sanitised PDF from the sanitise bucket, but still need to delete the original file: + original_pdf_object.delete() + update_letter_pdf_status( + reference=notification.reference, + status=NOTIFICATION_DELIVERED if is_test_key else NOTIFICATION_CREATED, + billable_units=billable_units + ) + + except BotoClientError: + current_app.logger.exception( + "Boto error when processing sanitised letter for notification {}".format(filename, notification.id) + ) + update_notification_status_by_id(notification.id, NOTIFICATION_TECHNICAL_FAILURE) + raise NotificationTechnicalFailureException + + def _move_invalid_letter_and_update_status( *, notification, filename, scan_pdf_object, message=None, invalid_pages=None, page_count=None ): @@ -283,6 +380,7 @@ def _move_invalid_letter_and_update_status( "Error when moving letter with id {} to invalid PDF bucket".format(notification.id) ) update_notification_status_by_id(notification.id, NOTIFICATION_TECHNICAL_FAILURE) + raise NotificationTechnicalFailureException def _upload_pdf_to_test_or_live_pdf_bucket(pdf_data, filename, is_test_letter, created_at): diff --git a/app/letters/utils.py b/app/letters/utils.py index 272cd9f92..cd96a8d19 100644 --- a/app/letters/utils.py +++ b/app/letters/utils.py @@ -156,6 +156,19 @@ def move_uploaded_pdf_to_letters_bucket(source_filename, upload_filename): ) +def move_sanitised_letter_to_test_or_live_pdf_bucket(filename, is_test_letter, created_at): + target_bucket_config = 'TEST_LETTERS_BUCKET_NAME' if is_test_letter else 'LETTERS_PDF_BUCKET_NAME' + target_bucket_name = current_app.config[target_bucket_config] + target_filename = get_folder_name(created_at, dont_use_sending_date=is_test_letter) + filename + + _move_s3_object( + source_bucket=current_app.config['LETTER_SANITISE_BUCKET_NAME'], + source_filename=filename, + target_bucket=target_bucket_name, + target_filename=target_filename, + ) + + def get_file_names_from_error_bucket(): s3 = boto3.resource('s3') scan_bucket = current_app.config['LETTERS_SCAN_BUCKET_NAME'] diff --git a/tests/app/celery/test_letters_pdf_tasks.py b/tests/app/celery/test_letters_pdf_tasks.py index d40047c3f..44d10e865 100644 --- a/tests/app/celery/test_letters_pdf_tasks.py +++ b/tests/app/celery/test_letters_pdf_tasks.py @@ -2,6 +2,7 @@ from unittest.mock import Mock, call, ANY import base64 import boto3 +from datetime import datetime from moto import mock_s3 from flask import current_app from freezegun import freeze_time @@ -13,19 +14,23 @@ from requests import RequestException from sqlalchemy.orm.exc import NoResultFound from app.errors import VirusScanError +from app.exceptions import NotificationTechnicalFailureException from app.celery.letters_pdf_tasks import ( create_letters_pdf, get_letters_pdf, collate_letter_pdfs_for_day, group_letters, letter_in_created_state, + process_sanitised_letter, process_virus_scan_passed, process_virus_scan_failed, process_virus_scan_error, replay_letters_in_error, + sanitise_letter, _move_invalid_letter_and_update_status, _sanitise_precompiled_pdf ) +from app.config import QueueNames, TaskNames from app.letters.utils import ScanErrorType from app.models import ( KEY_TYPE_NORMAL, @@ -51,6 +56,8 @@ def test_should_have_decorated_tasks_functions(): assert process_virus_scan_passed.__wrapped__.__name__ == 'process_virus_scan_passed' assert process_virus_scan_failed.__wrapped__.__name__ == 'process_virus_scan_failed' assert process_virus_scan_error.__wrapped__.__name__ == 'process_virus_scan_error' + assert sanitise_letter.__wrapped__.__name__ == 'sanitise_letter' + assert process_sanitised_letter.__wrapped__.__name__ == 'process_sanitised_letter' @pytest.mark.parametrize('personalisation', [{'name': 'test'}, None]) @@ -666,11 +673,12 @@ def test_move_invalid_letter_and_update_status_logs_error_and_sets_tech_failure_ side_effect=ClientError(error_response, 'operation_name')) mock_logger = mocker.patch('app.celery.tasks.current_app.logger.exception') - _move_invalid_letter_and_update_status( - notification=sample_letter_notification, - filename='filename', - scan_pdf_object=mocker.Mock() - ) + with pytest.raises(NotificationTechnicalFailureException): + _move_invalid_letter_and_update_status( + notification=sample_letter_notification, + filename='filename', + scan_pdf_object=mocker.Mock() + ) assert sample_letter_notification.status == NOTIFICATION_TECHNICAL_FAILURE mock_logger.assert_called_once_with( @@ -678,6 +686,193 @@ def test_move_invalid_letter_and_update_status_logs_error_and_sets_tech_failure_ ) +def test_sanitise_letter_calls_template_preview_sanitise_task(mocker, sample_letter_notification): + mock_celery = mocker.patch('app.celery.letters_pdf_tasks.notify_celery.send_task') + filename = 'NOTIFY.{}'.format(sample_letter_notification.reference) + sample_letter_notification.status = NOTIFICATION_PENDING_VIRUS_CHECK + + sanitise_letter(filename) + + mock_celery.assert_called_once_with( + name=TaskNames.SANITISE_LETTER, + kwargs={'notification_id': str(sample_letter_notification.id), 'filename': filename}, + queue=QueueNames.SANITISE_LETTERS, + ) + + +def test_sanitise_letter_does_not_call_template_preview_sanitise_task_if_notification_in_wrong_state( + mocker, + sample_letter_notification, +): + mock_celery = mocker.patch('app.celery.letters_pdf_tasks.notify_celery.send_task') + filename = 'NOTIFY.{}'.format(sample_letter_notification.reference) + + sanitise_letter(filename) + + assert not mock_celery.called + + +def test_sanitise_letter_does_not_call_template_preview_sanitise_task_if_there_is_an_exception( + mocker, + sample_letter_notification, +): + mocker.patch('app.celery.letters_pdf_tasks.notify_celery.send_task', side_effect=Exception()) + mock_celery_retry = mocker.patch('app.celery.letters_pdf_tasks.sanitise_letter.retry') + + filename = 'NOTIFY.{}'.format(sample_letter_notification.reference) + sample_letter_notification.status = NOTIFICATION_PENDING_VIRUS_CHECK + + sanitise_letter(filename) + + mock_celery_retry.assert_called_once_with(queue='retry-tasks') + + +def test_sanitise_letter_puts_letter_into_technical_failure_if_max_retries_exceeded(sample_letter_notification, mocker): + mocker.patch('app.celery.letters_pdf_tasks.notify_celery.send_task', side_effect=Exception()) + mocker.patch('app.celery.letters_pdf_tasks.sanitise_letter.retry', side_effect=MaxRetriesExceededError()) + + filename = 'NOTIFY.{}'.format(sample_letter_notification.reference) + sample_letter_notification.status = NOTIFICATION_PENDING_VIRUS_CHECK + + with pytest.raises(NotificationTechnicalFailureException): + sanitise_letter(filename) + + assert sample_letter_notification.status == NOTIFICATION_TECHNICAL_FAILURE + + +@mock_s3 +@pytest.mark.parametrize('key_type, destination_bucket, expected_status, destination_filename', [ + (KEY_TYPE_NORMAL, 'LETTERS_PDF_BUCKET_NAME', NOTIFICATION_CREATED, '2018-07-01/NOTIFY.foo'), + (KEY_TYPE_TEST, 'TEST_LETTERS_BUCKET_NAME', NOTIFICATION_DELIVERED, 'NOTIFY.foo'), +]) +def test_process_sanitised_letter_with_valid_letter( + sample_letter_notification, + key_type, + destination_bucket, + expected_status, + destination_filename, +): + filename = 'NOTIFY.{}'.format(sample_letter_notification.reference) + + scan_bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME'] + template_preview_bucket_name = current_app.config['LETTER_SANITISE_BUCKET_NAME'] + destination_bucket_name = current_app.config[destination_bucket] + conn = boto3.resource('s3', region_name='eu-west-1') + + scan_bucket = conn.create_bucket(Bucket=scan_bucket_name) + template_preview_bucket = conn.create_bucket(Bucket=template_preview_bucket_name) + destination_bucket = conn.create_bucket(Bucket=destination_bucket_name) + + s3 = boto3.client('s3', region_name='eu-west-1') + s3.put_object(Bucket=scan_bucket_name, Key=filename, Body=b'original_pdf_content') + s3.put_object(Bucket=template_preview_bucket_name, Key=filename, Body=b'sanitised_pdf_content') + + sample_letter_notification.status = NOTIFICATION_PENDING_VIRUS_CHECK + sample_letter_notification.key_type = key_type + sample_letter_notification.billable_units = 1 + sample_letter_notification.created_at = datetime(2018, 7, 1, 12) + + process_sanitised_letter( + page_count=2, + message=None, + invalid_pages=None, + validation_status='passed', + filename=filename, + notification_id=str(sample_letter_notification.id) + ) + + assert sample_letter_notification.status == expected_status + assert sample_letter_notification.billable_units == 1 + + assert not [x for x in scan_bucket.objects.all()] + assert not [x for x in template_preview_bucket.objects.all()] + assert len([x for x in destination_bucket.objects.all()]) == 1 + + file_contents = conn.Object(destination_bucket_name, destination_filename).get()['Body'].read().decode('utf-8') + assert file_contents == 'sanitised_pdf_content' + + +@mock_s3 +@pytest.mark.parametrize('key_type', [KEY_TYPE_NORMAL, KEY_TYPE_TEST]) +def test_process_sanitised_letter_with_invalid_letter(sample_letter_notification, key_type): + filename = 'NOTIFY.{}'.format(sample_letter_notification.reference) + + scan_bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME'] + template_preview_bucket_name = current_app.config['LETTER_SANITISE_BUCKET_NAME'] + invalid_letter_bucket_name = current_app.config['INVALID_PDF_BUCKET_NAME'] + conn = boto3.resource('s3', region_name='eu-west-1') + + scan_bucket = conn.create_bucket(Bucket=scan_bucket_name) + template_preview_bucket = conn.create_bucket(Bucket=template_preview_bucket_name) + invalid_letter_bucket = conn.create_bucket(Bucket=invalid_letter_bucket_name) + + s3 = boto3.client('s3', region_name='eu-west-1') + s3.put_object(Bucket=scan_bucket_name, Key=filename, Body=b'original_pdf_content') + + sample_letter_notification.status = NOTIFICATION_PENDING_VIRUS_CHECK + sample_letter_notification.key_type = key_type + sample_letter_notification.billable_units = 1 + sample_letter_notification.created_at = datetime(2018, 7, 1, 12) + + process_sanitised_letter( + page_count=2, + message='content-outside-printable-area', + invalid_pages=[1], + validation_status='failed', + filename=filename, + notification_id=str(sample_letter_notification.id) + ) + + assert sample_letter_notification.status == NOTIFICATION_VALIDATION_FAILED + assert sample_letter_notification.billable_units == 0 + + assert not [x for x in scan_bucket.objects.all()] + assert not [x for x in template_preview_bucket.objects.all()] + assert len([x for x in invalid_letter_bucket.objects.all()]) == 1 + + file_contents = conn.Object(invalid_letter_bucket_name, filename).get()['Body'].read().decode('utf-8') + assert file_contents == 'original_pdf_content' + + +def test_process_sanitised_letter_when_letter_status_is_not_pending_virus_scan( + sample_letter_notification, + mocker, +): + mock_s3 = mocker.patch('app.celery.letters_pdf_tasks.s3') + sample_letter_notification.status = NOTIFICATION_CREATED + + process_sanitised_letter( + page_count=2, + message=None, + invalid_pages=None, + validation_status='passed', + filename='NOTIFY.{}'.format(sample_letter_notification.reference), + notification_id=str(sample_letter_notification.id) + ) + + assert not mock_s3.called + + +def test_process_sanitised_letter_puts_letter_into_tech_failure_for_boto_errors( + sample_letter_notification, + mocker, +): + mocker.patch('app.celery.letters_pdf_tasks.s3.get_s3_object', side_effect=ClientError({}, 'operation_name')) + sample_letter_notification.status = NOTIFICATION_PENDING_VIRUS_CHECK + + with pytest.raises(NotificationTechnicalFailureException): + process_sanitised_letter( + page_count=2, + message=None, + invalid_pages=None, + validation_status='passed', + filename='NOTIFY.{}'.format(sample_letter_notification.reference), + notification_id=str(sample_letter_notification.id) + ) + + assert sample_letter_notification.status == NOTIFICATION_TECHNICAL_FAILURE + + def test_process_letter_task_check_virus_scan_failed(sample_letter_notification, mocker): filename = 'NOTIFY.{}'.format(sample_letter_notification.reference) sample_letter_notification.status = NOTIFICATION_PENDING_VIRUS_CHECK diff --git a/tests/app/letters/test_letter_utils.py b/tests/app/letters/test_letter_utils.py index 99cc4fbe1..eaa950fcc 100644 --- a/tests/app/letters/test_letter_utils.py +++ b/tests/app/letters/test_letter_utils.py @@ -12,6 +12,7 @@ from app.letters.utils import ( get_letter_pdf_filename, get_letter_pdf_and_metadata, letter_print_day, + move_sanitised_letter_to_test_or_live_pdf_bucket, upload_letter_pdf, ScanErrorType, move_failed_pdf, get_folder_name ) @@ -338,6 +339,52 @@ def test_get_folder_name_returns_empty_string_for_test_letter(): assert '' == get_folder_name(datetime.utcnow(), dont_use_sending_date=True) +@mock_s3 +def test_move_sanitised_letter_to_live_pdf_bucket(notify_api, mocker): + filename = 'my_letter.pdf' + source_bucket_name = current_app.config['LETTER_SANITISE_BUCKET_NAME'] + target_bucket_name = current_app.config['LETTERS_PDF_BUCKET_NAME'] + + conn = boto3.resource('s3', region_name='eu-west-1') + source_bucket = conn.create_bucket(Bucket=source_bucket_name) + target_bucket = conn.create_bucket(Bucket=target_bucket_name) + + s3 = boto3.client('s3', region_name='eu-west-1') + s3.put_object(Bucket=source_bucket_name, Key=filename, Body=b'pdf_content') + + move_sanitised_letter_to_test_or_live_pdf_bucket( + filename=filename, + is_test_letter=False, + created_at=datetime.utcnow() + ) + + assert not [x for x in source_bucket.objects.all()] + assert len([x for x in target_bucket.objects.all()]) == 1 + + +@mock_s3 +def test_move_sanitised_letter_to_test_pdf_bucket(notify_api, mocker): + filename = 'my_letter.pdf' + source_bucket_name = current_app.config['LETTER_SANITISE_BUCKET_NAME'] + target_bucket_name = current_app.config['TEST_LETTERS_BUCKET_NAME'] + + conn = boto3.resource('s3', region_name='eu-west-1') + source_bucket = conn.create_bucket(Bucket=source_bucket_name) + target_bucket = conn.create_bucket(Bucket=target_bucket_name) + + s3 = boto3.client('s3', region_name='eu-west-1') + s3.put_object(Bucket=source_bucket_name, Key=filename, Body=b'pdf_content') + + move_sanitised_letter_to_test_or_live_pdf_bucket( + filename=filename, + is_test_letter=True, + created_at=datetime.utcnow() + ) + + assert not [x for x in source_bucket.objects.all()] + assert len([x for x in target_bucket.objects.all()]) == 1 + + @freeze_time('2017-07-07 20:00:00') def test_letter_print_day_returns_today_if_letter_was_printed_after_1730_yesterday(): created_at = datetime(2017, 7, 6, 17, 30)