From 8b6d28d3b0972b157021a5db6eecfa62007b4f2d Mon Sep 17 00:00:00 2001 From: Richard Chapman Date: Fri, 23 Mar 2018 15:27:24 +0000 Subject: [PATCH 1/6] Added a new task to handle any error cases with the anti-virus application. If the Anti-virus app fails due to s3 errors or ClamAV so does not scan (even after retries) the file at all an error needs to be raised and the notification set to technical-failure. Files should be moved to a 'folder' a separate one for ERROR and FAILURE. * Added new letter task to process the error * Added a new method to letter utils.py to move a file into an error or failure folder based on the input * Added tests to test the task and the utils.py method --- app/celery/letters_pdf_tasks.py | 23 +++++++++-- app/letters/utils.py | 46 ++++++++++++++++++++++ tests/app/celery/test_letters_pdf_tasks.py | 23 ++++++++--- tests/app/letters/test_letter_utils.py | 44 ++++++++++++++++++++- 4 files changed, 124 insertions(+), 12 deletions(-) diff --git a/app/celery/letters_pdf_tasks.py b/app/celery/letters_pdf_tasks.py index e3db9572e..9c05ab223 100644 --- a/app/celery/letters_pdf_tasks.py +++ b/app/celery/letters_pdf_tasks.py @@ -22,16 +22,16 @@ from app.dao.notifications_dao import ( dao_update_notifications_by_reference, ) from app.letters.utils import ( - delete_pdf_from_letters_scan_bucket, get_reference_from_filename, move_scanned_pdf_to_test_or_live_pdf_bucket, - upload_letter_pdf -) + upload_letter_pdf, + move_failed_pdf, ScanErrorType) from app.models import ( KEY_TYPE_TEST, NOTIFICATION_CREATED, NOTIFICATION_DELIVERED, NOTIFICATION_VIRUS_SCAN_FAILED, + NOTIFICATION_TECHNICAL_FAILURE ) @@ -180,7 +180,7 @@ def process_virus_scan_passed(filename): @notify_celery.task(name='process-virus-scan-failed') def process_virus_scan_failed(filename): current_app.logger.exception('Virus scan failed: {}'.format(filename)) - delete_pdf_from_letters_scan_bucket(filename) + move_failed_pdf(filename, ScanErrorType.FAILURE) reference = get_reference_from_filename(filename) updated_count = update_letter_pdf_status(reference, NOTIFICATION_VIRUS_SCAN_FAILED) @@ -192,6 +192,21 @@ def process_virus_scan_failed(filename): ) +@notify_celery.task(name='process-virus-scan-error') +def process_virus_scan_error(filename): + current_app.logger.exception('Virus scan error: {}'.format(filename)) + move_failed_pdf(filename, ScanErrorType.ERROR) + reference = get_reference_from_filename(filename) + updated_count = update_letter_pdf_status(reference, NOTIFICATION_TECHNICAL_FAILURE) + + if updated_count != 1: + raise Exception( + "There should only be one letter notification for each reference. Found {} notifications".format( + updated_count + ) + ) + + def update_letter_pdf_status(reference, status): return dao_update_notifications_by_reference( references=[reference], diff --git a/app/letters/utils.py b/app/letters/utils.py index 16113c69e..4a36230ad 100644 --- a/app/letters/utils.py +++ b/app/letters/utils.py @@ -1,4 +1,5 @@ from datetime import datetime, timedelta +from enum import Enum import boto3 from flask import current_app @@ -9,6 +10,11 @@ from app.models import KEY_TYPE_TEST from app.variables import Retention +class ScanErrorType(Enum): + ERROR = 1 + FAILURE = 2 + + LETTERS_PDF_FILE_LOCATION_STRUCTURE = \ '{folder}NOTIFY.{reference}.{duplex}.{letter_class}.{colour}.{crown}.{date}.pdf' @@ -117,6 +123,46 @@ def delete_pdf_from_letters_scan_bucket(filename): current_app.logger.info("Deleted letter PDF: {}/{}".format(bucket_name, filename)) +def move_failed_pdf(filename, scan_error_type): + bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME'] + + s3 = boto3.resource('s3') + copy_source = {'Bucket': bucket_name, 'Key': filename} + + if scan_error_type == ScanErrorType.ERROR: + target_filename = 'ERROR/' + filename + elif scan_error_type == ScanErrorType.FAILURE: + target_filename = 'FAILURE/' + filename + + target_bucket = s3.Bucket(bucket_name) + obj = target_bucket.Object(target_filename) + + # Tags are copied across but the expiration time is reset in the destination bucket + # e.g. if a file has 5 days left to expire on a ONE_WEEK retention in the source bucket, + # in the destination bucket the expiration time will be reset to 7 days left to expire + obj.copy(copy_source, ExtraArgs={'ServerSideEncryption': 'AES256'}) + + s3.Object(bucket_name, filename).delete() + + current_app.logger.info("Moved letter PDF: {}/{} to {}/{}".format( + bucket_name, filename, bucket_name, target_filename)) + + +def move_pdf_from_letters_scan_bucket(filename, scan_error_type): + bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME'] + + s3 = boto3.resource('s3') + + if scan_error_type == ScanErrorType.ERROR: + file_path = 'ERROR/' + filename + elif scan_error_type == ScanErrorType.FA: + file_path = 'FAILURE/' + filename + + s3.Object(bucket_name, file_path).delete() + + current_app.logger.info("Moved letter PDF: {}/{} to {}/{}".format(bucket_name, filename, bucket_name, file_path)) + + def get_letter_pdf(notification): is_test_letter = notification.key_type == KEY_TYPE_TEST and notification.template.is_precompiled_letter if is_test_letter: diff --git a/tests/app/celery/test_letters_pdf_tasks.py b/tests/app/celery/test_letters_pdf_tasks.py index e492540ed..9d07e7239 100644 --- a/tests/app/celery/test_letters_pdf_tasks.py +++ b/tests/app/celery/test_letters_pdf_tasks.py @@ -19,8 +19,8 @@ from app.celery.letters_pdf_tasks import ( letter_in_created_state, process_virus_scan_passed, process_virus_scan_failed, -) -from app.letters.utils import get_letter_pdf_filename + process_virus_scan_error) +from app.letters.utils import get_letter_pdf_filename, ScanErrorType from app.models import ( KEY_TYPE_NORMAL, KEY_TYPE_TEST, @@ -28,8 +28,8 @@ from app.models import ( NOTIFICATION_CREATED, NOTIFICATION_DELIVERED, NOTIFICATION_VIRUS_SCAN_FAILED, - NOTIFICATION_SENDING -) + NOTIFICATION_SENDING, + NOTIFICATION_TECHNICAL_FAILURE) from tests.conftest import set_config_values @@ -341,9 +341,20 @@ def test_process_letter_task_check_virus_scan_passed( def test_process_letter_task_check_virus_scan_failed(sample_letter_notification, mocker): filename = 'NOTIFY.{}'.format(sample_letter_notification.reference) sample_letter_notification.status = 'pending-virus-check' - mock_delete_pdf = mocker.patch('app.celery.letters_pdf_tasks.delete_pdf_from_letters_scan_bucket') + mock_move_failed_pdf = mocker.patch('app.celery.letters_pdf_tasks.move_failed_pdf') process_virus_scan_failed(filename) - mock_delete_pdf.assert_called_once_with(filename) + mock_move_failed_pdf.assert_called_once_with(filename, ScanErrorType.FAILURE) assert sample_letter_notification.status == NOTIFICATION_VIRUS_SCAN_FAILED + + +def test_process_letter_task_check_virus_scan_error(sample_letter_notification, mocker): + filename = 'NOTIFY.{}'.format(sample_letter_notification.reference) + sample_letter_notification.status = 'pending-virus-check' + mock_move_failed_pdf = mocker.patch('app.celery.letters_pdf_tasks.move_failed_pdf') + + process_virus_scan_error(filename) + + mock_move_failed_pdf.assert_called_once_with(filename, ScanErrorType.ERROR) + assert sample_letter_notification.status == NOTIFICATION_TECHNICAL_FAILURE diff --git a/tests/app/letters/test_letter_utils.py b/tests/app/letters/test_letter_utils.py index 662862e50..400c545dc 100644 --- a/tests/app/letters/test_letter_utils.py +++ b/tests/app/letters/test_letter_utils.py @@ -11,8 +11,8 @@ from app.letters.utils import ( get_letter_pdf_filename, get_letter_pdf, upload_letter_pdf, - move_scanned_pdf_to_test_or_live_pdf_bucket -) + move_scanned_pdf_to_test_or_live_pdf_bucket, + ScanErrorType, move_failed_pdf) from app.models import KEY_TYPE_NORMAL, KEY_TYPE_TEST, PRECOMPILED_TEMPLATE_NAME from app.variables import Retention @@ -163,3 +163,43 @@ def test_move_scanned_letter_pdf_to_processing_bucket( assert folder_date_name + filename in [o.key for o in target_bucket.objects.all()] assert filename not in [o.key for o in source_bucket.objects.all()] + + +@mock_s3 +@freeze_time(FROZEN_DATE_TIME) +def test_move_failed_pdf_error(notify_api): + filename = 'test.pdf' + source_bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME'] + target_bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME'] + + conn = boto3.resource('s3', region_name='eu-west-1') + source_bucket = conn.create_bucket(Bucket=source_bucket_name) + target_bucket = conn.create_bucket(Bucket=target_bucket_name) + + s3 = boto3.client('s3', region_name='eu-west-1') + s3.put_object(Bucket=source_bucket_name, Key=filename, Body=b'pdf_content') + + move_failed_pdf(filename, ScanErrorType.ERROR) + + assert 'ERROR/' + filename in [o.key for o in target_bucket.objects.all()] + assert filename not in [o.key for o in source_bucket.objects.all()] + + +@mock_s3 +@freeze_time(FROZEN_DATE_TIME) +def test_move_failed_pdf_scan_failed(notify_api): + filename = 'test.pdf' + source_bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME'] + target_bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME'] + + conn = boto3.resource('s3', region_name='eu-west-1') + source_bucket = conn.create_bucket(Bucket=source_bucket_name) + target_bucket = conn.create_bucket(Bucket=target_bucket_name) + + s3 = boto3.client('s3', region_name='eu-west-1') + s3.put_object(Bucket=source_bucket_name, Key=filename, Body=b'pdf_content') + + move_failed_pdf(filename, ScanErrorType.FAILURE) + + assert 'FAILURE/' + filename in [o.key for o in target_bucket.objects.all()] + assert filename not in [o.key for o in source_bucket.objects.all()] From 598539dcb3a8937b97b533f6cd6276432c73bb17 Mon Sep 17 00:00:00 2001 From: Rebecca Law Date: Mon, 26 Mar 2018 15:24:21 +0100 Subject: [PATCH 2/6] Update logging for provider tasks. Move the info message before the fetch. Include the exception in the log message. --- app/celery/provider_tasks.py | 8 ++++---- tests/app/celery/test_provider_tasks.py | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/app/celery/provider_tasks.py b/app/celery/provider_tasks.py index 2e59541a9..957456fe1 100644 --- a/app/celery/provider_tasks.py +++ b/app/celery/provider_tasks.py @@ -22,15 +22,15 @@ def worker_process_shutdown(sender, signal, pid, exitcode): @statsd(namespace="tasks") def deliver_sms(self, notification_id): try: + current_app.logger.info("Start sending SMS for notification id: {}".format(notification_id)) notification = notifications_dao.get_notification_by_id(notification_id) if not notification: raise NoResultFound() - current_app.logger.info("Start sending SMS for notification id: {}".format(notification_id)) send_to_providers.send_sms_to_provider(notification) except Exception as e: try: current_app.logger.exception( - "SMS notification delivery for id: {} failed".format(notification_id) + "SMS notification delivery for id: {} failed".format(notification_id), e ) self.retry(queue=QueueNames.RETRY) except self.MaxRetriesExceededError: @@ -44,10 +44,10 @@ def deliver_sms(self, notification_id): @statsd(namespace="tasks") def deliver_email(self, notification_id): try: + current_app.logger.info("Start sending email for notification id: {}".format(notification_id)) notification = notifications_dao.get_notification_by_id(notification_id) if not notification: raise NoResultFound() - current_app.logger.info("Start sending email for notification id: {}".format(notification_id)) send_to_providers.send_email_to_provider(notification) except InvalidEmailError as e: current_app.logger.exception(e) @@ -55,7 +55,7 @@ def deliver_email(self, notification_id): except Exception as e: try: current_app.logger.exception( - "RETRY: Email notification {} failed".format(notification_id) + "RETRY: Email notification {} failed".format(notification_id), e ) self.retry(queue=QueueNames.RETRY) except self.MaxRetriesExceededError: diff --git a/tests/app/celery/test_provider_tasks.py b/tests/app/celery/test_provider_tasks.py index 79cf03021..c0dfbdc63 100644 --- a/tests/app/celery/test_provider_tasks.py +++ b/tests/app/celery/test_provider_tasks.py @@ -1,10 +1,12 @@ import pytest +from botocore.exceptions import ClientError from celery.exceptions import MaxRetriesExceededError from notifications_utils.recipients import InvalidEmailError import app from app.celery import provider_tasks from app.celery.provider_tasks import deliver_sms, deliver_email +from app.clients.email.aws_ses import AwsSesClientException from app.exceptions import NotificationTechnicalFailureException @@ -92,6 +94,24 @@ def test_should_technical_error_and_not_retry_if_invalid_email(sample_notificati assert sample_notification.status == 'technical-failure' +def test_should_retry_and_log_exception(sample_notification, mocker): + error_response = { + 'Error': { + 'Code': 'SomeError', + 'Message': 'some error message from amazon', + 'Type': 'Sender' + } + } + ex = ClientError(error_response=error_response, operation_name='opname') + mocker.patch('app.delivery.send_to_providers.send_email_to_provider', side_effect=AwsSesClientException(str(ex))) + mocker.patch('app.celery.provider_tasks.deliver_email.retry') + + deliver_email(sample_notification.id) + + assert provider_tasks.deliver_email.retry.called is True + assert sample_notification.status == 'created' + + def test_send_sms_should_switch_providers_on_provider_failure(sample_notification, mocker): provider_to_use = mocker.patch('app.delivery.send_to_providers.provider_to_use') provider_to_use.return_value.send_sms.side_effect = Exception('Error') From c75458cee9d5d6782af12b769aeff1cfb3af276f Mon Sep 17 00:00:00 2001 From: Rebecca Law Date: Mon, 26 Mar 2018 16:44:29 +0100 Subject: [PATCH 3/6] Revert change to exception log. --- app/celery/provider_tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/celery/provider_tasks.py b/app/celery/provider_tasks.py index 957456fe1..9a543ea23 100644 --- a/app/celery/provider_tasks.py +++ b/app/celery/provider_tasks.py @@ -30,7 +30,7 @@ def deliver_sms(self, notification_id): except Exception as e: try: current_app.logger.exception( - "SMS notification delivery for id: {} failed".format(notification_id), e + "SMS notification delivery for id: {} failed".format(notification_id) ) self.retry(queue=QueueNames.RETRY) except self.MaxRetriesExceededError: @@ -55,7 +55,7 @@ def deliver_email(self, notification_id): except Exception as e: try: current_app.logger.exception( - "RETRY: Email notification {} failed".format(notification_id), e + "RETRY: Email notification {} failed".format(notification_id) ) self.retry(queue=QueueNames.RETRY) except self.MaxRetriesExceededError: From 3299055a09082f3002dd220196f27beea110b2ca Mon Sep 17 00:00:00 2001 From: Richard Chapman Date: Tue, 27 Mar 2018 10:32:40 +0100 Subject: [PATCH 4/6] Refactored the shared code between move_scanned_pdf_to_test_or_live_pdf_bucket and move_failed_pdf to consolidate some code so it is easier to maintain in future as so that _move_s3_object can be used for any new methods. --- app/letters/utils.py | 85 ++++++++++++++------------------------------ 1 file changed, 26 insertions(+), 59 deletions(-) diff --git a/app/letters/utils.py b/app/letters/utils.py index 4a36230ad..37d1145a2 100644 --- a/app/letters/utils.py +++ b/app/letters/utils.py @@ -91,76 +91,25 @@ def upload_letter_pdf(notification, pdf_data): return upload_file_name -def move_scanned_pdf_to_test_or_live_pdf_bucket(filename, is_test_letter=False): +def move_scanned_pdf_to_test_or_live_pdf_bucket(source_filename, is_test_letter=False): source_bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME'] target_bucket_config = 'TEST_LETTERS_BUCKET_NAME' if is_test_letter else 'LETTERS_PDF_BUCKET_NAME' target_bucket_name = current_app.config[target_bucket_config] - s3 = boto3.resource('s3') - copy_source = {'Bucket': source_bucket_name, 'Key': filename} - target_filename = get_folder_name(datetime.utcnow(), is_test_letter) + filename + target_filename = get_folder_name(datetime.utcnow(), is_test_letter) + source_filename - target_bucket = s3.Bucket(target_bucket_name) - obj = target_bucket.Object(target_filename) - - # Tags are copied across but the expiration time is reset in the destination bucket - # e.g. if a file has 5 days left to expire on a ONE_WEEK retention in the source bucket, - # in the destination bucket the expiration time will be reset to 7 days left to expire - obj.copy(copy_source, ExtraArgs={'ServerSideEncryption': 'AES256'}) - - s3.Object(source_bucket_name, filename).delete() - - current_app.logger.info("Moved letter PDF: {}/{} to {}/{}".format( - source_bucket_name, filename, target_bucket_name, target_filename)) + _move_s3_object(source_bucket_name, source_filename, target_bucket_name, target_filename) -def delete_pdf_from_letters_scan_bucket(filename): - bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME'] - - s3 = boto3.resource('s3') - s3.Object(bucket_name, filename).delete() - - current_app.logger.info("Deleted letter PDF: {}/{}".format(bucket_name, filename)) - - -def move_failed_pdf(filename, scan_error_type): - bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME'] - - s3 = boto3.resource('s3') - copy_source = {'Bucket': bucket_name, 'Key': filename} +def move_failed_pdf(source_filename, scan_error_type): + scan_bucket = current_app.config['LETTERS_SCAN_BUCKET_NAME'] if scan_error_type == ScanErrorType.ERROR: - target_filename = 'ERROR/' + filename + target_filename = 'ERROR/' + source_filename elif scan_error_type == ScanErrorType.FAILURE: - target_filename = 'FAILURE/' + filename + target_filename = 'FAILURE/' + source_filename - target_bucket = s3.Bucket(bucket_name) - obj = target_bucket.Object(target_filename) - - # Tags are copied across but the expiration time is reset in the destination bucket - # e.g. if a file has 5 days left to expire on a ONE_WEEK retention in the source bucket, - # in the destination bucket the expiration time will be reset to 7 days left to expire - obj.copy(copy_source, ExtraArgs={'ServerSideEncryption': 'AES256'}) - - s3.Object(bucket_name, filename).delete() - - current_app.logger.info("Moved letter PDF: {}/{} to {}/{}".format( - bucket_name, filename, bucket_name, target_filename)) - - -def move_pdf_from_letters_scan_bucket(filename, scan_error_type): - bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME'] - - s3 = boto3.resource('s3') - - if scan_error_type == ScanErrorType.ERROR: - file_path = 'ERROR/' + filename - elif scan_error_type == ScanErrorType.FA: - file_path = 'FAILURE/' + filename - - s3.Object(bucket_name, file_path).delete() - - current_app.logger.info("Moved letter PDF: {}/{} to {}/{}".format(bucket_name, filename, bucket_name, file_path)) + _move_s3_object(scan_bucket, source_filename, scan_bucket, target_filename) def get_letter_pdf(notification): @@ -181,3 +130,21 @@ def get_letter_pdf(notification): file_content = obj.get()["Body"].read() return file_content + + +def _move_s3_object(source_bucket, source_filename, target_bucket, target_filename): + s3 = boto3.resource('s3') + copy_source = {'Bucket': source_bucket, 'Key': source_filename} + + target_bucket = s3.Bucket(target_bucket) + obj = target_bucket.Object(target_filename) + + # Tags are copied across but the expiration time is reset in the destination bucket + # e.g. if a file has 5 days left to expire on a ONE_WEEK retention in the source bucket, + # in the destination bucket the expiration time will be reset to 7 days left to expire + obj.copy(copy_source, ExtraArgs={'ServerSideEncryption': 'AES256'}) + + s3.Object(source_bucket, source_filename).delete() + + current_app.logger.info("Moved letter PDF: {}/{} to {}/{}".format( + source_bucket, source_filename, target_bucket, target_filename)) From 023862dfdcdae803c7982b66a78fd9c10904b689 Mon Sep 17 00:00:00 2001 From: Richard Chapman Date: Tue, 27 Mar 2018 13:32:46 +0100 Subject: [PATCH 5/6] Refactored code to be more Pythonic and make the code more readable --- app/letters/utils.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/app/letters/utils.py b/app/letters/utils.py index 37d1145a2..4fa041491 100644 --- a/app/letters/utils.py +++ b/app/letters/utils.py @@ -104,10 +104,7 @@ def move_scanned_pdf_to_test_or_live_pdf_bucket(source_filename, is_test_letter= def move_failed_pdf(source_filename, scan_error_type): scan_bucket = current_app.config['LETTERS_SCAN_BUCKET_NAME'] - if scan_error_type == ScanErrorType.ERROR: - target_filename = 'ERROR/' + source_filename - elif scan_error_type == ScanErrorType.FAILURE: - target_filename = 'FAILURE/' + source_filename + target_filename = ('ERROR/' if scan_error_type == ScanErrorType.ERROR else 'FAILURE/') + source_filename _move_s3_object(scan_bucket, source_filename, scan_bucket, target_filename) From eebd754a581ccf0245ddcc0a97d55a45bf57c205 Mon Sep 17 00:00:00 2001 From: Chris Hill-Scott Date: Tue, 27 Mar 2018 14:44:41 +0100 Subject: [PATCH 6/6] =?UTF-8?q?Don=E2=80=99t=20show=20HTML=20entities=20in?= =?UTF-8?q?=20plain=20text=20emails?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Brings in: - [ ] https://github.com/alphagov/notifications-utils/pull/427 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 0a7da8868..24316ae9c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,6 +23,6 @@ notifications-python-client==4.8.1 # PaaS awscli-cwlogs>=1.4,<1.5 -git+https://github.com/alphagov/notifications-utils.git@25.2.1#egg=notifications-utils==25.2.1 +git+https://github.com/alphagov/notifications-utils.git@25.2.2#egg=notifications-utils==25.2.2 git+https://github.com/alphagov/boto.git@2.43.0-patch3#egg=boto==2.43.0-patch3