From 001f84b0b865cafc7c9db4d1c039f8cac9aaa5ab Mon Sep 17 00:00:00 2001 From: Leo Hemsted Date: Tue, 21 Aug 2018 18:02:17 +0100 Subject: [PATCH] Sanitise PDFs after virus scan Notify antivirus, on success, calls the process_virus_scan_passed task. Previously, this task would: * update status to created (or delivered for test keys) * copy the file from the scan bucket to either the live or test bucket based on the results * delete the old file in the scan bucket We want it to: * download file from scan bucket * sanitise PDF using new template-preview functionality * if sanitise failed, set to new status "validation-failed" and save the pdf somewhere. * send new pdf to live/test bucket * update status to created (or delivered for test keys) * delete the original file in the scan bucket This PR does some of that: * download file from scan bucket * sanitise PDF using new template-preview functionality * if sanitise failed, just log. * send OLD pdf to live/test bucket * update status to created (or delivered for test keys) * delete the original file in the scan bucket So if sanitising fails, we won't fall over and not deliver the letter, we'll just log a message for now. If sanitise throws an unexpected error (as opposed to a 400), we'll retry up to fifteen times (the same as when creating a new letter). 
I've added the code for using the sanitised pdf, but it's commented out for now --- app/celery/letters_pdf_tasks.py | 81 +++++++++++++++++++++++++++++---- 1 file changed, 73 insertions(+), 8 deletions(-) diff --git a/app/celery/letters_pdf_tasks.py b/app/celery/letters_pdf_tasks.py index d81ecb398..17a557794 100644 --- a/app/celery/letters_pdf_tasks.py +++ b/app/celery/letters_pdf_tasks.py @@ -9,6 +9,7 @@ from requests import ( ) from notifications_utils.statsd_decorators import statsd +from notifications_utils.s3 import s3upload from app import notify_celery from app.aws import s3 @@ -24,9 +25,11 @@ from app.dao.notifications_dao import ( from app.errors import VirusScanError from app.letters.utils import ( get_reference_from_filename, - move_scanned_pdf_to_test_or_live_pdf_bucket, + get_folder_name, upload_letter_pdf, - move_failed_pdf, ScanErrorType, move_error_pdf_to_scan_bucket, + move_failed_pdf, + ScanErrorType, + move_error_pdf_to_scan_bucket, get_file_names_from_error_bucket ) from app.models import ( @@ -34,7 +37,8 @@ from app.models import ( NOTIFICATION_CREATED, NOTIFICATION_DELIVERED, NOTIFICATION_VIRUS_SCAN_FAILED, - NOTIFICATION_TECHNICAL_FAILURE + NOTIFICATION_TECHNICAL_FAILURE, + # NOTIFICATION_VALIDATION_FAILED ) @@ -163,22 +167,83 @@ def letter_in_created_state(filename): return False -@notify_celery.task(name='process-virus-scan-passed') -def process_virus_scan_passed(filename): +@notify_celery.task(bind=True, name='process-virus-scan-passed', max_retries=15, default_retry_delay=300) +def process_virus_scan_passed(self, filename): reference = get_reference_from_filename(filename) notification = dao_get_notification_by_reference(reference) current_app.logger.info('notification id {} Virus scan passed: {}'.format(notification.id, filename)) is_test_key = notification.key_type == KEY_TYPE_TEST - move_scanned_pdf_to_test_or_live_pdf_bucket( + + scan_pdf_object = s3.get_s3_object(current_app.config['LETTERS_SCAN_BUCKET_NAME'], filename) + old_pdf = 
scan_pdf_object.get()['Body'].read() + + new_pdf = _sanitise_precomiled_pdf(self, notification, old_pdf) + + if not new_pdf: + current_app.logger.info('Invalid precompiled pdf received {} ({})'.format(notification.id, filename)) + # update_notification_status_by_id(notification.id, NOTIFICATION_VALIDATION_FAILED) + # move_scan_to_invalid_pdf_bucket() # TODO: implement this (and create bucket etc) + # scan_pdf_object.delete() + # return + + current_app.logger.info('notification id {} ({}) sanitised and ready to send'.format(notification.id, filename)) + + # temporarily upload original pdf while testing sanitise flow. + _upload_pdf_to_test_or_live_pdf_bucket( + old_pdf, # TODO: change to new_pdf filename, - is_test_letter=is_test_key - ) + is_test_letter=is_test_key) + update_letter_pdf_status( reference, NOTIFICATION_DELIVERED if is_test_key else NOTIFICATION_CREATED ) + scan_pdf_object.delete() + + +def _upload_pdf_to_test_or_live_pdf_bucket(pdf_data, filename, is_test_letter): + target_bucket_config = 'TEST_LETTERS_BUCKET_NAME' if is_test_letter else 'LETTERS_PDF_BUCKET_NAME' + target_bucket_name = current_app.config[target_bucket_config] + target_filename = get_folder_name(datetime.utcnow(), is_test_letter) + filename + + s3upload( + filedata=pdf_data, + region=current_app.config['AWS_REGION'], + bucket_name=target_bucket_name, + file_location=target_filename + ) + + +def _sanitise_precomiled_pdf(self, notification, precompiled_pdf): + try: + resp = requests_post( + '{}/precompiled/sanitise'.format( + current_app.config['TEMPLATE_PREVIEW_API_HOST'] + ), + data=precompiled_pdf, + headers={'Authorization': 'Token {}'.format(current_app.config['TEMPLATE_PREVIEW_API_KEY'])} + ) + resp.raise_for_status() + return resp.content + except RequestException as ex: + if ex.status_code == 400: + # validation error + return None + + try: + current_app.logger.exception( + "sanitise_precomiled_pdf failed for notification: {}".format(notification.id) + ) + 
self.retry(queue=QueueNames.RETRY) + except self.MaxRetriesExceededError: + current_app.logger.exception( + "RETRY FAILED: sanitise_precomiled_pdf failed for notification {}".format(notification.id), + ) + update_notification_status_by_id(notification.id, NOTIFICATION_TECHNICAL_FAILURE) + raise + @notify_celery.task(name='process-virus-scan-failed') def process_virus_scan_failed(filename):