From 0a617379c41f67046ff61f4a96fbf07ba1aaa498 Mon Sep 17 00:00:00 2001 From: Pea Tyczynska Date: Fri, 11 Oct 2019 12:24:53 +0100 Subject: [PATCH 1/4] Put pdf letter validation failed message in S3 metadata So it can be used to tell the user why the letter has failed validation --- app/celery/letters_pdf_tasks.py | 39 +++++++---- app/letters/utils.py | 28 ++++++-- tests/app/celery/test_letters_pdf_tasks.py | 79 +++++++++++++--------- 3 files changed, 93 insertions(+), 53 deletions(-) diff --git a/app/celery/letters_pdf_tasks.py b/app/celery/letters_pdf_tasks.py index 434f2fdb7..ebe541cd9 100644 --- a/app/celery/letters_pdf_tasks.py +++ b/app/celery/letters_pdf_tasks.py @@ -49,7 +49,6 @@ from app.models import ( NOTIFICATION_VIRUS_SCAN_FAILED, ) from app.cronitor import cronitor -from json import JSONDecodeError @notify_celery.task(bind=True, name="create-letters-pdf", max_retries=15, default_retry_delay=300) @@ -214,18 +213,16 @@ def process_virus_scan_passed(self, filename): billable_units = get_page_count(old_pdf) except PdfReadError: current_app.logger.exception(msg='Invalid PDF received for notification_id: {}'.format(notification.id)) - _move_invalid_letter_and_update_status(notification, filename, scan_pdf_object) + _move_invalid_letter_and_update_status( + notification=notification, filename=filename, scan_pdf_object=scan_pdf_object + ) return sanitise_response = _sanitise_precompiled_pdf(self, notification, old_pdf) - if not sanitise_response: + if sanitise_response["message"]: # is response without message attribute possible now? I think not? new_pdf = None else: - sanitise_response = sanitise_response.json() - try: - new_pdf = base64.b64decode(sanitise_response["file"].encode()) - except JSONDecodeError: - new_pdf = sanitise_response.content + new_pdf = base64.b64decode(sanitise_response["file"].encode()) redaction_failed_message = sanitise_response.get("redaction_failed_message") if redaction_failed_message and not is_test_key: @@ -241,7 +238,14 @@ def process_virus_scan_passed(self, filename): if not new_pdf: current_app.logger.info('Invalid precompiled pdf received {} ({})'.format(notification.id, filename)) - _move_invalid_letter_and_update_status(notification, filename, scan_pdf_object) + _move_invalid_letter_and_update_status( + notification=notification, + filename=filename, + scan_pdf_object=scan_pdf_object, + message=sanitise_response["message"], + invalid_pages=sanitise_response.get("invalid_pages"), + page_count=sanitise_response.get("page_count") + ) return else: current_app.logger.info( @@ -270,9 +274,16 @@ def process_virus_scan_passed(self, filename): update_notification_status_by_id(notification.id, NOTIFICATION_TECHNICAL_FAILURE) -def _move_invalid_letter_and_update_status(notification, filename, scan_pdf_object): +def _move_invalid_letter_and_update_status( + *, notification, filename, scan_pdf_object, message=None, invalid_pages=None, page_count=None +): try: - move_scan_to_invalid_pdf_bucket(filename) + move_scan_to_invalid_pdf_bucket( + source_filename=filename, + message=message, + invalid_pages=invalid_pages, + page_count=page_count + ) scan_pdf_object.delete() update_letter_pdf_status( @@ -311,17 +322,19 @@ def _sanitise_precompiled_pdf(self, notification, precompiled_pdf): 'Notification-ID': str(notification.id)} ) response.raise_for_status() - return response + return response.json() except RequestException as ex: if ex.response is not None and ex.response.status_code == 400: message = "sanitise_precompiled_pdf validation error for notification: {}. ".format(notification.id) if "message" in response.json(): message += response.json()["message"] + if "invalid_pages" in response.json(): + message += (" on pages: " + ", ".join(map(str, response.json()["invalid_pages"]))) current_app.logger.info( message ) - return None + return response.json() try: current_app.logger.exception( diff --git a/app/letters/utils.py b/app/letters/utils.py index 98e954fb5..f67ba00d1 100644 --- a/app/letters/utils.py +++ b/app/letters/utils.py @@ -127,10 +127,21 @@ def move_error_pdf_to_scan_bucket(source_filename): _move_s3_object(scan_bucket, error_file, scan_bucket, source_filename) -def move_scan_to_invalid_pdf_bucket(source_filename): - scan_bucket = current_app.config['LETTERS_SCAN_BUCKET_NAME'] - invalid_pdf_bucket = current_app.config['INVALID_PDF_BUCKET_NAME'] - _move_s3_object(scan_bucket, source_filename, invalid_pdf_bucket, source_filename) +def move_scan_to_invalid_pdf_bucket(source_filename, message=None, invalid_pages=None, page_count=None): + metadata = {} + if message: + metadata["validation_failed_message"] = message + if invalid_pages: + metadata["invalid_pages"] = [str(p) for p in invalid_pages] + if page_count: + metadata["page_count"] = str(page_count) + _move_s3_object( + source_bucket=current_app.config['LETTERS_SCAN_BUCKET_NAME'], + source_filename=source_filename, + target_bucket=current_app.config['INVALID_PDF_BUCKET_NAME'], + target_filename=source_filename, + metadata=metadata + ) def move_uploaded_pdf_to_letters_bucket(source_filename, upload_filename): @@ -138,7 +149,7 @@ def move_uploaded_pdf_to_letters_bucket(source_filename, upload_filename): source_bucket=current_app.config['TRANSIENT_UPLOADED_LETTERS'], source_filename=source_filename, target_bucket=current_app.config['LETTERS_PDF_BUCKET_NAME'], - target_filename=upload_filename + target_filename=upload_filename, ) @@ -164,7 +175,7 @@ def get_letter_pdf(notification): return obj.get()["Body"].read() -def _move_s3_object(source_bucket, source_filename, target_bucket, target_filename): +def _move_s3_object(source_bucket, source_filename, target_bucket, target_filename, metadata=None): s3 = boto3.resource('s3') copy_source = {'Bucket': source_bucket, 'Key': source_filename} @@ -174,7 +185,10 @@ def _move_s3_object(source_bucket, source_filename, target_bucket, target_filena # Tags are copied across but the expiration time is reset in the destination bucket # e.g. if a file has 5 days left to expire on a ONE_WEEK retention in the source bucket, # in the destination bucket the expiration time will be reset to 7 days left to expire - obj.copy(copy_source, ExtraArgs={'ServerSideEncryption': 'AES256'}) + put_args = {'ServerSideEncryption': 'AES256'} + if metadata: + metadata = put_args['Metadata'] = metadata + obj.copy(copy_source, ExtraArgs=put_args) s3.Object(source_bucket, source_filename).delete() diff --git a/tests/app/celery/test_letters_pdf_tasks.py b/tests/app/celery/test_letters_pdf_tasks.py index 441e3bffc..0bf87bf5b 100644 --- a/tests/app/celery/test_letters_pdf_tasks.py +++ b/tests/app/celery/test_letters_pdf_tasks.py @@ -436,10 +436,9 @@ def test_process_letter_task_check_virus_scan_passed( json={ "file": base64.b64encode(b"new_pdf").decode("utf-8"), "validation_passed": True, - "errors": { - "content_outside_of_printable_area": [], - "document_not_a4_size_portrait_orientation": [], - } + "message": "", + "invalid_pages": [], + "page_count": 1 }, status_code=200 ) @@ -479,7 +478,16 @@ def test_process_letter_task_check_virus_scan_passed_when_sanitise_fails( sample_letter_notification.status = NOTIFICATION_PENDING_VIRUS_CHECK sample_letter_notification.key_type = key_type mock_move_s3 = mocker.patch('app.letters.utils._move_s3_object') - mock_sanitise = mocker.patch('app.celery.letters_pdf_tasks._sanitise_precompiled_pdf', return_value=None) + sanitise_response = { + "file": base64.b64encode(b"nyan").decode("utf-8"), + "validation_passed": False, + "message": "content-outside-printable-area", + "invalid_pages": [1], + "page_count": 1 + } + mock_sanitise = mocker.patch( + 'app.celery.letters_pdf_tasks._sanitise_precompiled_pdf', return_value=sanitise_response + ) mock_get_page_count = mocker.patch('app.celery.letters_pdf_tasks.get_page_count', return_value=2) process_virus_scan_passed(filename) @@ -492,8 +500,12 @@ def test_process_letter_task_check_virus_scan_passed_when_sanitise_fails( b'pdf_content' ) mock_move_s3.assert_called_once_with( - source_bucket_name, filename, - target_bucket_name, filename + source_bucket=source_bucket_name, source_filename=filename, + target_bucket=target_bucket_name, target_filename=filename, metadata={ + "validation_failed_message": "content-outside-printable-area", + "invalid_pages": ["1"], + "page_count": "1" + } ) mock_get_page_count.assert_called_once_with(b'pdf_content') @@ -533,10 +545,9 @@ def test_process_letter_task_check_virus_scan_passed_when_redaction_fails( "file": base64.b64encode(b"new_pdf").decode("utf-8"), "validation_passed": True, "redaction_failed_message": "No matches for address block during redaction procedure", - "errors": { - "content_outside_of_printable_area": [], - "document_not_a4_size_portrait_orientation": [] - } + "message": "", + "invalid_pages": "", + "page_count": 2 }, status_code=200 ) @@ -582,8 +593,8 @@ def test_process_letter_task_check_virus_scan_passed_when_file_cannot_be_opened( mock_sanitise.assert_not_called() mock_get_page_count.assert_called_once_with(b'pdf_content') mock_move_s3.assert_called_once_with( - source_bucket_name, filename, - target_bucket_name, filename + source_bucket=source_bucket_name, source_filename=filename, + target_bucket=target_bucket_name, target_filename=filename, metadata={} ) assert sample_letter_notification.status == NOTIFICATION_VALIDATION_FAILED assert sample_letter_notification.billable_units == 0 @@ -626,10 +637,9 @@ def test_process_virus_scan_passed_logs_error_and_sets_tech_failure_if_s3_error_ json={ "file": base64.b64encode(b"new_pdf").decode("utf-8"), "validation_passed": True, - "errors": { - "content_outside_of_printable_area": [], - "document_not_a4_size_portrait_orientation": [], - } + "message": "", + "invalid_pages": [], + "page_count": 1 }, status_code=200 ) @@ -656,7 +666,11 @@ def test_move_invalid_letter_and_update_status_logs_error_and_sets_tech_failure_ side_effect=ClientError(error_response, 'operation_name')) mock_logger = mocker.patch('app.celery.tasks.current_app.logger.exception') - _move_invalid_letter_and_update_status(sample_letter_notification, 'filename', mocker.Mock()) + _move_invalid_letter_and_update_status( + notification=sample_letter_notification, + filename='filename', + scan_pdf_object=mocker.Mock() + ) assert sample_letter_notification.status == NOTIFICATION_TECHNICAL_FAILURE mock_logger.assert_called_once_with( @@ -720,10 +734,9 @@ def test_sanitise_precompiled_pdf_returns_data_from_template_preview(rmock, samp json={ "file": base64.b64encode(b"new_pdf").decode("utf-8"), "validation_passed": True, - "errors": { - "content_outside_of_printable_area": [], - "document_not_a4_size_portrait_orientation": [], - } + "message": "", + "invalid_pages": [], + "page_count": 1 }, status_code=200 ) @@ -732,26 +745,26 @@ def test_sanitise_precompiled_pdf_returns_data_from_template_preview(rmock, samp assert rmock.called assert rmock.request_history[0].url == endpoint - assert base64.b64decode(response.json()["file"].encode()) == b"new_pdf" + assert base64.b64decode(response["file"].encode()) == b"new_pdf" assert rmock.last_request.text == 'old_pdf' -def test_sanitise_precompiled_pdf_returns_none_on_validation_error(rmock, sample_letter_notification): +def test_sanitise_precompiled_pdf_return_validation_error(rmock, sample_letter_notification): sample_letter_notification.status = NOTIFICATION_PENDING_VIRUS_CHECK endpoint = 'http://localhost:9999/precompiled/sanitise' + response_json = { + "file": base64.b64encode(b"nyan").decode("utf-8"), + "validation_passed": False, + "message": "content-outside-printable-area", + "invalid_pages": [1], + "page_count": 1 + } with requests_mock.mock() as rmock: rmock.request( "POST", endpoint, - json={ - "file": base64.b64encode(b"nyan").decode("utf-8"), - "validation_passed": False, - "errors": { - "content_outside_of_printable_area": [1], - "document_not_a4_size_portrait_orientation": [], - } - }, + json=response_json, status_code=400 ) mock_celery = Mock(**{'retry.side_effect': Retry}) @@ -759,7 +772,7 @@ def test_sanitise_precompiled_pdf_returns_none_on_validation_error(rmock, sample assert rmock.called assert rmock.request_history[0].url == endpoint - assert response is None + assert response == response_json def test_sanitise_precompiled_pdf_passes_the_service_id_and_notification_id_to_template_preview( From 0b65e75fe974ad3d75720be59fea3007dfecfd81 Mon Sep 17 00:00:00 2001 From: Pea Tyczynska Date: Mon, 14 Oct 2019 10:41:37 +0100 Subject: [PATCH 2/4] Format metadata correctly and use MetadataDirective to put new metadata in S3 object --- app/letters/utils.py | 6 ++++-- tests/app/celery/test_letters_pdf_tasks.py | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/app/letters/utils.py b/app/letters/utils.py index f67ba00d1..916274e2f 100644 --- a/app/letters/utils.py +++ b/app/letters/utils.py @@ -132,9 +132,10 @@ def move_scan_to_invalid_pdf_bucket(source_filename, message=None, invalid_pages if message: metadata["validation_failed_message"] = message if invalid_pages: - metadata["invalid_pages"] = [str(p) for p in invalid_pages] + metadata["invalid_pages"] = "-".join(map(str, invalid_pages)) if page_count: metadata["page_count"] = str(page_count) + _move_s3_object( source_bucket=current_app.config['LETTERS_SCAN_BUCKET_NAME'], source_filename=source_filename, @@ -187,7 +188,8 @@ def _move_s3_object(source_bucket, source_filename, target_bucket, target_filena # in the destination bucket the expiration time will be reset to 7 days left to expire put_args = {'ServerSideEncryption': 'AES256'} if metadata: - metadata = put_args['Metadata'] = metadata + put_args['Metadata'] = metadata + put_args["MetadataDirective"] = "REPLACE" obj.copy(copy_source, ExtraArgs=put_args) s3.Object(source_bucket, source_filename).delete() diff --git a/tests/app/celery/test_letters_pdf_tasks.py b/tests/app/celery/test_letters_pdf_tasks.py index 0bf87bf5b..41bc7bb69 100644 --- a/tests/app/celery/test_letters_pdf_tasks.py +++ b/tests/app/celery/test_letters_pdf_tasks.py @@ -482,7 +482,7 @@ def test_process_letter_task_check_virus_scan_passed_when_sanitise_fails( "file": base64.b64encode(b"nyan").decode("utf-8"), "validation_passed": False, "message": "content-outside-printable-area", - "invalid_pages": [1], + "invalid_pages": [1, 2], "page_count": 1 } mock_sanitise = mocker.patch( @@ -503,7 +503,7 @@ def test_process_letter_task_check_virus_scan_passed_when_sanitise_fails( source_bucket=source_bucket_name, source_filename=filename, target_bucket=target_bucket_name, target_filename=filename, metadata={ "validation_failed_message": "content-outside-printable-area", - "invalid_pages": ["1"], + "invalid_pages": "1-2", "page_count": "1" } ) From 6ee7ac6cac3f9de6366577e98f653ae398efd9f9 Mon Sep 17 00:00:00 2001 From: Pea Tyczynska Date: Wed, 16 Oct 2019 13:35:30 +0100 Subject: [PATCH 3/4] Refactor and harmonise metadata for invalid letters with those sent from admin app --- app/celery/letters_pdf_tasks.py | 28 +++++-------- app/letters/utils.py | 21 ++++++---- app/service/send_notification.py | 4 +- tests/app/celery/test_letters_pdf_tasks.py | 46 +++++++++++----------- 4 files changed, 51 insertions(+), 48 deletions(-) diff --git a/app/celery/letters_pdf_tasks.py b/app/celery/letters_pdf_tasks.py index ebe541cd9..366a9f954 100644 --- a/app/celery/letters_pdf_tasks.py +++ b/app/celery/letters_pdf_tasks.py @@ -5,7 +5,6 @@ from uuid import UUID from hashlib import sha512 from base64 import urlsafe_b64encode -from PyPDF2.utils import PdfReadError from botocore.exceptions import ClientError as BotoClientError from flask import current_app from requests import ( @@ -30,6 +29,7 @@ from app.dao.notifications_dao import ( from app.errors import VirusScanError from app.letters.utils import ( copy_redaction_failed_pdf, + get_billable_units_for_letter_page_count, get_reference_from_filename, get_folder_name, upload_letter_pdf, @@ -38,7 +38,6 @@ from app.letters.utils import ( move_scan_to_invalid_pdf_bucket, move_error_pdf_to_scan_bucket, get_file_names_from_error_bucket, - get_page_count, ) from app.models import ( KEY_TYPE_TEST, @@ -209,17 +208,8 @@ def process_virus_scan_passed(self, filename): scan_pdf_object = s3.get_s3_object(current_app.config['LETTERS_SCAN_BUCKET_NAME'], filename) old_pdf = scan_pdf_object.get()['Body'].read() - try: - billable_units = get_page_count(old_pdf) - except PdfReadError: - current_app.logger.exception(msg='Invalid PDF received for notification_id: {}'.format(notification.id)) - _move_invalid_letter_and_update_status( - notification=notification, filename=filename, scan_pdf_object=scan_pdf_object - ) - return - - sanitise_response = _sanitise_precompiled_pdf(self, notification, old_pdf) - if sanitise_response["message"]: # is response without message attribute possible now? I think not? + sanitise_response, result = _sanitise_precompiled_pdf(self, notification, old_pdf) + if result == "validation_failed": new_pdf = None else: new_pdf = base64.b64decode(sanitise_response["file"].encode()) @@ -231,6 +221,8 @@ def process_virus_scan_passed(self, filename): ) copy_redaction_failed_pdf(filename) + billable_units = get_billable_units_for_letter_page_count(sanitise_response.get("page_count")) + # TODO: Remove this once CYSP update their template to not cross over the margins if notification.service_id == UUID('fe44178f-3b45-4625-9f85-2264a36dd9ec'): # CYSP # Check your state pension submit letters with good addresses and notify tags, so just use their supplied pdf @@ -322,19 +314,19 @@ def _sanitise_precompiled_pdf(self, notification, precompiled_pdf): 'Notification-ID': str(notification.id)} ) response.raise_for_status() - return response.json() + return response.json(), "validation_passed" except RequestException as ex: if ex.response is not None and ex.response.status_code == 400: message = "sanitise_precompiled_pdf validation error for notification: {}. ".format(notification.id) - if "message" in response.json(): + if response.json().get("message"): message += response.json()["message"] - if "invalid_pages" in response.json(): - message += (" on pages: " + ", ".join(map(str, response.json()["invalid_pages"]))) + if response.json().get("invalid_pages"): + message += (" on pages: " + ", ".join(map(str, response.json()["invalid_pages"]))) current_app.logger.info( message ) - return response.json() + return response.json(), "validation_failed" try: current_app.logger.exception( diff --git a/app/letters/utils.py b/app/letters/utils.py index 916274e2f..4aa4ee5c7 100644 --- a/app/letters/utils.py +++ b/app/letters/utils.py @@ -1,9 +1,13 @@ +import boto3 import io +import json import math + +from app.models import KEY_TYPE_TEST, SECOND_CLASS, RESOLVE_POSTAGE_FOR_FILE_NAME, NOTIFICATION_VALIDATION_FAILED + from datetime import datetime, timedelta from enum import Enum -import boto3 from flask import current_app from notifications_utils.letter_timings import LETTER_PROCESSING_DEADLINE @@ -11,8 +15,6 @@ from notifications_utils.pdf import pdf_page_count from notifications_utils.s3 import s3upload from notifications_utils.timezones import convert_utc_to_bst -from app.models import KEY_TYPE_TEST, SECOND_CLASS, RESOLVE_POSTAGE_FOR_FILE_NAME, NOTIFICATION_VALIDATION_FAILED - class ScanErrorType(Enum): ERROR = 1 @@ -130,9 +132,9 @@ def move_error_pdf_to_scan_bucket(source_filename): def move_scan_to_invalid_pdf_bucket(source_filename, message=None, invalid_pages=None, page_count=None): metadata = {} if message: - metadata["validation_failed_message"] = message + metadata["message"] = message if invalid_pages: - metadata["invalid_pages"] = "-".join(map(str, invalid_pages)) + metadata["invalid_pages"] = json.dumps(invalid_pages) if page_count: metadata["page_count"] = str(page_count) @@ -228,7 +230,12 @@ def letter_print_day(created_at): def get_page_count(pdf): - pages = pdf_page_count(io.BytesIO(pdf)) + return pdf_page_count(io.BytesIO(pdf)) + + +def get_billable_units_for_letter_page_count(page_count): + if not page_count: + return 0 pages_per_sheet = 2 - billable_units = math.ceil(pages / pages_per_sheet) + billable_units = math.ceil(page_count / pages_per_sheet) return billable_units diff --git a/app/service/send_notification.py b/app/service/send_notification.py index 00c78936b..4589cff40 100644 --- a/app/service/send_notification.py +++ b/app/service/send_notification.py @@ -30,6 +30,7 @@ from app.dao.services_dao import dao_fetch_service_by_id from app.dao.templates_dao import dao_get_template_by_id_and_service_id, get_precompiled_letter_template from app.dao.users_dao import get_user_by_id from app.letters.utils import ( + get_billable_units_for_letter_page_count, get_letter_pdf_filename, get_page_count, move_uploaded_pdf_to_letters_bucket, @@ -152,7 +153,8 @@ def send_pdf_letter_notification(service_id, post_data): raise e # Getting the page count won't raise an error since admin has already checked the PDF is valid - billable_units = get_page_count(letter.read()) + page_count = get_page_count(letter.read()) + billable_units = get_billable_units_for_letter_page_count(page_count) personalisation = { 'address_line_1': post_data['filename'] diff --git a/tests/app/celery/test_letters_pdf_tasks.py b/tests/app/celery/test_letters_pdf_tasks.py index 41bc7bb69..38e61deef 100644 --- a/tests/app/celery/test_letters_pdf_tasks.py +++ b/tests/app/celery/test_letters_pdf_tasks.py @@ -2,7 +2,6 @@ from unittest.mock import Mock, call, ANY import base64 import boto3 -from PyPDF2.utils import PdfReadError from moto import mock_s3 from flask import current_app from freezegun import freeze_time @@ -426,7 +425,6 @@ def test_process_letter_task_check_virus_scan_passed( s3 = boto3.client('s3', region_name='eu-west-1') s3.put_object(Bucket=source_bucket_name, Key=filename, Body=b'old_pdf') - mock_get_page_count = mocker.patch('app.celery.letters_pdf_tasks.get_page_count', return_value=1) mock_s3upload = mocker.patch('app.celery.letters_pdf_tasks.s3upload') endpoint = 'http://localhost:9999/precompiled/sanitise' with requests_mock.mock() as rmock: @@ -455,7 +453,6 @@ def test_process_letter_task_check_virus_scan_passed( file_location=destination_folder + filename, region='eu-west-1', ) - mock_get_page_count.assert_called_once_with(b'old_pdf') @freeze_time('2018-01-01 18:00') @@ -486,9 +483,8 @@ def test_process_letter_task_check_virus_scan_passed_when_sanitise_fails( "page_count": 1 } mock_sanitise = mocker.patch( - 'app.celery.letters_pdf_tasks._sanitise_precompiled_pdf', return_value=sanitise_response + 'app.celery.letters_pdf_tasks._sanitise_precompiled_pdf', return_value=(sanitise_response, "validation_failed") ) - mock_get_page_count = mocker.patch('app.celery.letters_pdf_tasks.get_page_count', return_value=2) process_virus_scan_passed(filename) @@ -502,14 +498,12 @@ def test_process_letter_task_check_virus_scan_passed_when_sanitise_fails( mock_move_s3.assert_called_once_with( source_bucket=source_bucket_name, source_filename=filename, target_bucket=target_bucket_name, target_filename=filename, metadata={ - "validation_failed_message": "content-outside-printable-area", - "invalid_pages": "1-2", + "message": "content-outside-printable-area", + "invalid_pages": "[1, 2]", "page_count": "1" } ) - mock_get_page_count.assert_called_once_with(b'pdf_content') - @freeze_time('2018-01-01 18:00') @mock_s3 @@ -534,7 +528,6 @@ def test_process_letter_task_check_virus_scan_passed_when_redaction_fails( sample_letter_notification.status = NOTIFICATION_PENDING_VIRUS_CHECK sample_letter_notification.key_type = key_type mock_copy_s3 = mocker.patch('app.letters.utils._copy_s3_object') - mocker.patch('app.celery.letters_pdf_tasks.get_page_count', return_value=2) endpoint = 'http://localhost:9999/precompiled/sanitise' with requests_mock.mock() as rmock: @@ -547,7 +540,7 @@ def test_process_letter_task_check_virus_scan_passed_when_redaction_fails( "redaction_failed_message": "No matches for address block during redaction procedure", "message": "", "invalid_pages": "", - "page_count": 2 + "page_count": 3 }, status_code=200 ) @@ -585,16 +578,25 @@ def test_process_letter_task_check_virus_scan_passed_when_file_cannot_be_opened( sample_letter_notification.key_type = key_type mock_move_s3 = mocker.patch('app.letters.utils._move_s3_object') - mock_get_page_count = mocker.patch('app.celery.letters_pdf_tasks.get_page_count', side_effect=PdfReadError) - mock_sanitise = mocker.patch('app.celery.letters_pdf_tasks._sanitise_precompiled_pdf') + endpoint = 'http://localhost:9999/precompiled/sanitise' + with requests_mock.mock() as rmock: + rmock.request( + "POST", + endpoint, + json={ + "page_count": None, + "recipient_address": None, + "message": 'unable-to-read-the-file', + "invalid_pages": None, + "file": None + }, + status_code=400 + ) + process_virus_scan_passed(filename) - process_virus_scan_passed(filename) - - mock_sanitise.assert_not_called() - mock_get_page_count.assert_called_once_with(b'pdf_content') mock_move_s3.assert_called_once_with( source_bucket=source_bucket_name, source_filename=filename, - target_bucket=target_bucket_name, target_filename=filename, metadata={} + target_bucket=target_bucket_name, target_filename=filename, metadata={'message': 'unable-to-read-the-file'} ) assert sample_letter_notification.status == NOTIFICATION_VALIDATION_FAILED assert sample_letter_notification.billable_units == 0 @@ -617,8 +619,6 @@ def test_process_virus_scan_passed_logs_error_and_sets_tech_failure_if_s3_error_ s3 = boto3.client('s3', region_name='eu-west-1') s3.put_object(Bucket=source_bucket_name, Key=filename, Body=b'pdf_content') - mocker.patch('app.celery.letters_pdf_tasks.get_page_count', return_value=1) - error_response = { 'Error': { 'Code': 'InvalidParameterValue', @@ -741,10 +741,11 @@ def test_sanitise_precompiled_pdf_returns_data_from_template_preview(rmock, samp status_code=200 ) mock_celery = Mock(**{'retry.side_effect': Retry}) - response = _sanitise_precompiled_pdf(mock_celery, sample_letter_notification, b'old_pdf') + response, result = _sanitise_precompiled_pdf(mock_celery, sample_letter_notification, b'old_pdf') assert rmock.called assert rmock.request_history[0].url == endpoint + assert result == "validation_passed" assert base64.b64decode(response["file"].encode()) == b"new_pdf" assert rmock.last_request.text == 'old_pdf' @@ -768,10 +769,11 @@ def test_sanitise_precompiled_pdf_return_validation_error(rmock, sample_letter_n status_code=400 ) mock_celery = Mock(**{'retry.side_effect': Retry}) - response = _sanitise_precompiled_pdf(mock_celery, sample_letter_notification, b'old_pdf') + response, result = _sanitise_precompiled_pdf(mock_celery, sample_letter_notification, b'old_pdf') assert rmock.called assert rmock.request_history[0].url == endpoint + assert result == "validation_failed" assert response == response_json From ad14f96b8d93f1a75e14954de22122553247a4fa Mon Sep 17 00:00:00 2001 From: Rebecca Law Date: Thu, 17 Oct 2019 15:17:43 +0100 Subject: [PATCH 4/4] A small change to make the code just a little bit clearer. --- app/celery/letters_pdf_tasks.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/app/celery/letters_pdf_tasks.py b/app/celery/letters_pdf_tasks.py index 366a9f954..761a97c78 100644 --- a/app/celery/letters_pdf_tasks.py +++ b/app/celery/letters_pdf_tasks.py @@ -209,9 +209,8 @@ def process_virus_scan_passed(self, filename): old_pdf = scan_pdf_object.get()['Body'].read() sanitise_response, result = _sanitise_precompiled_pdf(self, notification, old_pdf) - if result == "validation_failed": - new_pdf = None - else: + new_pdf = None + if result == 'validation_passed': new_pdf = base64.b64decode(sanitise_response["file"].encode()) redaction_failed_message = sanitise_response.get("redaction_failed_message") @@ -228,7 +227,7 @@ def process_virus_scan_passed(self, filename): # Check your state pension submit letters with good addresses and notify tags, so just use their supplied pdf new_pdf = old_pdf - if not new_pdf: + if result == 'validation_failed' and not new_pdf: current_app.logger.info('Invalid precompiled pdf received {} ({})'.format(notification.id, filename)) _move_invalid_letter_and_update_status( notification=notification, @@ -239,9 +238,6 @@ def process_virus_scan_passed(self, filename): page_count=sanitise_response.get("page_count") ) return - else: - current_app.logger.info( - "Validation was successful for precompiled pdf {} ({})".format(notification.id, filename)) current_app.logger.info('notification id {} ({}) sanitised and ready to send'.format(notification.id, filename))