From 426311718951407a26ca7a57ee158c0f65182097 Mon Sep 17 00:00:00 2001 From: Rebecca Law Date: Tue, 16 Oct 2018 15:08:15 +0100 Subject: [PATCH 1/3] We were getting the page count for the letter before virus scan happened. This PR moves setting the billale_units for the letter after virus scan has passed. --- app/celery/letters_pdf_tasks.py | 31 +++++++++++--- app/v2/notifications/post_notifications.py | 32 +++++---------- tests/app/celery/test_letters_pdf_tasks.py | 41 +++++++++++++------ .../test_post_letter_notifications.py | 27 +----------- 4 files changed, 67 insertions(+), 64 deletions(-) diff --git a/app/celery/letters_pdf_tasks.py b/app/celery/letters_pdf_tasks.py index 595f617d7..cc0a20922 100644 --- a/app/celery/letters_pdf_tasks.py +++ b/app/celery/letters_pdf_tasks.py @@ -1,8 +1,11 @@ +import io import math from datetime import datetime +from PyPDF2.utils import PdfReadError from botocore.exceptions import ClientError as BotoClientError from flask import current_app +from notifications_utils.pdf import pdf_page_count from requests import ( post as requests_post, RequestException @@ -38,7 +41,7 @@ from app.models import ( NOTIFICATION_DELIVERED, NOTIFICATION_VIRUS_SCAN_FAILED, NOTIFICATION_TECHNICAL_FAILURE, - # NOTIFICATION_VALIDATION_FAILED + NOTIFICATION_PERMANENT_FAILURE ) @@ -197,20 +200,37 @@ def process_virus_scan_passed(self, filename): current_app.logger.info('notification id {} ({}) sanitised and ready to send'.format(notification.id, filename)) - # temporarily upload original pdf while testing sanitise flow. _upload_pdf_to_test_or_live_pdf_bucket( old_pdf, # TODO: change to new_pdf filename, is_test_letter=is_test_key) + billable_units = _get_page_count(notification, old_pdf) + update_letter_pdf_status( - reference, - NOTIFICATION_DELIVERED if is_test_key else NOTIFICATION_CREATED + reference=reference, + status=NOTIFICATION_DELIVERED if is_test_key else NOTIFICATION_CREATED, + billable_units=billable_units ) scan_pdf_object.delete() +def _get_page_count(notification, old_pdf): + try: + pages = pdf_page_count(io.BytesIO(old_pdf)) + pages_per_sheet = 2 + billable_units = math.ceil(pages / pages_per_sheet) + return billable_units + except PdfReadError as e: + current_app.logger.exception(msg='Invalid PDF received for notification_id: {}'.format(notification.id)) + update_letter_pdf_status( + reference=notification.reference, + status=NOTIFICATION_PERMANENT_FAILURE + ) + raise e + + def _upload_pdf_to_test_or_live_pdf_bucket(pdf_data, filename, is_test_letter): target_bucket_config = 'TEST_LETTERS_BUCKET_NAME' if is_test_letter else 'LETTERS_PDF_BUCKET_NAME' target_bucket_name = current_app.config[target_bucket_config] @@ -290,11 +310,12 @@ def process_virus_scan_error(filename): raise error -def update_letter_pdf_status(reference, status): +def update_letter_pdf_status(reference, status, billable_units=0): return dao_update_notifications_by_reference( references=[reference], update_dict={ 'status': status, + 'billable_units': billable_units, 'updated_at': datetime.utcnow() })[0] diff --git a/app/v2/notifications/post_notifications.py b/app/v2/notifications/post_notifications.py index bbaef8385..b40e4e4b6 100644 --- a/app/v2/notifications/post_notifications.py +++ b/app/v2/notifications/post_notifications.py @@ -1,14 +1,13 @@ import base64 import functools -import io -import math import werkzeug from flask import request, jsonify, current_app, abort -from notifications_utils.pdf import pdf_page_count, PdfReadError from notifications_utils.recipients import try_validate_and_format_phone_number from app import api_user, authenticated_service, notify_celery, document_download_client +from app.celery.letters_pdf_tasks import create_letters_pdf +from app.celery.research_mode_tasks import create_fake_letter_response_file from app.clients.document_download import DocumentDownloadError from app.config import QueueNames, TaskNames from app.dao.notifications_dao import update_notification_status_by_reference @@ -30,17 +29,15 @@ from app.models import ( NOTIFICATION_DELIVERED, NOTIFICATION_PENDING_VIRUS_CHECK, ) -from app.celery.letters_pdf_tasks import create_letters_pdf -from app.celery.research_mode_tasks import create_fake_letter_response_file +from app.notifications.process_letter_notifications import ( + create_letter_notification +) from app.notifications.process_notifications import ( persist_notification, persist_scheduled_notification, send_notification_to_queue, simulated_recipient ) -from app.notifications.process_letter_notifications import ( - create_letter_notification -) from app.notifications.validators import ( validate_and_format_recipient, check_rate_limiting, @@ -53,17 +50,17 @@ from app.notifications.validators import ( from app.schema_validation import validate from app.v2.errors import BadRequestError from app.v2.notifications import v2_notification_blueprint +from app.v2.notifications.create_response import ( + create_post_sms_response_from_notification, + create_post_email_response_from_notification, + create_post_letter_response_from_notification +) from app.v2.notifications.notification_schemas import ( post_sms_request, post_email_request, post_letter_request, post_precompiled_letter_request ) -from app.v2.notifications.create_response import ( - create_post_sms_response_from_notification, - create_post_email_response_from_notification, - create_post_letter_response_from_notification -) @v2_notification_blueprint.route('/{}'.format(LETTER_TYPE), methods=['POST']) @@ -293,21 +290,14 @@ def process_precompiled_letter_notifications(*, letter_data, api_key, template, try: status = NOTIFICATION_PENDING_VIRUS_CHECK letter_content = base64.b64decode(letter_data['content']) - pages = pdf_page_count(io.BytesIO(letter_content)) except ValueError: raise BadRequestError(message='Cannot decode letter content (invalid base64 encoding)', status_code=400) - except PdfReadError: - current_app.logger.exception(msg='Invalid PDF received') - raise BadRequestError(message='Letter content is not a valid PDF', status_code=400) - pages_per_sheet = 2 - billable_units = math.ceil(pages / pages_per_sheet) notification = create_letter_notification(letter_data=letter_data, template=template, api_key=api_key, status=status, - reply_to_text=reply_to_text, - billable_units=billable_units) + reply_to_text=reply_to_text) filename = upload_letter_pdf(notification, letter_content, precompiled=True) diff --git a/tests/app/celery/test_letters_pdf_tasks.py b/tests/app/celery/test_letters_pdf_tasks.py index 53c54d37e..59edba5fd 100644 --- a/tests/app/celery/test_letters_pdf_tasks.py +++ b/tests/app/celery/test_letters_pdf_tasks.py @@ -1,6 +1,7 @@ from unittest.mock import Mock, call, ANY import boto3 +from PyPDF2.utils import PdfReadError from moto import mock_s3 from flask import current_app from freezegun import freeze_time @@ -22,7 +23,8 @@ from app.celery.letters_pdf_tasks import ( process_virus_scan_failed, process_virus_scan_error, replay_letters_in_error, - _sanitise_precomiled_pdf + _sanitise_precomiled_pdf, + _get_page_count ) from app.letters.utils import get_letter_pdf_filename, ScanErrorType from app.models import ( @@ -31,9 +33,12 @@ from app.models import ( Notification, NOTIFICATION_CREATED, NOTIFICATION_DELIVERED, - NOTIFICATION_VIRUS_SCAN_FAILED, + NOTIFICATION_PERMANENT_FAILURE, NOTIFICATION_SENDING, - NOTIFICATION_TECHNICAL_FAILURE) + NOTIFICATION_TECHNICAL_FAILURE, + NOTIFICATION_VIRUS_SCAN_FAILED, +) +from tests.app.db import create_notification from tests.conftest import set_config_values @@ -333,14 +338,17 @@ def test_letter_in_created_state_fails_if_notification_doesnt_exist(sample_notif @freeze_time('2018-01-01 18:00') @mock_s3 -@pytest.mark.parametrize('key_type,is_test_letter,noti_status,bucket_config_name,destination_folder', [ - (KEY_TYPE_NORMAL, False, NOTIFICATION_CREATED, 'LETTERS_PDF_BUCKET_NAME', '2018-01-02/'), - (KEY_TYPE_TEST, True, NOTIFICATION_DELIVERED, 'TEST_LETTERS_BUCKET_NAME', '') +@pytest.mark.parametrize('key_type, noti_status,bucket_config_name,destination_folder', [ + (KEY_TYPE_NORMAL, NOTIFICATION_CREATED, 'LETTERS_PDF_BUCKET_NAME', '2018-01-02/'), + (KEY_TYPE_TEST, NOTIFICATION_DELIVERED, 'TEST_LETTERS_BUCKET_NAME', '') ]) def test_process_letter_task_check_virus_scan_passed( - sample_letter_notification, mocker, key_type, is_test_letter, noti_status, bucket_config_name, destination_folder + sample_letter_template, mocker, key_type, noti_status, bucket_config_name, destination_folder ): - filename = 'NOTIFY.{}'.format(sample_letter_notification.reference) + letter_notification = create_notification(template=sample_letter_template, billable_units=0, + status='pending-virus-check', key_type=key_type, + reference='{} letter'.format(key_type)) + filename = 'NOTIFY.{}'.format(letter_notification.reference) source_bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME'] target_bucket_name = current_app.config[bucket_config_name] @@ -351,14 +359,14 @@ def test_process_letter_task_check_virus_scan_passed( s3 = boto3.client('s3', region_name='eu-west-1') s3.put_object(Bucket=source_bucket_name, Key=filename, Body=b'pdf_content') - sample_letter_notification.status = 'pending-virus-check' - sample_letter_notification.key_type = key_type + mocker.patch('app.celery.letters_pdf_tasks.pdf_page_count', return_value=1) mock_s3upload = mocker.patch('app.celery.letters_pdf_tasks.s3upload') mock_sanitise = mocker.patch('app.celery.letters_pdf_tasks._sanitise_precomiled_pdf') process_virus_scan_passed(filename) - assert sample_letter_notification.status == noti_status + assert letter_notification.status == noti_status + assert letter_notification.billable_units == 1 mock_s3upload.assert_called_once_with( filedata=b'pdf_content', region='eu-west-1', @@ -367,11 +375,20 @@ def test_process_letter_task_check_virus_scan_passed( ) mock_sanitise.assert_called_once_with( ANY, - sample_letter_notification, + letter_notification, b'pdf_content' ) +def test_get_page_count_set_notification_to_permanent_failure_when_not_pdf( + sample_letter_notification +): + with pytest.raises(expected_exception=PdfReadError): + _get_page_count(sample_letter_notification, b'pdf_content') + updated_notification = Notification.query.filter_by(id=sample_letter_notification.id).first() + assert updated_notification.status == NOTIFICATION_PERMANENT_FAILURE + + def test_process_letter_task_check_virus_scan_failed(sample_letter_notification, mocker): filename = 'NOTIFY.{}'.format(sample_letter_notification.reference) sample_letter_notification.status = 'pending-virus-check' diff --git a/tests/app/v2/notifications/test_post_letter_notifications.py b/tests/app/v2/notifications/test_post_letter_notifications.py index a984d5e78..34fe22594 100644 --- a/tests/app/v2/notifications/test_post_letter_notifications.py +++ b/tests/app/v2/notifications/test_post_letter_notifications.py @@ -382,7 +382,6 @@ def test_post_letter_notification_is_delivered_and_has_pdf_uploaded_to_test_lett ): sample_letter_service = create_service(service_permissions=['letter', 'precompiled_letter']) s3mock = mocker.patch('app.v2.notifications.post_notifications.upload_letter_pdf', return_value='test.pdf') - mocker.patch('app.v2.notifications.post_notifications.pdf_page_count', return_value=1) mock_celery = mocker.patch("app.letters.rest.notify_celery.send_task") data = { "reference": "letter-reference", @@ -468,7 +467,6 @@ def test_post_precompiled_letter_notification_returns_201(client, notify_user, m sample_service = create_service(service_permissions=['letter', 'precompiled_letter']) sample_service.postage = postage s3mock = mocker.patch('app.v2.notifications.post_notifications.upload_letter_pdf') - mocker.patch('app.v2.notifications.post_notifications.pdf_page_count', return_value=5) mocker.patch("app.letters.rest.notify_celery.send_task") data = { "reference": "letter-reference", @@ -486,7 +484,7 @@ def test_post_precompiled_letter_notification_returns_201(client, notify_user, m notification = Notification.query.one() - assert notification.billable_units == 3 + assert notification.billable_units == 0 assert notification.status == NOTIFICATION_PENDING_VIRUS_CHECK assert notification.postage == postage @@ -495,26 +493,3 @@ def test_post_precompiled_letter_notification_returns_201(client, notify_user, m resp_json = json.loads(response.get_data(as_text=True)) assert resp_json == {'id': str(notification.id), 'reference': 'letter-reference'} - - -def test_post_precompiled_letter_notification_returns_400_with_invalid_pdf(client, notify_user, mocker): - sample_service = create_service(service_permissions=['letter', 'precompiled_letter']) - s3mock = mocker.patch('app.v2.notifications.post_notifications.upload_letter_pdf') - data = { - "reference": "letter-reference", - "content": "bGV0dGVyLWNvbnRlbnQ=" - } - auth_header = create_authorization_header(service_id=sample_service.id) - response = client.post( - path="v2/notifications/letter", - data=json.dumps(data), - headers=[('Content-Type', 'application/json'), auth_header]) - - resp_json = json.loads(response.get_data(as_text=True)) - - assert response.status_code == 400, response.get_data(as_text=True) - assert resp_json['errors'][0]['message'] == 'Letter content is not a valid PDF' - - assert s3mock.called is False - - assert Notification.query.count() == 0 From 537ab2e9652f85b9876f2f1bfa745c90669a3076 Mon Sep 17 00:00:00 2001 From: Rebecca Law Date: Wed, 24 Oct 2018 14:38:09 +0100 Subject: [PATCH 2/3] Fix merge error. Moved the billable unit calculation before the santise call. --- app/celery/letters_pdf_tasks.py | 9 ++++----- tests/app/celery/test_letters_pdf_tasks.py | 16 ++++++++++++---- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/app/celery/letters_pdf_tasks.py b/app/celery/letters_pdf_tasks.py index 854ea18c8..4e2577b73 100644 --- a/app/celery/letters_pdf_tasks.py +++ b/app/celery/letters_pdf_tasks.py @@ -41,7 +41,6 @@ from app.models import ( KEY_TYPE_TEST, NOTIFICATION_CREATED, NOTIFICATION_DELIVERED, - NOTIFICATION_PERMANENT_FAILURE, NOTIFICATION_TECHNICAL_FAILURE, NOTIFICATION_VALIDATION_FAILED, NOTIFICATION_VIRUS_SCAN_FAILED, @@ -189,6 +188,8 @@ def process_virus_scan_passed(self, filename): scan_pdf_object = s3.get_s3_object(current_app.config['LETTERS_SCAN_BUCKET_NAME'], filename) old_pdf = scan_pdf_object.get()['Body'].read() + billable_units = _get_page_count(notification, old_pdf) + new_pdf = _sanitise_precomiled_pdf(self, notification, old_pdf) # TODO: Remove this once CYSP update their template to not cross over the margins @@ -213,12 +214,10 @@ def process_virus_scan_passed(self, filename): # temporarily upload original pdf while testing sanitise flow. _upload_pdf_to_test_or_live_pdf_bucket( - old_pdf, # TODO: change to new_pdf + new_pdf, filename, is_test_letter=is_test_key) - billable_units = _get_page_count(notification, old_pdf) - update_letter_pdf_status( reference=reference, status=NOTIFICATION_DELIVERED if is_test_key else NOTIFICATION_CREATED, @@ -238,7 +237,7 @@ def _get_page_count(notification, old_pdf): current_app.logger.exception(msg='Invalid PDF received for notification_id: {}'.format(notification.id)) update_letter_pdf_status( reference=notification.reference, - status=NOTIFICATION_PERMANENT_FAILURE + status=NOTIFICATION_VALIDATION_FAILED ) raise e diff --git a/tests/app/celery/test_letters_pdf_tasks.py b/tests/app/celery/test_letters_pdf_tasks.py index 11c8cd797..2be4bc8d6 100644 --- a/tests/app/celery/test_letters_pdf_tasks.py +++ b/tests/app/celery/test_letters_pdf_tasks.py @@ -34,7 +34,6 @@ from app.models import ( NOTIFICATION_CREATED, NOTIFICATION_DELIVERED, NOTIFICATION_PENDING_VIRUS_CHECK, - NOTIFICATION_PERMANENT_FAILURE, NOTIFICATION_SENDING, NOTIFICATION_TECHNICAL_FAILURE, NOTIFICATION_VALIDATION_FAILED, @@ -362,9 +361,9 @@ def test_process_letter_task_check_virus_scan_passed( s3 = boto3.client('s3', region_name='eu-west-1') s3.put_object(Bucket=source_bucket_name, Key=filename, Body=b'pdf_content') - mocker.patch('app.celery.letters_pdf_tasks.pdf_page_count', return_value=1) + mock_get_page_count = mocker.patch('app.celery.letters_pdf_tasks._get_page_count', return_value=1) mock_s3upload = mocker.patch('app.celery.letters_pdf_tasks.s3upload') - mock_sanitise = mocker.patch('app.celery.letters_pdf_tasks._sanitise_precomiled_pdf', return_value="pdf_content") + mock_sanitise = mocker.patch('app.celery.letters_pdf_tasks._sanitise_precomiled_pdf', return_value=b'pdf_content') process_virus_scan_passed(filename) @@ -381,6 +380,10 @@ def test_process_letter_task_check_virus_scan_passed( file_location=destination_folder + filename, region='eu-west-1', ) + mock_get_page_count.assert_called_once_with( + letter_notification, + b'pdf_content' + ) @freeze_time('2018-01-01 18:00') @@ -406,6 +409,7 @@ def test_process_letter_task_check_virus_scan_passed_when_sanitise_fails( sample_letter_notification.key_type = key_type mock_move_s3 = mocker.patch('app.letters.utils._move_s3_object') mock_sanitise = mocker.patch('app.celery.letters_pdf_tasks._sanitise_precomiled_pdf', return_value=None) + mock_get_page_count = mocker.patch('app.celery.letters_pdf_tasks._get_page_count', return_value=2) process_virus_scan_passed(filename) @@ -420,6 +424,10 @@ def test_process_letter_task_check_virus_scan_passed_when_sanitise_fails( target_bucket_name, filename ) + mock_get_page_count.assert_called_once_with( + sample_letter_notification, b'pdf_content' + ) + def test_get_page_count_set_notification_to_permanent_failure_when_not_pdf( sample_letter_notification @@ -427,7 +435,7 @@ def test_get_page_count_set_notification_to_permanent_failure_when_not_pdf( with pytest.raises(expected_exception=PdfReadError): _get_page_count(sample_letter_notification, b'pdf_content') updated_notification = Notification.query.filter_by(id=sample_letter_notification.id).first() - assert updated_notification.status == NOTIFICATION_PERMANENT_FAILURE + assert updated_notification.status == NOTIFICATION_VALIDATION_FAILED def test_process_letter_task_check_virus_scan_failed(sample_letter_notification, mocker): From 1cc2d2658689e4752301e92cf9e6d626fe1ac315 Mon Sep 17 00:00:00 2001 From: Rebecca Law Date: Wed, 24 Oct 2018 14:50:50 +0100 Subject: [PATCH 3/3] Explicitly set the billable units for update_letter_pdf_status --- app/celery/letters_pdf_tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/celery/letters_pdf_tasks.py b/app/celery/letters_pdf_tasks.py index 4e2577b73..d888e4c3a 100644 --- a/app/celery/letters_pdf_tasks.py +++ b/app/celery/letters_pdf_tasks.py @@ -293,7 +293,7 @@ def process_virus_scan_failed(filename): move_failed_pdf(filename, ScanErrorType.FAILURE) reference = get_reference_from_filename(filename) notification = dao_get_notification_by_reference(reference) - updated_count = update_letter_pdf_status(reference, NOTIFICATION_VIRUS_SCAN_FAILED) + updated_count = update_letter_pdf_status(reference, NOTIFICATION_VIRUS_SCAN_FAILED, billable_units=0) if updated_count != 1: raise Exception( @@ -312,7 +312,7 @@ def process_virus_scan_error(filename): move_failed_pdf(filename, ScanErrorType.ERROR) reference = get_reference_from_filename(filename) notification = dao_get_notification_by_reference(reference) - updated_count = update_letter_pdf_status(reference, NOTIFICATION_TECHNICAL_FAILURE) + updated_count = update_letter_pdf_status(reference, NOTIFICATION_TECHNICAL_FAILURE, billable_units=0) if updated_count != 1: raise Exception(