Merge pull request #2057 from alphagov/sanitise-precompiled

Sanitise precompiled pdfs after virus scan
This commit is contained in:
Leo Hemsted
2018-08-29 12:53:27 +01:00
committed by GitHub
4 changed files with 152 additions and 60 deletions

View File

@@ -7,8 +7,9 @@ from requests import (
post as requests_post, post as requests_post,
RequestException RequestException
) )
from celery.exceptions import MaxRetriesExceededError
from notifications_utils.statsd_decorators import statsd from notifications_utils.statsd_decorators import statsd
from notifications_utils.s3 import s3upload
from app import notify_celery from app import notify_celery
from app.aws import s3 from app.aws import s3
@@ -24,9 +25,11 @@ from app.dao.notifications_dao import (
from app.errors import VirusScanError from app.errors import VirusScanError
from app.letters.utils import ( from app.letters.utils import (
get_reference_from_filename, get_reference_from_filename,
move_scanned_pdf_to_test_or_live_pdf_bucket, get_folder_name,
upload_letter_pdf, upload_letter_pdf,
move_failed_pdf, ScanErrorType, move_error_pdf_to_scan_bucket, move_failed_pdf,
ScanErrorType,
move_error_pdf_to_scan_bucket,
get_file_names_from_error_bucket get_file_names_from_error_bucket
) )
from app.models import ( from app.models import (
@@ -34,7 +37,8 @@ from app.models import (
NOTIFICATION_CREATED, NOTIFICATION_CREATED,
NOTIFICATION_DELIVERED, NOTIFICATION_DELIVERED,
NOTIFICATION_VIRUS_SCAN_FAILED, NOTIFICATION_VIRUS_SCAN_FAILED,
NOTIFICATION_TECHNICAL_FAILURE NOTIFICATION_TECHNICAL_FAILURE,
# NOTIFICATION_VALIDATION_FAILED
) )
@@ -66,7 +70,7 @@ def create_letters_pdf(self, notification_id):
"Letters PDF notification creation for id: {} failed".format(notification_id) "Letters PDF notification creation for id: {} failed".format(notification_id)
) )
self.retry(queue=QueueNames.RETRY) self.retry(queue=QueueNames.RETRY)
except self.MaxRetriesExceededError: except MaxRetriesExceededError:
current_app.logger.exception( current_app.logger.exception(
"RETRY FAILED: task create_letters_pdf failed for notification {}".format(notification_id), "RETRY FAILED: task create_letters_pdf failed for notification {}".format(notification_id),
) )
@@ -163,22 +167,83 @@ def letter_in_created_state(filename):
return False return False
@notify_celery.task(name='process-virus-scan-passed') @notify_celery.task(bind=True, name='process-virus-scan-passed', max_retries=15, default_retry_delay=300)
def process_virus_scan_passed(filename): def process_virus_scan_passed(self, filename):
reference = get_reference_from_filename(filename) reference = get_reference_from_filename(filename)
notification = dao_get_notification_by_reference(reference) notification = dao_get_notification_by_reference(reference)
current_app.logger.info('notification id {} Virus scan passed: {}'.format(notification.id, filename)) current_app.logger.info('notification id {} Virus scan passed: {}'.format(notification.id, filename))
is_test_key = notification.key_type == KEY_TYPE_TEST is_test_key = notification.key_type == KEY_TYPE_TEST
move_scanned_pdf_to_test_or_live_pdf_bucket(
scan_pdf_object = s3.get_s3_object(current_app.config['LETTERS_SCAN_BUCKET_NAME'], filename)
old_pdf = scan_pdf_object.get()['Body'].read()
new_pdf = _sanitise_precomiled_pdf(self, notification, old_pdf)
if not new_pdf:
current_app.logger.info('Invalid precompiled pdf received {} ({})'.format(notification.id, filename))
# update_notification_status_by_id(notification.id, NOTIFICATION_VALIDATION_FAILED)
# move_scan_to_invalid_pdf_bucket() # TODO: implement this (and create bucket etc)
# scan_pdf_object.delete()
# return
current_app.logger.info('notification id {} ({}) sanitised and ready to send'.format(notification.id, filename))
# temporarily upload original pdf while testing sanitise flow.
_upload_pdf_to_test_or_live_pdf_bucket(
old_pdf, # TODO: change to new_pdf
filename, filename,
is_test_letter=is_test_key is_test_letter=is_test_key)
)
update_letter_pdf_status( update_letter_pdf_status(
reference, reference,
NOTIFICATION_DELIVERED if is_test_key else NOTIFICATION_CREATED NOTIFICATION_DELIVERED if is_test_key else NOTIFICATION_CREATED
) )
scan_pdf_object.delete()
def _upload_pdf_to_test_or_live_pdf_bucket(pdf_data, filename, is_test_letter):
target_bucket_config = 'TEST_LETTERS_BUCKET_NAME' if is_test_letter else 'LETTERS_PDF_BUCKET_NAME'
target_bucket_name = current_app.config[target_bucket_config]
target_filename = get_folder_name(datetime.utcnow(), is_test_letter) + filename
s3upload(
filedata=pdf_data,
region=current_app.config['AWS_REGION'],
bucket_name=target_bucket_name,
file_location=target_filename
)
def _sanitise_precomiled_pdf(self, notification, precompiled_pdf):
try:
resp = requests_post(
'{}/precompiled/sanitise'.format(
current_app.config['TEMPLATE_PREVIEW_API_HOST']
),
data=precompiled_pdf,
headers={'Authorization': 'Token {}'.format(current_app.config['TEMPLATE_PREVIEW_API_KEY'])}
)
resp.raise_for_status()
return resp.content
except RequestException as ex:
if ex.response is not None and ex.response.status_code == 400:
# validation error
return None
try:
current_app.logger.exception(
"sanitise_precomiled_pdf failed for notification: {}".format(notification.id)
)
self.retry(queue=QueueNames.RETRY)
except MaxRetriesExceededError:
current_app.logger.exception(
"RETRY FAILED: sanitise_precomiled_pdf failed for notification {}".format(notification.id),
)
update_notification_status_by_id(notification.id, NOTIFICATION_TECHNICAL_FAILURE)
raise
@notify_celery.task(name='process-virus-scan-failed') @notify_celery.task(name='process-virus-scan-failed')
def process_virus_scan_failed(filename): def process_virus_scan_failed(filename):

View File

@@ -90,16 +90,6 @@ def upload_letter_pdf(notification, pdf_data, precompiled=False):
return upload_file_name return upload_file_name
def move_scanned_pdf_to_test_or_live_pdf_bucket(source_filename, is_test_letter=False):
source_bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME']
target_bucket_config = 'TEST_LETTERS_BUCKET_NAME' if is_test_letter else 'LETTERS_PDF_BUCKET_NAME'
target_bucket_name = current_app.config[target_bucket_config]
target_filename = get_folder_name(datetime.utcnow(), is_test_letter) + source_filename
_move_s3_object(source_bucket_name, source_filename, target_bucket_name, target_filename)
def move_failed_pdf(source_filename, scan_error_type): def move_failed_pdf(source_filename, scan_error_type):
scan_bucket = current_app.config['LETTERS_SCAN_BUCKET_NAME'] scan_bucket = current_app.config['LETTERS_SCAN_BUCKET_NAME']

View File

@@ -1,12 +1,13 @@
from unittest.mock import Mock, call, ANY
import boto3
from moto import mock_s3
from flask import current_app from flask import current_app
from unittest.mock import call
from freezegun import freeze_time from freezegun import freeze_time
import pytest import pytest
import requests_mock import requests_mock
from botocore.exceptions import ClientError from botocore.exceptions import ClientError
from celery.exceptions import MaxRetriesExceededError from celery.exceptions import MaxRetriesExceededError, Retry
from requests import RequestException from requests import RequestException
from sqlalchemy.orm.exc import NoResultFound from sqlalchemy.orm.exc import NoResultFound
@@ -19,7 +20,9 @@ from app.celery.letters_pdf_tasks import (
letter_in_created_state, letter_in_created_state,
process_virus_scan_passed, process_virus_scan_passed,
process_virus_scan_failed, process_virus_scan_failed,
process_virus_scan_error, replay_letters_in_error process_virus_scan_error,
replay_letters_in_error,
_sanitise_precomiled_pdf
) )
from app.letters.utils import get_letter_pdf_filename, ScanErrorType from app.letters.utils import get_letter_pdf_filename, ScanErrorType
from app.models import ( from app.models import (
@@ -320,22 +323,45 @@ def test_letter_in_created_state_fails_if_notification_doesnt_exist(sample_notif
assert letter_in_created_state(filename) is False assert letter_in_created_state(filename) is False
@pytest.mark.parametrize('key_type,is_test_letter,noti_status', [ @freeze_time('2018-01-01 18:00')
(KEY_TYPE_NORMAL, False, NOTIFICATION_CREATED), @mock_s3
(KEY_TYPE_TEST, True, NOTIFICATION_DELIVERED) @pytest.mark.parametrize('key_type,is_test_letter,noti_status,bucket_config_name,destination_folder', [
(KEY_TYPE_NORMAL, False, NOTIFICATION_CREATED, 'LETTERS_PDF_BUCKET_NAME', '2018-01-02/'),
(KEY_TYPE_TEST, True, NOTIFICATION_DELIVERED, 'TEST_LETTERS_BUCKET_NAME', '')
]) ])
def test_process_letter_task_check_virus_scan_passed( def test_process_letter_task_check_virus_scan_passed(
sample_letter_notification, mocker, key_type, is_test_letter, noti_status sample_letter_notification, mocker, key_type, is_test_letter, noti_status, bucket_config_name, destination_folder
): ):
filename = 'NOTIFY.{}'.format(sample_letter_notification.reference) filename = 'NOTIFY.{}'.format(sample_letter_notification.reference)
source_bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME']
target_bucket_name = current_app.config[bucket_config_name]
conn = boto3.resource('s3', region_name='eu-west-1')
conn.create_bucket(Bucket=source_bucket_name)
conn.create_bucket(Bucket=target_bucket_name)
s3 = boto3.client('s3', region_name='eu-west-1')
s3.put_object(Bucket=source_bucket_name, Key=filename, Body=b'pdf_content')
sample_letter_notification.status = 'pending-virus-check' sample_letter_notification.status = 'pending-virus-check'
sample_letter_notification.key_type = key_type sample_letter_notification.key_type = key_type
mock_move_pdf = mocker.patch('app.celery.letters_pdf_tasks.move_scanned_pdf_to_test_or_live_pdf_bucket') mock_s3upload = mocker.patch('app.celery.letters_pdf_tasks.s3upload')
mock_sanitise = mocker.patch('app.celery.letters_pdf_tasks._sanitise_precomiled_pdf')
process_virus_scan_passed(filename) process_virus_scan_passed(filename)
mock_move_pdf.assert_called_once_with(filename, is_test_letter=is_test_letter)
assert sample_letter_notification.status == noti_status assert sample_letter_notification.status == noti_status
mock_s3upload.assert_called_once_with(
filedata=b'pdf_content',
region='eu-west-1',
bucket_name=target_bucket_name,
file_location=destination_folder + filename
)
mock_sanitise.assert_called_once_with(
ANY,
sample_letter_notification,
b'pdf_content'
)
def test_process_letter_task_check_virus_scan_failed(sample_letter_notification, mocker): def test_process_letter_task_check_virus_scan_failed(sample_letter_notification, mocker):
@@ -365,7 +391,6 @@ def test_process_letter_task_check_virus_scan_error(sample_letter_notification,
def test_replay_letters_in_error_for_all_letters_in_error_bucket(notify_api, mocker): def test_replay_letters_in_error_for_all_letters_in_error_bucket(notify_api, mocker):
import boto3
mockObject = boto3.resource('s3').Object('ERROR', 'ERROR/file_name') mockObject = boto3.resource('s3').Object('ERROR', 'ERROR/file_name')
mocker.patch("app.celery.letters_pdf_tasks.get_file_names_from_error_bucket", return_value=[mockObject]) mocker.patch("app.celery.letters_pdf_tasks.get_file_names_from_error_bucket", return_value=[mockObject])
mock_move = mocker.patch("app.celery.letters_pdf_tasks.move_error_pdf_to_scan_bucket") mock_move = mocker.patch("app.celery.letters_pdf_tasks.move_error_pdf_to_scan_bucket")
@@ -376,7 +401,6 @@ def test_replay_letters_in_error_for_all_letters_in_error_bucket(notify_api, moc
def test_replay_letters_in_error_for_one_file(notify_api, mocker): def test_replay_letters_in_error_for_one_file(notify_api, mocker):
import boto3
mockObject = boto3.resource('s3').Object('ERROR', 'ERROR/file_name') mockObject = boto3.resource('s3').Object('ERROR', 'ERROR/file_name')
mocker.patch("app.celery.letters_pdf_tasks.get_file_names_from_error_bucket", return_value=[mockObject]) mocker.patch("app.celery.letters_pdf_tasks.get_file_names_from_error_bucket", return_value=[mockObject])
mock_move = mocker.patch("app.celery.letters_pdf_tasks.move_error_pdf_to_scan_bucket") mock_move = mocker.patch("app.celery.letters_pdf_tasks.move_error_pdf_to_scan_bucket")
@@ -384,3 +408,43 @@ def test_replay_letters_in_error_for_one_file(notify_api, mocker):
replay_letters_in_error("file_name") replay_letters_in_error("file_name")
mock_move.assert_called_once_with('file_name') mock_move.assert_called_once_with('file_name')
mock_celery.assert_called_once_with(name='scan-file', kwargs={'filename': 'file_name'}, queue='antivirus-tasks') mock_celery.assert_called_once_with(name='scan-file', kwargs={'filename': 'file_name'}, queue='antivirus-tasks')
def test_sanitise_precompiled_pdf_returns_data_from_template_preview(rmock, sample_letter_notification):
rmock.post('http://localhost:9999/precompiled/sanitise', content=b'new_pdf', status_code=200)
mock_celery = Mock(**{'retry.side_effect': Retry})
res = _sanitise_precomiled_pdf(mock_celery, sample_letter_notification, b'old_pdf')
assert res == b'new_pdf'
assert rmock.last_request.text == 'old_pdf'
def test_sanitise_precompiled_pdf_returns_none_on_validation_error(rmock, sample_letter_notification):
rmock.post('http://localhost:9999/precompiled/sanitise', content=b'new_pdf', status_code=400)
mock_celery = Mock(**{'retry.side_effect': Retry})
res = _sanitise_precomiled_pdf(mock_celery, sample_letter_notification, b'old_pdf')
assert res is None
def test_sanitise_precompiled_pdf_retries_on_http_error(rmock, sample_letter_notification):
rmock.post('http://localhost:9999/precompiled/sanitise', content=b'new_pdf', status_code=500)
mock_celery = Mock(**{'retry.side_effect': Retry})
with pytest.raises(Retry):
_sanitise_precomiled_pdf(mock_celery, sample_letter_notification, b'old_pdf')
def test_sanitise_precompiled_pdf_sets_notification_to_technical_failure_after_too_many_errors(
rmock,
sample_letter_notification
):
rmock.post('http://localhost:9999/precompiled/sanitise', content=b'new_pdf', status_code=500)
mock_celery = Mock(**{'retry.side_effect': MaxRetriesExceededError})
with pytest.raises(MaxRetriesExceededError):
_sanitise_precomiled_pdf(mock_celery, sample_letter_notification, b'old_pdf')
assert sample_letter_notification.status == NOTIFICATION_TECHNICAL_FAILURE

View File

@@ -11,7 +11,6 @@ from app.letters.utils import (
get_letter_pdf_filename, get_letter_pdf_filename,
get_letter_pdf, get_letter_pdf,
upload_letter_pdf, upload_letter_pdf,
move_scanned_pdf_to_test_or_live_pdf_bucket,
ScanErrorType, move_failed_pdf, get_folder_name ScanErrorType, move_failed_pdf, get_folder_name
) )
from app.models import KEY_TYPE_NORMAL, KEY_TYPE_TEST, PRECOMPILED_TEMPLATE_NAME from app.models import KEY_TYPE_NORMAL, KEY_TYPE_TEST, PRECOMPILED_TEMPLATE_NAME
@@ -138,32 +137,6 @@ def test_upload_letter_pdf_to_correct_bucket(
) )
@mock_s3
@pytest.mark.parametrize('is_test_letter,bucket_config_name,folder_date_name', [
(False, 'LETTERS_PDF_BUCKET_NAME', '2018-03-14/'),
(True, 'TEST_LETTERS_BUCKET_NAME', '')
])
@freeze_time(FROZEN_DATE_TIME)
def test_move_scanned_letter_pdf_to_processing_bucket(
notify_api, is_test_letter, bucket_config_name, folder_date_name
):
filename = 'test.pdf'
source_bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME']
target_bucket_name = current_app.config[bucket_config_name]
conn = boto3.resource('s3', region_name='eu-west-1')
source_bucket = conn.create_bucket(Bucket=source_bucket_name)
target_bucket = conn.create_bucket(Bucket=target_bucket_name)
s3 = boto3.client('s3', region_name='eu-west-1')
s3.put_object(Bucket=source_bucket_name, Key=filename, Body=b'pdf_content')
move_scanned_pdf_to_test_or_live_pdf_bucket(filename, is_test_letter=is_test_letter)
assert folder_date_name + filename in [o.key for o in target_bucket.objects.all()]
assert filename not in [o.key for o in source_bucket.objects.all()]
@mock_s3 @mock_s3
@freeze_time(FROZEN_DATE_TIME) @freeze_time(FROZEN_DATE_TIME)
def test_move_failed_pdf_error(notify_api): def test_move_failed_pdf_error(notify_api):