Merge pull request #2057 from alphagov/sanitise-precompiled

Sanitise precompiled pdfs after virus scan
This commit is contained in:
Leo Hemsted
2018-08-29 12:53:27 +01:00
committed by GitHub
4 changed files with 152 additions and 60 deletions

View File

@@ -7,8 +7,9 @@ from requests import (
post as requests_post,
RequestException
)
from celery.exceptions import MaxRetriesExceededError
from notifications_utils.statsd_decorators import statsd
from notifications_utils.s3 import s3upload
from app import notify_celery
from app.aws import s3
@@ -24,9 +25,11 @@ from app.dao.notifications_dao import (
from app.errors import VirusScanError
from app.letters.utils import (
get_reference_from_filename,
move_scanned_pdf_to_test_or_live_pdf_bucket,
get_folder_name,
upload_letter_pdf,
move_failed_pdf, ScanErrorType, move_error_pdf_to_scan_bucket,
move_failed_pdf,
ScanErrorType,
move_error_pdf_to_scan_bucket,
get_file_names_from_error_bucket
)
from app.models import (
@@ -34,7 +37,8 @@ from app.models import (
NOTIFICATION_CREATED,
NOTIFICATION_DELIVERED,
NOTIFICATION_VIRUS_SCAN_FAILED,
NOTIFICATION_TECHNICAL_FAILURE
NOTIFICATION_TECHNICAL_FAILURE,
# NOTIFICATION_VALIDATION_FAILED
)
@@ -66,7 +70,7 @@ def create_letters_pdf(self, notification_id):
"Letters PDF notification creation for id: {} failed".format(notification_id)
)
self.retry(queue=QueueNames.RETRY)
except self.MaxRetriesExceededError:
except MaxRetriesExceededError:
current_app.logger.exception(
"RETRY FAILED: task create_letters_pdf failed for notification {}".format(notification_id),
)
@@ -163,22 +167,83 @@ def letter_in_created_state(filename):
return False
@notify_celery.task(name='process-virus-scan-passed')
def process_virus_scan_passed(filename):
@notify_celery.task(bind=True, name='process-virus-scan-passed', max_retries=15, default_retry_delay=300)
def process_virus_scan_passed(self, filename):
reference = get_reference_from_filename(filename)
notification = dao_get_notification_by_reference(reference)
current_app.logger.info('notification id {} Virus scan passed: {}'.format(notification.id, filename))
is_test_key = notification.key_type == KEY_TYPE_TEST
move_scanned_pdf_to_test_or_live_pdf_bucket(
scan_pdf_object = s3.get_s3_object(current_app.config['LETTERS_SCAN_BUCKET_NAME'], filename)
old_pdf = scan_pdf_object.get()['Body'].read()
new_pdf = _sanitise_precomiled_pdf(self, notification, old_pdf)
if not new_pdf:
current_app.logger.info('Invalid precompiled pdf received {} ({})'.format(notification.id, filename))
# update_notification_status_by_id(notification.id, NOTIFICATION_VALIDATION_FAILED)
# move_scan_to_invalid_pdf_bucket() # TODO: implement this (and create bucket etc)
# scan_pdf_object.delete()
# return
current_app.logger.info('notification id {} ({}) sanitised and ready to send'.format(notification.id, filename))
# temporarily upload original pdf while testing sanitise flow.
_upload_pdf_to_test_or_live_pdf_bucket(
old_pdf, # TODO: change to new_pdf
filename,
is_test_letter=is_test_key
)
is_test_letter=is_test_key)
update_letter_pdf_status(
reference,
NOTIFICATION_DELIVERED if is_test_key else NOTIFICATION_CREATED
)
scan_pdf_object.delete()
def _upload_pdf_to_test_or_live_pdf_bucket(pdf_data, filename, is_test_letter):
target_bucket_config = 'TEST_LETTERS_BUCKET_NAME' if is_test_letter else 'LETTERS_PDF_BUCKET_NAME'
target_bucket_name = current_app.config[target_bucket_config]
target_filename = get_folder_name(datetime.utcnow(), is_test_letter) + filename
s3upload(
filedata=pdf_data,
region=current_app.config['AWS_REGION'],
bucket_name=target_bucket_name,
file_location=target_filename
)
def _sanitise_precomiled_pdf(self, notification, precompiled_pdf):
try:
resp = requests_post(
'{}/precompiled/sanitise'.format(
current_app.config['TEMPLATE_PREVIEW_API_HOST']
),
data=precompiled_pdf,
headers={'Authorization': 'Token {}'.format(current_app.config['TEMPLATE_PREVIEW_API_KEY'])}
)
resp.raise_for_status()
return resp.content
except RequestException as ex:
if ex.response is not None and ex.response.status_code == 400:
# validation error
return None
try:
current_app.logger.exception(
"sanitise_precomiled_pdf failed for notification: {}".format(notification.id)
)
self.retry(queue=QueueNames.RETRY)
except MaxRetriesExceededError:
current_app.logger.exception(
"RETRY FAILED: sanitise_precomiled_pdf failed for notification {}".format(notification.id),
)
update_notification_status_by_id(notification.id, NOTIFICATION_TECHNICAL_FAILURE)
raise
@notify_celery.task(name='process-virus-scan-failed')
def process_virus_scan_failed(filename):

View File

@@ -90,16 +90,6 @@ def upload_letter_pdf(notification, pdf_data, precompiled=False):
return upload_file_name
def move_scanned_pdf_to_test_or_live_pdf_bucket(source_filename, is_test_letter=False):
source_bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME']
target_bucket_config = 'TEST_LETTERS_BUCKET_NAME' if is_test_letter else 'LETTERS_PDF_BUCKET_NAME'
target_bucket_name = current_app.config[target_bucket_config]
target_filename = get_folder_name(datetime.utcnow(), is_test_letter) + source_filename
_move_s3_object(source_bucket_name, source_filename, target_bucket_name, target_filename)
def move_failed_pdf(source_filename, scan_error_type):
scan_bucket = current_app.config['LETTERS_SCAN_BUCKET_NAME']

View File

@@ -1,12 +1,13 @@
from unittest.mock import Mock, call, ANY
import boto3
from moto import mock_s3
from flask import current_app
from unittest.mock import call
from freezegun import freeze_time
import pytest
import requests_mock
from botocore.exceptions import ClientError
from celery.exceptions import MaxRetriesExceededError
from celery.exceptions import MaxRetriesExceededError, Retry
from requests import RequestException
from sqlalchemy.orm.exc import NoResultFound
@@ -19,7 +20,9 @@ from app.celery.letters_pdf_tasks import (
letter_in_created_state,
process_virus_scan_passed,
process_virus_scan_failed,
process_virus_scan_error, replay_letters_in_error
process_virus_scan_error,
replay_letters_in_error,
_sanitise_precomiled_pdf
)
from app.letters.utils import get_letter_pdf_filename, ScanErrorType
from app.models import (
@@ -320,22 +323,45 @@ def test_letter_in_created_state_fails_if_notification_doesnt_exist(sample_notif
assert letter_in_created_state(filename) is False
@pytest.mark.parametrize('key_type,is_test_letter,noti_status', [
(KEY_TYPE_NORMAL, False, NOTIFICATION_CREATED),
(KEY_TYPE_TEST, True, NOTIFICATION_DELIVERED)
@freeze_time('2018-01-01 18:00')
@mock_s3
@pytest.mark.parametrize('key_type,is_test_letter,noti_status,bucket_config_name,destination_folder', [
(KEY_TYPE_NORMAL, False, NOTIFICATION_CREATED, 'LETTERS_PDF_BUCKET_NAME', '2018-01-02/'),
(KEY_TYPE_TEST, True, NOTIFICATION_DELIVERED, 'TEST_LETTERS_BUCKET_NAME', '')
])
def test_process_letter_task_check_virus_scan_passed(
sample_letter_notification, mocker, key_type, is_test_letter, noti_status
sample_letter_notification, mocker, key_type, is_test_letter, noti_status, bucket_config_name, destination_folder
):
filename = 'NOTIFY.{}'.format(sample_letter_notification.reference)
source_bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME']
target_bucket_name = current_app.config[bucket_config_name]
conn = boto3.resource('s3', region_name='eu-west-1')
conn.create_bucket(Bucket=source_bucket_name)
conn.create_bucket(Bucket=target_bucket_name)
s3 = boto3.client('s3', region_name='eu-west-1')
s3.put_object(Bucket=source_bucket_name, Key=filename, Body=b'pdf_content')
sample_letter_notification.status = 'pending-virus-check'
sample_letter_notification.key_type = key_type
mock_move_pdf = mocker.patch('app.celery.letters_pdf_tasks.move_scanned_pdf_to_test_or_live_pdf_bucket')
mock_s3upload = mocker.patch('app.celery.letters_pdf_tasks.s3upload')
mock_sanitise = mocker.patch('app.celery.letters_pdf_tasks._sanitise_precomiled_pdf')
process_virus_scan_passed(filename)
mock_move_pdf.assert_called_once_with(filename, is_test_letter=is_test_letter)
assert sample_letter_notification.status == noti_status
mock_s3upload.assert_called_once_with(
filedata=b'pdf_content',
region='eu-west-1',
bucket_name=target_bucket_name,
file_location=destination_folder + filename
)
mock_sanitise.assert_called_once_with(
ANY,
sample_letter_notification,
b'pdf_content'
)
def test_process_letter_task_check_virus_scan_failed(sample_letter_notification, mocker):
@@ -365,7 +391,6 @@ def test_process_letter_task_check_virus_scan_error(sample_letter_notification,
def test_replay_letters_in_error_for_all_letters_in_error_bucket(notify_api, mocker):
import boto3
mockObject = boto3.resource('s3').Object('ERROR', 'ERROR/file_name')
mocker.patch("app.celery.letters_pdf_tasks.get_file_names_from_error_bucket", return_value=[mockObject])
mock_move = mocker.patch("app.celery.letters_pdf_tasks.move_error_pdf_to_scan_bucket")
@@ -376,7 +401,6 @@ def test_replay_letters_in_error_for_all_letters_in_error_bucket(notify_api, moc
def test_replay_letters_in_error_for_one_file(notify_api, mocker):
import boto3
mockObject = boto3.resource('s3').Object('ERROR', 'ERROR/file_name')
mocker.patch("app.celery.letters_pdf_tasks.get_file_names_from_error_bucket", return_value=[mockObject])
mock_move = mocker.patch("app.celery.letters_pdf_tasks.move_error_pdf_to_scan_bucket")
@@ -384,3 +408,43 @@ def test_replay_letters_in_error_for_one_file(notify_api, mocker):
replay_letters_in_error("file_name")
mock_move.assert_called_once_with('file_name')
mock_celery.assert_called_once_with(name='scan-file', kwargs={'filename': 'file_name'}, queue='antivirus-tasks')
def test_sanitise_precompiled_pdf_returns_data_from_template_preview(rmock, sample_letter_notification):
rmock.post('http://localhost:9999/precompiled/sanitise', content=b'new_pdf', status_code=200)
mock_celery = Mock(**{'retry.side_effect': Retry})
res = _sanitise_precomiled_pdf(mock_celery, sample_letter_notification, b'old_pdf')
assert res == b'new_pdf'
assert rmock.last_request.text == 'old_pdf'
def test_sanitise_precompiled_pdf_returns_none_on_validation_error(rmock, sample_letter_notification):
rmock.post('http://localhost:9999/precompiled/sanitise', content=b'new_pdf', status_code=400)
mock_celery = Mock(**{'retry.side_effect': Retry})
res = _sanitise_precomiled_pdf(mock_celery, sample_letter_notification, b'old_pdf')
assert res is None
def test_sanitise_precompiled_pdf_retries_on_http_error(rmock, sample_letter_notification):
rmock.post('http://localhost:9999/precompiled/sanitise', content=b'new_pdf', status_code=500)
mock_celery = Mock(**{'retry.side_effect': Retry})
with pytest.raises(Retry):
_sanitise_precomiled_pdf(mock_celery, sample_letter_notification, b'old_pdf')
def test_sanitise_precompiled_pdf_sets_notification_to_technical_failure_after_too_many_errors(
rmock,
sample_letter_notification
):
rmock.post('http://localhost:9999/precompiled/sanitise', content=b'new_pdf', status_code=500)
mock_celery = Mock(**{'retry.side_effect': MaxRetriesExceededError})
with pytest.raises(MaxRetriesExceededError):
_sanitise_precomiled_pdf(mock_celery, sample_letter_notification, b'old_pdf')
assert sample_letter_notification.status == NOTIFICATION_TECHNICAL_FAILURE

View File

@@ -11,7 +11,6 @@ from app.letters.utils import (
get_letter_pdf_filename,
get_letter_pdf,
upload_letter_pdf,
move_scanned_pdf_to_test_or_live_pdf_bucket,
ScanErrorType, move_failed_pdf, get_folder_name
)
from app.models import KEY_TYPE_NORMAL, KEY_TYPE_TEST, PRECOMPILED_TEMPLATE_NAME
@@ -138,32 +137,6 @@ def test_upload_letter_pdf_to_correct_bucket(
)
@mock_s3
@pytest.mark.parametrize('is_test_letter,bucket_config_name,folder_date_name', [
(False, 'LETTERS_PDF_BUCKET_NAME', '2018-03-14/'),
(True, 'TEST_LETTERS_BUCKET_NAME', '')
])
@freeze_time(FROZEN_DATE_TIME)
def test_move_scanned_letter_pdf_to_processing_bucket(
notify_api, is_test_letter, bucket_config_name, folder_date_name
):
filename = 'test.pdf'
source_bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME']
target_bucket_name = current_app.config[bucket_config_name]
conn = boto3.resource('s3', region_name='eu-west-1')
source_bucket = conn.create_bucket(Bucket=source_bucket_name)
target_bucket = conn.create_bucket(Bucket=target_bucket_name)
s3 = boto3.client('s3', region_name='eu-west-1')
s3.put_object(Bucket=source_bucket_name, Key=filename, Body=b'pdf_content')
move_scanned_pdf_to_test_or_live_pdf_bucket(filename, is_test_letter=is_test_letter)
assert folder_date_name + filename in [o.key for o in target_bucket.objects.all()]
assert filename not in [o.key for o in source_bucket.objects.all()]
@mock_s3
@freeze_time(FROZEN_DATE_TIME)
def test_move_failed_pdf_error(notify_api):