mirror of
https://github.com/GSA/notifications-api.git
synced 2025-12-24 17:31:34 -05:00
Merge pull request #2057 from alphagov/sanitise-precompiled
Sanitise precompiled pdfs after virus scan
This commit is contained in:
@@ -7,8 +7,9 @@ from requests import (
|
||||
post as requests_post,
|
||||
RequestException
|
||||
)
|
||||
|
||||
from celery.exceptions import MaxRetriesExceededError
|
||||
from notifications_utils.statsd_decorators import statsd
|
||||
from notifications_utils.s3 import s3upload
|
||||
|
||||
from app import notify_celery
|
||||
from app.aws import s3
|
||||
@@ -24,9 +25,11 @@ from app.dao.notifications_dao import (
|
||||
from app.errors import VirusScanError
|
||||
from app.letters.utils import (
|
||||
get_reference_from_filename,
|
||||
move_scanned_pdf_to_test_or_live_pdf_bucket,
|
||||
get_folder_name,
|
||||
upload_letter_pdf,
|
||||
move_failed_pdf, ScanErrorType, move_error_pdf_to_scan_bucket,
|
||||
move_failed_pdf,
|
||||
ScanErrorType,
|
||||
move_error_pdf_to_scan_bucket,
|
||||
get_file_names_from_error_bucket
|
||||
)
|
||||
from app.models import (
|
||||
@@ -34,7 +37,8 @@ from app.models import (
|
||||
NOTIFICATION_CREATED,
|
||||
NOTIFICATION_DELIVERED,
|
||||
NOTIFICATION_VIRUS_SCAN_FAILED,
|
||||
NOTIFICATION_TECHNICAL_FAILURE
|
||||
NOTIFICATION_TECHNICAL_FAILURE,
|
||||
# NOTIFICATION_VALIDATION_FAILED
|
||||
)
|
||||
|
||||
|
||||
@@ -66,7 +70,7 @@ def create_letters_pdf(self, notification_id):
|
||||
"Letters PDF notification creation for id: {} failed".format(notification_id)
|
||||
)
|
||||
self.retry(queue=QueueNames.RETRY)
|
||||
except self.MaxRetriesExceededError:
|
||||
except MaxRetriesExceededError:
|
||||
current_app.logger.exception(
|
||||
"RETRY FAILED: task create_letters_pdf failed for notification {}".format(notification_id),
|
||||
)
|
||||
@@ -163,22 +167,83 @@ def letter_in_created_state(filename):
|
||||
return False
|
||||
|
||||
|
||||
@notify_celery.task(name='process-virus-scan-passed')
|
||||
def process_virus_scan_passed(filename):
|
||||
@notify_celery.task(bind=True, name='process-virus-scan-passed', max_retries=15, default_retry_delay=300)
|
||||
def process_virus_scan_passed(self, filename):
|
||||
reference = get_reference_from_filename(filename)
|
||||
notification = dao_get_notification_by_reference(reference)
|
||||
current_app.logger.info('notification id {} Virus scan passed: {}'.format(notification.id, filename))
|
||||
|
||||
is_test_key = notification.key_type == KEY_TYPE_TEST
|
||||
move_scanned_pdf_to_test_or_live_pdf_bucket(
|
||||
|
||||
scan_pdf_object = s3.get_s3_object(current_app.config['LETTERS_SCAN_BUCKET_NAME'], filename)
|
||||
old_pdf = scan_pdf_object.get()['Body'].read()
|
||||
|
||||
new_pdf = _sanitise_precomiled_pdf(self, notification, old_pdf)
|
||||
|
||||
if not new_pdf:
|
||||
current_app.logger.info('Invalid precompiled pdf received {} ({})'.format(notification.id, filename))
|
||||
# update_notification_status_by_id(notification.id, NOTIFICATION_VALIDATION_FAILED)
|
||||
# move_scan_to_invalid_pdf_bucket() # TODO: implement this (and create bucket etc)
|
||||
# scan_pdf_object.delete()
|
||||
# return
|
||||
|
||||
current_app.logger.info('notification id {} ({}) sanitised and ready to send'.format(notification.id, filename))
|
||||
|
||||
# temporarily upload original pdf while testing sanitise flow.
|
||||
_upload_pdf_to_test_or_live_pdf_bucket(
|
||||
old_pdf, # TODO: change to new_pdf
|
||||
filename,
|
||||
is_test_letter=is_test_key
|
||||
)
|
||||
is_test_letter=is_test_key)
|
||||
|
||||
update_letter_pdf_status(
|
||||
reference,
|
||||
NOTIFICATION_DELIVERED if is_test_key else NOTIFICATION_CREATED
|
||||
)
|
||||
|
||||
scan_pdf_object.delete()
|
||||
|
||||
|
||||
def _upload_pdf_to_test_or_live_pdf_bucket(pdf_data, filename, is_test_letter):
|
||||
target_bucket_config = 'TEST_LETTERS_BUCKET_NAME' if is_test_letter else 'LETTERS_PDF_BUCKET_NAME'
|
||||
target_bucket_name = current_app.config[target_bucket_config]
|
||||
target_filename = get_folder_name(datetime.utcnow(), is_test_letter) + filename
|
||||
|
||||
s3upload(
|
||||
filedata=pdf_data,
|
||||
region=current_app.config['AWS_REGION'],
|
||||
bucket_name=target_bucket_name,
|
||||
file_location=target_filename
|
||||
)
|
||||
|
||||
|
||||
def _sanitise_precomiled_pdf(self, notification, precompiled_pdf):
|
||||
try:
|
||||
resp = requests_post(
|
||||
'{}/precompiled/sanitise'.format(
|
||||
current_app.config['TEMPLATE_PREVIEW_API_HOST']
|
||||
),
|
||||
data=precompiled_pdf,
|
||||
headers={'Authorization': 'Token {}'.format(current_app.config['TEMPLATE_PREVIEW_API_KEY'])}
|
||||
)
|
||||
resp.raise_for_status()
|
||||
return resp.content
|
||||
except RequestException as ex:
|
||||
if ex.response is not None and ex.response.status_code == 400:
|
||||
# validation error
|
||||
return None
|
||||
|
||||
try:
|
||||
current_app.logger.exception(
|
||||
"sanitise_precomiled_pdf failed for notification: {}".format(notification.id)
|
||||
)
|
||||
self.retry(queue=QueueNames.RETRY)
|
||||
except MaxRetriesExceededError:
|
||||
current_app.logger.exception(
|
||||
"RETRY FAILED: sanitise_precomiled_pdf failed for notification {}".format(notification.id),
|
||||
)
|
||||
update_notification_status_by_id(notification.id, NOTIFICATION_TECHNICAL_FAILURE)
|
||||
raise
|
||||
|
||||
|
||||
@notify_celery.task(name='process-virus-scan-failed')
|
||||
def process_virus_scan_failed(filename):
|
||||
|
||||
@@ -90,16 +90,6 @@ def upload_letter_pdf(notification, pdf_data, precompiled=False):
|
||||
return upload_file_name
|
||||
|
||||
|
||||
def move_scanned_pdf_to_test_or_live_pdf_bucket(source_filename, is_test_letter=False):
|
||||
source_bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME']
|
||||
target_bucket_config = 'TEST_LETTERS_BUCKET_NAME' if is_test_letter else 'LETTERS_PDF_BUCKET_NAME'
|
||||
target_bucket_name = current_app.config[target_bucket_config]
|
||||
|
||||
target_filename = get_folder_name(datetime.utcnow(), is_test_letter) + source_filename
|
||||
|
||||
_move_s3_object(source_bucket_name, source_filename, target_bucket_name, target_filename)
|
||||
|
||||
|
||||
def move_failed_pdf(source_filename, scan_error_type):
|
||||
scan_bucket = current_app.config['LETTERS_SCAN_BUCKET_NAME']
|
||||
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
from unittest.mock import Mock, call, ANY
|
||||
|
||||
import boto3
|
||||
from moto import mock_s3
|
||||
from flask import current_app
|
||||
|
||||
from unittest.mock import call
|
||||
|
||||
from freezegun import freeze_time
|
||||
import pytest
|
||||
import requests_mock
|
||||
from botocore.exceptions import ClientError
|
||||
from celery.exceptions import MaxRetriesExceededError
|
||||
from celery.exceptions import MaxRetriesExceededError, Retry
|
||||
from requests import RequestException
|
||||
from sqlalchemy.orm.exc import NoResultFound
|
||||
|
||||
@@ -19,7 +20,9 @@ from app.celery.letters_pdf_tasks import (
|
||||
letter_in_created_state,
|
||||
process_virus_scan_passed,
|
||||
process_virus_scan_failed,
|
||||
process_virus_scan_error, replay_letters_in_error
|
||||
process_virus_scan_error,
|
||||
replay_letters_in_error,
|
||||
_sanitise_precomiled_pdf
|
||||
)
|
||||
from app.letters.utils import get_letter_pdf_filename, ScanErrorType
|
||||
from app.models import (
|
||||
@@ -320,22 +323,45 @@ def test_letter_in_created_state_fails_if_notification_doesnt_exist(sample_notif
|
||||
assert letter_in_created_state(filename) is False
|
||||
|
||||
|
||||
@pytest.mark.parametrize('key_type,is_test_letter,noti_status', [
|
||||
(KEY_TYPE_NORMAL, False, NOTIFICATION_CREATED),
|
||||
(KEY_TYPE_TEST, True, NOTIFICATION_DELIVERED)
|
||||
@freeze_time('2018-01-01 18:00')
|
||||
@mock_s3
|
||||
@pytest.mark.parametrize('key_type,is_test_letter,noti_status,bucket_config_name,destination_folder', [
|
||||
(KEY_TYPE_NORMAL, False, NOTIFICATION_CREATED, 'LETTERS_PDF_BUCKET_NAME', '2018-01-02/'),
|
||||
(KEY_TYPE_TEST, True, NOTIFICATION_DELIVERED, 'TEST_LETTERS_BUCKET_NAME', '')
|
||||
])
|
||||
def test_process_letter_task_check_virus_scan_passed(
|
||||
sample_letter_notification, mocker, key_type, is_test_letter, noti_status
|
||||
sample_letter_notification, mocker, key_type, is_test_letter, noti_status, bucket_config_name, destination_folder
|
||||
):
|
||||
filename = 'NOTIFY.{}'.format(sample_letter_notification.reference)
|
||||
source_bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME']
|
||||
target_bucket_name = current_app.config[bucket_config_name]
|
||||
|
||||
conn = boto3.resource('s3', region_name='eu-west-1')
|
||||
conn.create_bucket(Bucket=source_bucket_name)
|
||||
conn.create_bucket(Bucket=target_bucket_name)
|
||||
|
||||
s3 = boto3.client('s3', region_name='eu-west-1')
|
||||
s3.put_object(Bucket=source_bucket_name, Key=filename, Body=b'pdf_content')
|
||||
|
||||
sample_letter_notification.status = 'pending-virus-check'
|
||||
sample_letter_notification.key_type = key_type
|
||||
mock_move_pdf = mocker.patch('app.celery.letters_pdf_tasks.move_scanned_pdf_to_test_or_live_pdf_bucket')
|
||||
mock_s3upload = mocker.patch('app.celery.letters_pdf_tasks.s3upload')
|
||||
mock_sanitise = mocker.patch('app.celery.letters_pdf_tasks._sanitise_precomiled_pdf')
|
||||
|
||||
process_virus_scan_passed(filename)
|
||||
|
||||
mock_move_pdf.assert_called_once_with(filename, is_test_letter=is_test_letter)
|
||||
assert sample_letter_notification.status == noti_status
|
||||
mock_s3upload.assert_called_once_with(
|
||||
filedata=b'pdf_content',
|
||||
region='eu-west-1',
|
||||
bucket_name=target_bucket_name,
|
||||
file_location=destination_folder + filename
|
||||
)
|
||||
mock_sanitise.assert_called_once_with(
|
||||
ANY,
|
||||
sample_letter_notification,
|
||||
b'pdf_content'
|
||||
)
|
||||
|
||||
|
||||
def test_process_letter_task_check_virus_scan_failed(sample_letter_notification, mocker):
|
||||
@@ -365,7 +391,6 @@ def test_process_letter_task_check_virus_scan_error(sample_letter_notification,
|
||||
|
||||
|
||||
def test_replay_letters_in_error_for_all_letters_in_error_bucket(notify_api, mocker):
|
||||
import boto3
|
||||
mockObject = boto3.resource('s3').Object('ERROR', 'ERROR/file_name')
|
||||
mocker.patch("app.celery.letters_pdf_tasks.get_file_names_from_error_bucket", return_value=[mockObject])
|
||||
mock_move = mocker.patch("app.celery.letters_pdf_tasks.move_error_pdf_to_scan_bucket")
|
||||
@@ -376,7 +401,6 @@ def test_replay_letters_in_error_for_all_letters_in_error_bucket(notify_api, moc
|
||||
|
||||
|
||||
def test_replay_letters_in_error_for_one_file(notify_api, mocker):
|
||||
import boto3
|
||||
mockObject = boto3.resource('s3').Object('ERROR', 'ERROR/file_name')
|
||||
mocker.patch("app.celery.letters_pdf_tasks.get_file_names_from_error_bucket", return_value=[mockObject])
|
||||
mock_move = mocker.patch("app.celery.letters_pdf_tasks.move_error_pdf_to_scan_bucket")
|
||||
@@ -384,3 +408,43 @@ def test_replay_letters_in_error_for_one_file(notify_api, mocker):
|
||||
replay_letters_in_error("file_name")
|
||||
mock_move.assert_called_once_with('file_name')
|
||||
mock_celery.assert_called_once_with(name='scan-file', kwargs={'filename': 'file_name'}, queue='antivirus-tasks')
|
||||
|
||||
|
||||
def test_sanitise_precompiled_pdf_returns_data_from_template_preview(rmock, sample_letter_notification):
|
||||
rmock.post('http://localhost:9999/precompiled/sanitise', content=b'new_pdf', status_code=200)
|
||||
mock_celery = Mock(**{'retry.side_effect': Retry})
|
||||
|
||||
res = _sanitise_precomiled_pdf(mock_celery, sample_letter_notification, b'old_pdf')
|
||||
|
||||
assert res == b'new_pdf'
|
||||
assert rmock.last_request.text == 'old_pdf'
|
||||
|
||||
|
||||
def test_sanitise_precompiled_pdf_returns_none_on_validation_error(rmock, sample_letter_notification):
|
||||
rmock.post('http://localhost:9999/precompiled/sanitise', content=b'new_pdf', status_code=400)
|
||||
mock_celery = Mock(**{'retry.side_effect': Retry})
|
||||
|
||||
res = _sanitise_precomiled_pdf(mock_celery, sample_letter_notification, b'old_pdf')
|
||||
|
||||
assert res is None
|
||||
|
||||
|
||||
def test_sanitise_precompiled_pdf_retries_on_http_error(rmock, sample_letter_notification):
|
||||
rmock.post('http://localhost:9999/precompiled/sanitise', content=b'new_pdf', status_code=500)
|
||||
mock_celery = Mock(**{'retry.side_effect': Retry})
|
||||
|
||||
with pytest.raises(Retry):
|
||||
_sanitise_precomiled_pdf(mock_celery, sample_letter_notification, b'old_pdf')
|
||||
|
||||
|
||||
def test_sanitise_precompiled_pdf_sets_notification_to_technical_failure_after_too_many_errors(
|
||||
rmock,
|
||||
sample_letter_notification
|
||||
):
|
||||
rmock.post('http://localhost:9999/precompiled/sanitise', content=b'new_pdf', status_code=500)
|
||||
mock_celery = Mock(**{'retry.side_effect': MaxRetriesExceededError})
|
||||
|
||||
with pytest.raises(MaxRetriesExceededError):
|
||||
_sanitise_precomiled_pdf(mock_celery, sample_letter_notification, b'old_pdf')
|
||||
|
||||
assert sample_letter_notification.status == NOTIFICATION_TECHNICAL_FAILURE
|
||||
|
||||
@@ -11,7 +11,6 @@ from app.letters.utils import (
|
||||
get_letter_pdf_filename,
|
||||
get_letter_pdf,
|
||||
upload_letter_pdf,
|
||||
move_scanned_pdf_to_test_or_live_pdf_bucket,
|
||||
ScanErrorType, move_failed_pdf, get_folder_name
|
||||
)
|
||||
from app.models import KEY_TYPE_NORMAL, KEY_TYPE_TEST, PRECOMPILED_TEMPLATE_NAME
|
||||
@@ -138,32 +137,6 @@ def test_upload_letter_pdf_to_correct_bucket(
|
||||
)
|
||||
|
||||
|
||||
@mock_s3
|
||||
@pytest.mark.parametrize('is_test_letter,bucket_config_name,folder_date_name', [
|
||||
(False, 'LETTERS_PDF_BUCKET_NAME', '2018-03-14/'),
|
||||
(True, 'TEST_LETTERS_BUCKET_NAME', '')
|
||||
])
|
||||
@freeze_time(FROZEN_DATE_TIME)
|
||||
def test_move_scanned_letter_pdf_to_processing_bucket(
|
||||
notify_api, is_test_letter, bucket_config_name, folder_date_name
|
||||
):
|
||||
filename = 'test.pdf'
|
||||
source_bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME']
|
||||
target_bucket_name = current_app.config[bucket_config_name]
|
||||
|
||||
conn = boto3.resource('s3', region_name='eu-west-1')
|
||||
source_bucket = conn.create_bucket(Bucket=source_bucket_name)
|
||||
target_bucket = conn.create_bucket(Bucket=target_bucket_name)
|
||||
|
||||
s3 = boto3.client('s3', region_name='eu-west-1')
|
||||
s3.put_object(Bucket=source_bucket_name, Key=filename, Body=b'pdf_content')
|
||||
|
||||
move_scanned_pdf_to_test_or_live_pdf_bucket(filename, is_test_letter=is_test_letter)
|
||||
|
||||
assert folder_date_name + filename in [o.key for o in target_bucket.objects.all()]
|
||||
assert filename not in [o.key for o in source_bucket.objects.all()]
|
||||
|
||||
|
||||
@mock_s3
|
||||
@freeze_time(FROZEN_DATE_TIME)
|
||||
def test_move_failed_pdf_error(notify_api):
|
||||
|
||||
Reference in New Issue
Block a user