Merge pull request #2608 from alphagov/put_non_redacted_files_in_a_bucket

Copy the original PDF to the REDACTION_FAILURE/ folder of the letters scan bucket when redaction fails
This commit is contained in:
Pea (Malgorzata Tyczynska)
2019-09-10 16:12:32 +01:00
committed by GitHub
4 changed files with 108 additions and 8 deletions

View File

@@ -31,6 +31,7 @@ from app.dao.notifications_dao import (
)
from app.errors import VirusScanError
from app.letters.utils import (
copy_redaction_failed_pdf,
get_reference_from_filename,
get_folder_name,
upload_letter_pdf,
@@ -219,10 +220,19 @@ def process_virus_scan_passed(self, filename):
if not sanitise_response:
new_pdf = None
else:
sanitise_response = sanitise_response.json()
try:
new_pdf = base64.b64decode(sanitise_response.json()["file"].encode())
new_pdf = base64.b64decode(sanitise_response["file"].encode())
except JSONDecodeError:
new_pdf = sanitise_response.content
redaction_failed_message = sanitise_response.get("redaction_failed_message")
if redaction_failed_message:
current_app.logger.info('{} for notification id {} ({})'.format(
redaction_failed_message, notification.id, filename)
)
copy_redaction_failed_pdf(filename)
# TODO: Remove this once CYSP update their template to not cross over the margins
if notification.service_id == UUID('fe44178f-3b45-4625-9f85-2264a36dd9ec'): # CYSP
# Check your state pension submit letters with good addresses and notify tags, so just use their supplied pdf

View File

@@ -113,6 +113,14 @@ def move_failed_pdf(source_filename, scan_error_type):
_move_s3_object(scan_bucket, source_filename, scan_bucket, target_filename)
def copy_redaction_failed_pdf(source_filename):
    """Keep a copy of a letter PDF under the ``REDACTION_FAILURE/`` prefix.

    The bucket is read from the ``LETTERS_SCAN_BUCKET_NAME`` config entry and
    is used as both the source and the destination of the copy, which is
    delegated to ``_copy_s3_object``.

    :param source_filename: key of the PDF in the letters scan bucket
    """
    scan_bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME']
    _copy_s3_object(
        scan_bucket_name,
        source_filename,
        scan_bucket_name,
        'REDACTION_FAILURE/' + source_filename,
    )
def move_error_pdf_to_scan_bucket(source_filename):
scan_bucket = current_app.config['LETTERS_SCAN_BUCKET_NAME']
error_file = 'ERROR/' + source_filename
@@ -166,6 +174,22 @@ def _move_s3_object(source_bucket, source_filename, target_bucket, target_filena
source_bucket, source_filename, target_bucket, target_filename))
def _copy_s3_object(source_bucket, source_filename, target_bucket, target_filename):
    """Copy an S3 object to ``target_bucket``/``target_filename`` and log it.

    The copy is requested with AES256 server-side encryption.

    :param source_bucket: name of the bucket holding the object to copy
    :param source_filename: key of the object to copy
    :param target_bucket: name of the bucket to copy into
    :param target_filename: key to give the copy in the target bucket
    """
    s3 = boto3.resource('s3')
    copy_source = {'Bucket': source_bucket, 'Key': source_filename}

    # Do not rebind ``target_bucket`` to the Bucket resource: the log line
    # below must format the plain bucket name, not the resource object.
    destination = s3.Bucket(target_bucket).Object(target_filename)

    # Tags are copied across but the expiration time is reset in the destination bucket
    # e.g. if a file has 5 days left to expire on a ONE_WEEK retention in the source bucket,
    # in the destination bucket the expiration time will be reset to 7 days left to expire
    destination.copy(copy_source, ExtraArgs={'ServerSideEncryption': 'AES256'})

    current_app.logger.info("Copied letter PDF: {}/{} to {}/{}".format(
        source_bucket, source_filename, target_bucket, target_filename))
def letter_print_day(created_at):
bst_print_datetime = convert_utc_to_bst(created_at) + timedelta(hours=6, minutes=30)
bst_print_date = bst_print_datetime.date()

View File

@@ -452,11 +452,9 @@ def test_process_letter_task_check_virus_scan_passed(
@freeze_time('2018-01-01 18:00')
@mock_s3
@pytest.mark.parametrize('key_type,is_test_letter', [
(KEY_TYPE_NORMAL, False), (KEY_TYPE_TEST, True)
])
@pytest.mark.parametrize('key_type', [KEY_TYPE_NORMAL, KEY_TYPE_TEST])
def test_process_letter_task_check_virus_scan_passed_when_sanitise_fails(
sample_letter_notification, mocker, key_type, is_test_letter
sample_letter_notification, mocker, key_type
):
filename = 'NOTIFY.{}'.format(sample_letter_notification.reference)
source_bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME']
@@ -496,11 +494,60 @@ def test_process_letter_task_check_virus_scan_passed_when_sanitise_fails(
@freeze_time('2018-01-01 18:00')
@mock_s3
@pytest.mark.parametrize('key_type,is_test_letter', [
(KEY_TYPE_NORMAL, False), (KEY_TYPE_TEST, True)
@pytest.mark.parametrize('key_type,notification_status,bucket_config_name', [
    (KEY_TYPE_NORMAL, NOTIFICATION_CREATED, 'LETTERS_PDF_BUCKET_NAME'),
    (KEY_TYPE_TEST, NOTIFICATION_DELIVERED, 'TEST_LETTERS_BUCKET_NAME')
])
def test_process_letter_task_check_virus_scan_passed_when_redaction_fails(
    sample_letter_notification, mocker, key_type, notification_status, bucket_config_name
):
    """A sanitise response carrying ``redaction_failed_message`` triggers a copy.

    The mocked sanitise endpoint reports that validation passed but redaction
    failed. The notification is expected to end up with the parametrised
    status and 2 billable units, and ``_copy_s3_object`` must be called once
    to copy the scanned file to ``REDACTION_FAILURE/<filename>`` in the scan
    bucket.
    """
    filename = 'NOTIFY.{}'.format(sample_letter_notification.reference)
    bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME']
    target_bucket_name = current_app.config[bucket_config_name]

    # Both the scan bucket and the destination bucket must exist in mocked S3.
    s3_resource = boto3.resource('s3', region_name='eu-west-1')
    for name in (bucket_name, target_bucket_name):
        s3_resource.create_bucket(Bucket=name)

    s3_client = boto3.client('s3', region_name='eu-west-1')
    s3_client.put_object(Bucket=bucket_name, Key=filename, Body=b'pdf_content')

    sample_letter_notification.status = NOTIFICATION_PENDING_VIRUS_CHECK
    sample_letter_notification.key_type = key_type

    mock_copy_s3 = mocker.patch('app.letters.utils._copy_s3_object')
    mocker.patch('app.celery.letters_pdf_tasks._get_page_count', return_value=2)

    endpoint = 'http://localhost:9999/precompiled/sanitise'
    sanitise_payload = {
        "file": base64.b64encode(b"new_pdf").decode("utf-8"),
        "validation_passed": True,
        "redaction_failed_message": "No matches for address block during redaction procedure",
        "errors": {
            "content_outside_of_printable_area": [],
            "document_not_a4_size_portrait_orientation": []
        }
    }

    with requests_mock.mock() as rmock:
        rmock.request("POST", endpoint, json=sanitise_payload, status_code=200)
        process_virus_scan_passed(filename)

    assert sample_letter_notification.billable_units == 2
    assert sample_letter_notification.status == notification_status
    mock_copy_s3.assert_called_once_with(
        bucket_name, filename,
        bucket_name, 'REDACTION_FAILURE/' + filename
    )
@freeze_time('2018-01-01 18:00')
@mock_s3
@pytest.mark.parametrize('key_type', [KEY_TYPE_NORMAL, KEY_TYPE_TEST])
def test_process_letter_task_check_virus_scan_passed_when_file_cannot_be_opened(
sample_letter_notification, mocker, key_type, is_test_letter
sample_letter_notification, mocker, key_type
):
filename = 'NOTIFY.{}'.format(sample_letter_notification.reference)
source_bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME']

View File

@@ -7,6 +7,7 @@ from freezegun import freeze_time
from moto import mock_s3
from app.letters.utils import (
copy_redaction_failed_pdf,
get_bucket_name_and_prefix_for_notification,
get_letter_pdf_filename,
get_letter_pdf,
@@ -265,6 +266,24 @@ def test_move_failed_pdf_scan_failed(notify_api):
assert filename not in [o.key for o in bucket.objects.all()]
@mock_s3
@freeze_time(FROZEN_DATE_TIME)
def test_copy_redaction_failed_pdf(notify_api):
    """Both the original key and its ``REDACTION_FAILURE/`` copy exist afterwards."""
    filename = 'test.pdf'
    bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME']

    bucket = boto3.resource('s3', region_name='eu-west-1').create_bucket(Bucket=bucket_name)
    boto3.client('s3', region_name='eu-west-1').put_object(
        Bucket=bucket_name, Key=filename, Body=b'pdf_content'
    )

    copy_redaction_failed_pdf(filename)

    keys_after_copy = {o.key for o in bucket.objects.all()}
    # The copy must be present under the failure prefix...
    assert 'REDACTION_FAILURE/' + filename in keys_after_copy
    # ...and the source object must still be there.
    assert filename in keys_after_copy
@pytest.mark.parametrize("freeze_date, expected_folder_name",
[("2018-04-01 17:50:00", "2018-04-02/"),
("2018-07-02 16:29:00", "2018-07-02/"),