mirror of
https://github.com/GSA/notifications-api.git
synced 2026-02-04 02:11:11 -05:00
Put pdf letter validation failed message in S3 metadata
So it can be used to tell the user why the letter has failed validation
This commit is contained in:
@@ -49,7 +49,6 @@ from app.models import (
|
|||||||
NOTIFICATION_VIRUS_SCAN_FAILED,
|
NOTIFICATION_VIRUS_SCAN_FAILED,
|
||||||
)
|
)
|
||||||
from app.cronitor import cronitor
|
from app.cronitor import cronitor
|
||||||
from json import JSONDecodeError
|
|
||||||
|
|
||||||
|
|
||||||
@notify_celery.task(bind=True, name="create-letters-pdf", max_retries=15, default_retry_delay=300)
|
@notify_celery.task(bind=True, name="create-letters-pdf", max_retries=15, default_retry_delay=300)
|
||||||
@@ -214,18 +213,16 @@ def process_virus_scan_passed(self, filename):
|
|||||||
billable_units = get_page_count(old_pdf)
|
billable_units = get_page_count(old_pdf)
|
||||||
except PdfReadError:
|
except PdfReadError:
|
||||||
current_app.logger.exception(msg='Invalid PDF received for notification_id: {}'.format(notification.id))
|
current_app.logger.exception(msg='Invalid PDF received for notification_id: {}'.format(notification.id))
|
||||||
_move_invalid_letter_and_update_status(notification, filename, scan_pdf_object)
|
_move_invalid_letter_and_update_status(
|
||||||
|
notification=notification, filename=filename, scan_pdf_object=scan_pdf_object
|
||||||
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
sanitise_response = _sanitise_precompiled_pdf(self, notification, old_pdf)
|
sanitise_response = _sanitise_precompiled_pdf(self, notification, old_pdf)
|
||||||
if not sanitise_response:
|
if sanitise_response["message"]: # is response without message attribute possible now? I think not?
|
||||||
new_pdf = None
|
new_pdf = None
|
||||||
else:
|
else:
|
||||||
sanitise_response = sanitise_response.json()
|
new_pdf = base64.b64decode(sanitise_response["file"].encode())
|
||||||
try:
|
|
||||||
new_pdf = base64.b64decode(sanitise_response["file"].encode())
|
|
||||||
except JSONDecodeError:
|
|
||||||
new_pdf = sanitise_response.content
|
|
||||||
|
|
||||||
redaction_failed_message = sanitise_response.get("redaction_failed_message")
|
redaction_failed_message = sanitise_response.get("redaction_failed_message")
|
||||||
if redaction_failed_message and not is_test_key:
|
if redaction_failed_message and not is_test_key:
|
||||||
@@ -241,7 +238,14 @@ def process_virus_scan_passed(self, filename):
|
|||||||
|
|
||||||
if not new_pdf:
|
if not new_pdf:
|
||||||
current_app.logger.info('Invalid precompiled pdf received {} ({})'.format(notification.id, filename))
|
current_app.logger.info('Invalid precompiled pdf received {} ({})'.format(notification.id, filename))
|
||||||
_move_invalid_letter_and_update_status(notification, filename, scan_pdf_object)
|
_move_invalid_letter_and_update_status(
|
||||||
|
notification=notification,
|
||||||
|
filename=filename,
|
||||||
|
scan_pdf_object=scan_pdf_object,
|
||||||
|
message=sanitise_response["message"],
|
||||||
|
invalid_pages=sanitise_response.get("invalid_pages"),
|
||||||
|
page_count=sanitise_response.get("page_count")
|
||||||
|
)
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
current_app.logger.info(
|
current_app.logger.info(
|
||||||
@@ -270,9 +274,16 @@ def process_virus_scan_passed(self, filename):
|
|||||||
update_notification_status_by_id(notification.id, NOTIFICATION_TECHNICAL_FAILURE)
|
update_notification_status_by_id(notification.id, NOTIFICATION_TECHNICAL_FAILURE)
|
||||||
|
|
||||||
|
|
||||||
def _move_invalid_letter_and_update_status(notification, filename, scan_pdf_object):
|
def _move_invalid_letter_and_update_status(
|
||||||
|
*, notification, filename, scan_pdf_object, message=None, invalid_pages=None, page_count=None
|
||||||
|
):
|
||||||
try:
|
try:
|
||||||
move_scan_to_invalid_pdf_bucket(filename)
|
move_scan_to_invalid_pdf_bucket(
|
||||||
|
source_filename=filename,
|
||||||
|
message=message,
|
||||||
|
invalid_pages=invalid_pages,
|
||||||
|
page_count=page_count
|
||||||
|
)
|
||||||
scan_pdf_object.delete()
|
scan_pdf_object.delete()
|
||||||
|
|
||||||
update_letter_pdf_status(
|
update_letter_pdf_status(
|
||||||
@@ -311,17 +322,19 @@ def _sanitise_precompiled_pdf(self, notification, precompiled_pdf):
|
|||||||
'Notification-ID': str(notification.id)}
|
'Notification-ID': str(notification.id)}
|
||||||
)
|
)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
return response
|
return response.json()
|
||||||
except RequestException as ex:
|
except RequestException as ex:
|
||||||
if ex.response is not None and ex.response.status_code == 400:
|
if ex.response is not None and ex.response.status_code == 400:
|
||||||
message = "sanitise_precompiled_pdf validation error for notification: {}. ".format(notification.id)
|
message = "sanitise_precompiled_pdf validation error for notification: {}. ".format(notification.id)
|
||||||
if "message" in response.json():
|
if "message" in response.json():
|
||||||
message += response.json()["message"]
|
message += response.json()["message"]
|
||||||
|
if "invalid_pages" in response.json():
|
||||||
|
message += (" on pages: " + ", ".join(map(str, response.json()["invalid_pages"])))
|
||||||
|
|
||||||
current_app.logger.info(
|
current_app.logger.info(
|
||||||
message
|
message
|
||||||
)
|
)
|
||||||
return None
|
return response.json()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
current_app.logger.exception(
|
current_app.logger.exception(
|
||||||
|
|||||||
@@ -127,10 +127,21 @@ def move_error_pdf_to_scan_bucket(source_filename):
|
|||||||
_move_s3_object(scan_bucket, error_file, scan_bucket, source_filename)
|
_move_s3_object(scan_bucket, error_file, scan_bucket, source_filename)
|
||||||
|
|
||||||
|
|
||||||
def move_scan_to_invalid_pdf_bucket(source_filename):
|
def move_scan_to_invalid_pdf_bucket(source_filename, message=None, invalid_pages=None, page_count=None):
|
||||||
scan_bucket = current_app.config['LETTERS_SCAN_BUCKET_NAME']
|
metadata = {}
|
||||||
invalid_pdf_bucket = current_app.config['INVALID_PDF_BUCKET_NAME']
|
if message:
|
||||||
_move_s3_object(scan_bucket, source_filename, invalid_pdf_bucket, source_filename)
|
metadata["validation_failed_message"] = message
|
||||||
|
if invalid_pages:
|
||||||
|
metadata["invalid_pages"] = [str(p) for p in invalid_pages]
|
||||||
|
if page_count:
|
||||||
|
metadata["page_count"] = str(page_count)
|
||||||
|
_move_s3_object(
|
||||||
|
source_bucket=current_app.config['LETTERS_SCAN_BUCKET_NAME'],
|
||||||
|
source_filename=source_filename,
|
||||||
|
target_bucket=current_app.config['INVALID_PDF_BUCKET_NAME'],
|
||||||
|
target_filename=source_filename,
|
||||||
|
metadata=metadata
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def move_uploaded_pdf_to_letters_bucket(source_filename, upload_filename):
|
def move_uploaded_pdf_to_letters_bucket(source_filename, upload_filename):
|
||||||
@@ -138,7 +149,7 @@ def move_uploaded_pdf_to_letters_bucket(source_filename, upload_filename):
|
|||||||
source_bucket=current_app.config['TRANSIENT_UPLOADED_LETTERS'],
|
source_bucket=current_app.config['TRANSIENT_UPLOADED_LETTERS'],
|
||||||
source_filename=source_filename,
|
source_filename=source_filename,
|
||||||
target_bucket=current_app.config['LETTERS_PDF_BUCKET_NAME'],
|
target_bucket=current_app.config['LETTERS_PDF_BUCKET_NAME'],
|
||||||
target_filename=upload_filename
|
target_filename=upload_filename,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -164,7 +175,7 @@ def get_letter_pdf(notification):
|
|||||||
return obj.get()["Body"].read()
|
return obj.get()["Body"].read()
|
||||||
|
|
||||||
|
|
||||||
def _move_s3_object(source_bucket, source_filename, target_bucket, target_filename):
|
def _move_s3_object(source_bucket, source_filename, target_bucket, target_filename, metadata=None):
|
||||||
s3 = boto3.resource('s3')
|
s3 = boto3.resource('s3')
|
||||||
copy_source = {'Bucket': source_bucket, 'Key': source_filename}
|
copy_source = {'Bucket': source_bucket, 'Key': source_filename}
|
||||||
|
|
||||||
@@ -174,7 +185,10 @@ def _move_s3_object(source_bucket, source_filename, target_bucket, target_filena
|
|||||||
# Tags are copied across but the expiration time is reset in the destination bucket
|
# Tags are copied across but the expiration time is reset in the destination bucket
|
||||||
# e.g. if a file has 5 days left to expire on a ONE_WEEK retention in the source bucket,
|
# e.g. if a file has 5 days left to expire on a ONE_WEEK retention in the source bucket,
|
||||||
# in the destination bucket the expiration time will be reset to 7 days left to expire
|
# in the destination bucket the expiration time will be reset to 7 days left to expire
|
||||||
obj.copy(copy_source, ExtraArgs={'ServerSideEncryption': 'AES256'})
|
put_args = {'ServerSideEncryption': 'AES256'}
|
||||||
|
if metadata:
|
||||||
|
metadata = put_args['Metadata'] = metadata
|
||||||
|
obj.copy(copy_source, ExtraArgs=put_args)
|
||||||
|
|
||||||
s3.Object(source_bucket, source_filename).delete()
|
s3.Object(source_bucket, source_filename).delete()
|
||||||
|
|
||||||
|
|||||||
@@ -436,10 +436,9 @@ def test_process_letter_task_check_virus_scan_passed(
|
|||||||
json={
|
json={
|
||||||
"file": base64.b64encode(b"new_pdf").decode("utf-8"),
|
"file": base64.b64encode(b"new_pdf").decode("utf-8"),
|
||||||
"validation_passed": True,
|
"validation_passed": True,
|
||||||
"errors": {
|
"message": "",
|
||||||
"content_outside_of_printable_area": [],
|
"invalid_pages": [],
|
||||||
"document_not_a4_size_portrait_orientation": [],
|
"page_count": 1
|
||||||
}
|
|
||||||
},
|
},
|
||||||
status_code=200
|
status_code=200
|
||||||
)
|
)
|
||||||
@@ -479,7 +478,16 @@ def test_process_letter_task_check_virus_scan_passed_when_sanitise_fails(
|
|||||||
sample_letter_notification.status = NOTIFICATION_PENDING_VIRUS_CHECK
|
sample_letter_notification.status = NOTIFICATION_PENDING_VIRUS_CHECK
|
||||||
sample_letter_notification.key_type = key_type
|
sample_letter_notification.key_type = key_type
|
||||||
mock_move_s3 = mocker.patch('app.letters.utils._move_s3_object')
|
mock_move_s3 = mocker.patch('app.letters.utils._move_s3_object')
|
||||||
mock_sanitise = mocker.patch('app.celery.letters_pdf_tasks._sanitise_precompiled_pdf', return_value=None)
|
sanitise_response = {
|
||||||
|
"file": base64.b64encode(b"nyan").decode("utf-8"),
|
||||||
|
"validation_passed": False,
|
||||||
|
"message": "content-outside-printable-area",
|
||||||
|
"invalid_pages": [1],
|
||||||
|
"page_count": 1
|
||||||
|
}
|
||||||
|
mock_sanitise = mocker.patch(
|
||||||
|
'app.celery.letters_pdf_tasks._sanitise_precompiled_pdf', return_value=sanitise_response
|
||||||
|
)
|
||||||
mock_get_page_count = mocker.patch('app.celery.letters_pdf_tasks.get_page_count', return_value=2)
|
mock_get_page_count = mocker.patch('app.celery.letters_pdf_tasks.get_page_count', return_value=2)
|
||||||
|
|
||||||
process_virus_scan_passed(filename)
|
process_virus_scan_passed(filename)
|
||||||
@@ -492,8 +500,12 @@ def test_process_letter_task_check_virus_scan_passed_when_sanitise_fails(
|
|||||||
b'pdf_content'
|
b'pdf_content'
|
||||||
)
|
)
|
||||||
mock_move_s3.assert_called_once_with(
|
mock_move_s3.assert_called_once_with(
|
||||||
source_bucket_name, filename,
|
source_bucket=source_bucket_name, source_filename=filename,
|
||||||
target_bucket_name, filename
|
target_bucket=target_bucket_name, target_filename=filename, metadata={
|
||||||
|
"validation_failed_message": "content-outside-printable-area",
|
||||||
|
"invalid_pages": ["1"],
|
||||||
|
"page_count": "1"
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
mock_get_page_count.assert_called_once_with(b'pdf_content')
|
mock_get_page_count.assert_called_once_with(b'pdf_content')
|
||||||
@@ -533,10 +545,9 @@ def test_process_letter_task_check_virus_scan_passed_when_redaction_fails(
|
|||||||
"file": base64.b64encode(b"new_pdf").decode("utf-8"),
|
"file": base64.b64encode(b"new_pdf").decode("utf-8"),
|
||||||
"validation_passed": True,
|
"validation_passed": True,
|
||||||
"redaction_failed_message": "No matches for address block during redaction procedure",
|
"redaction_failed_message": "No matches for address block during redaction procedure",
|
||||||
"errors": {
|
"message": "",
|
||||||
"content_outside_of_printable_area": [],
|
"invalid_pages": "",
|
||||||
"document_not_a4_size_portrait_orientation": []
|
"page_count": 2
|
||||||
}
|
|
||||||
},
|
},
|
||||||
status_code=200
|
status_code=200
|
||||||
)
|
)
|
||||||
@@ -582,8 +593,8 @@ def test_process_letter_task_check_virus_scan_passed_when_file_cannot_be_opened(
|
|||||||
mock_sanitise.assert_not_called()
|
mock_sanitise.assert_not_called()
|
||||||
mock_get_page_count.assert_called_once_with(b'pdf_content')
|
mock_get_page_count.assert_called_once_with(b'pdf_content')
|
||||||
mock_move_s3.assert_called_once_with(
|
mock_move_s3.assert_called_once_with(
|
||||||
source_bucket_name, filename,
|
source_bucket=source_bucket_name, source_filename=filename,
|
||||||
target_bucket_name, filename
|
target_bucket=target_bucket_name, target_filename=filename, metadata={}
|
||||||
)
|
)
|
||||||
assert sample_letter_notification.status == NOTIFICATION_VALIDATION_FAILED
|
assert sample_letter_notification.status == NOTIFICATION_VALIDATION_FAILED
|
||||||
assert sample_letter_notification.billable_units == 0
|
assert sample_letter_notification.billable_units == 0
|
||||||
@@ -626,10 +637,9 @@ def test_process_virus_scan_passed_logs_error_and_sets_tech_failure_if_s3_error_
|
|||||||
json={
|
json={
|
||||||
"file": base64.b64encode(b"new_pdf").decode("utf-8"),
|
"file": base64.b64encode(b"new_pdf").decode("utf-8"),
|
||||||
"validation_passed": True,
|
"validation_passed": True,
|
||||||
"errors": {
|
"message": "",
|
||||||
"content_outside_of_printable_area": [],
|
"invalid_pages": [],
|
||||||
"document_not_a4_size_portrait_orientation": [],
|
"page_count": 1
|
||||||
}
|
|
||||||
},
|
},
|
||||||
status_code=200
|
status_code=200
|
||||||
)
|
)
|
||||||
@@ -656,7 +666,11 @@ def test_move_invalid_letter_and_update_status_logs_error_and_sets_tech_failure_
|
|||||||
side_effect=ClientError(error_response, 'operation_name'))
|
side_effect=ClientError(error_response, 'operation_name'))
|
||||||
mock_logger = mocker.patch('app.celery.tasks.current_app.logger.exception')
|
mock_logger = mocker.patch('app.celery.tasks.current_app.logger.exception')
|
||||||
|
|
||||||
_move_invalid_letter_and_update_status(sample_letter_notification, 'filename', mocker.Mock())
|
_move_invalid_letter_and_update_status(
|
||||||
|
notification=sample_letter_notification,
|
||||||
|
filename='filename',
|
||||||
|
scan_pdf_object=mocker.Mock()
|
||||||
|
)
|
||||||
|
|
||||||
assert sample_letter_notification.status == NOTIFICATION_TECHNICAL_FAILURE
|
assert sample_letter_notification.status == NOTIFICATION_TECHNICAL_FAILURE
|
||||||
mock_logger.assert_called_once_with(
|
mock_logger.assert_called_once_with(
|
||||||
@@ -720,10 +734,9 @@ def test_sanitise_precompiled_pdf_returns_data_from_template_preview(rmock, samp
|
|||||||
json={
|
json={
|
||||||
"file": base64.b64encode(b"new_pdf").decode("utf-8"),
|
"file": base64.b64encode(b"new_pdf").decode("utf-8"),
|
||||||
"validation_passed": True,
|
"validation_passed": True,
|
||||||
"errors": {
|
"message": "",
|
||||||
"content_outside_of_printable_area": [],
|
"invalid_pages": [],
|
||||||
"document_not_a4_size_portrait_orientation": [],
|
"page_count": 1
|
||||||
}
|
|
||||||
},
|
},
|
||||||
status_code=200
|
status_code=200
|
||||||
)
|
)
|
||||||
@@ -732,26 +745,26 @@ def test_sanitise_precompiled_pdf_returns_data_from_template_preview(rmock, samp
|
|||||||
assert rmock.called
|
assert rmock.called
|
||||||
assert rmock.request_history[0].url == endpoint
|
assert rmock.request_history[0].url == endpoint
|
||||||
|
|
||||||
assert base64.b64decode(response.json()["file"].encode()) == b"new_pdf"
|
assert base64.b64decode(response["file"].encode()) == b"new_pdf"
|
||||||
assert rmock.last_request.text == 'old_pdf'
|
assert rmock.last_request.text == 'old_pdf'
|
||||||
|
|
||||||
|
|
||||||
def test_sanitise_precompiled_pdf_returns_none_on_validation_error(rmock, sample_letter_notification):
|
def test_sanitise_precompiled_pdf_return_validation_error(rmock, sample_letter_notification):
|
||||||
sample_letter_notification.status = NOTIFICATION_PENDING_VIRUS_CHECK
|
sample_letter_notification.status = NOTIFICATION_PENDING_VIRUS_CHECK
|
||||||
|
|
||||||
endpoint = 'http://localhost:9999/precompiled/sanitise'
|
endpoint = 'http://localhost:9999/precompiled/sanitise'
|
||||||
|
response_json = {
|
||||||
|
"file": base64.b64encode(b"nyan").decode("utf-8"),
|
||||||
|
"validation_passed": False,
|
||||||
|
"message": "content-outside-printable-area",
|
||||||
|
"invalid_pages": [1],
|
||||||
|
"page_count": 1
|
||||||
|
}
|
||||||
with requests_mock.mock() as rmock:
|
with requests_mock.mock() as rmock:
|
||||||
rmock.request(
|
rmock.request(
|
||||||
"POST",
|
"POST",
|
||||||
endpoint,
|
endpoint,
|
||||||
json={
|
json=response_json,
|
||||||
"file": base64.b64encode(b"nyan").decode("utf-8"),
|
|
||||||
"validation_passed": False,
|
|
||||||
"errors": {
|
|
||||||
"content_outside_of_printable_area": [1],
|
|
||||||
"document_not_a4_size_portrait_orientation": [],
|
|
||||||
}
|
|
||||||
},
|
|
||||||
status_code=400
|
status_code=400
|
||||||
)
|
)
|
||||||
mock_celery = Mock(**{'retry.side_effect': Retry})
|
mock_celery = Mock(**{'retry.side_effect': Retry})
|
||||||
@@ -759,7 +772,7 @@ def test_sanitise_precompiled_pdf_returns_none_on_validation_error(rmock, sample
|
|||||||
assert rmock.called
|
assert rmock.called
|
||||||
assert rmock.request_history[0].url == endpoint
|
assert rmock.request_history[0].url == endpoint
|
||||||
|
|
||||||
assert response is None
|
assert response == response_json
|
||||||
|
|
||||||
|
|
||||||
def test_sanitise_precompiled_pdf_passes_the_service_id_and_notification_id_to_template_preview(
|
def test_sanitise_precompiled_pdf_passes_the_service_id_and_notification_id_to_template_preview(
|
||||||
|
|||||||
Reference in New Issue
Block a user