Merge pull request #2535 from alphagov/more-letter-logging

More logging and retrying for letters
This commit is contained in:
Katie Smith
2019-06-20 09:48:55 +01:00
committed by GitHub
6 changed files with 267 additions and 25 deletions

View File

@@ -210,7 +210,7 @@ def process_virus_scan_passed(self, filename):
try:
billable_units = _get_page_count(notification, old_pdf)
except PdfReadError:
_move_invalid_letter_and_update_status(notification.reference, filename, scan_pdf_object)
_move_invalid_letter_and_update_status(notification, filename, scan_pdf_object)
return
new_pdf = _sanitise_precompiled_pdf(self, notification, old_pdf)
@@ -222,7 +222,7 @@ def process_virus_scan_passed(self, filename):
if not new_pdf:
current_app.logger.info('Invalid precompiled pdf received {} ({})'.format(notification.id, filename))
_move_invalid_letter_and_update_status(notification.reference, filename, scan_pdf_object)
_move_invalid_letter_and_update_status(notification, filename, scan_pdf_object)
return
else:
current_app.logger.info(
@@ -230,18 +230,23 @@ def process_virus_scan_passed(self, filename):
current_app.logger.info('notification id {} ({}) sanitised and ready to send'.format(notification.id, filename))
_upload_pdf_to_test_or_live_pdf_bucket(
new_pdf,
filename,
is_test_letter=is_test_key)
try:
_upload_pdf_to_test_or_live_pdf_bucket(
new_pdf,
filename,
is_test_letter=is_test_key)
update_letter_pdf_status(
reference=reference,
status=NOTIFICATION_DELIVERED if is_test_key else NOTIFICATION_CREATED,
billable_units=billable_units
)
scan_pdf_object.delete()
update_letter_pdf_status(
reference=reference,
status=NOTIFICATION_DELIVERED if is_test_key else NOTIFICATION_CREATED,
billable_units=billable_units
)
scan_pdf_object.delete()
except BotoClientError:
current_app.logger.exception(
"Error uploading letter to live pdf bucket for notification: {}".format(notification.id)
)
update_notification_status_by_id(notification.id, NOTIFICATION_TECHNICAL_FAILURE)
def _get_page_count(notification, old_pdf):
@@ -255,14 +260,20 @@ def _get_page_count(notification, old_pdf):
raise e
def _move_invalid_letter_and_update_status(notification_reference, filename, scan_pdf_object):
move_scan_to_invalid_pdf_bucket(filename)
scan_pdf_object.delete()
def _move_invalid_letter_and_update_status(notification, filename, scan_pdf_object):
try:
move_scan_to_invalid_pdf_bucket(filename)
scan_pdf_object.delete()
update_letter_pdf_status(
reference=notification_reference,
status=NOTIFICATION_VALIDATION_FAILED,
billable_units=0)
update_letter_pdf_status(
reference=notification.reference,
status=NOTIFICATION_VALIDATION_FAILED,
billable_units=0)
except BotoClientError:
current_app.logger.exception(
"Error when moving letter with id {} to invalid PDF bucket".format(notification.id)
)
update_notification_status_by_id(notification.id, NOTIFICATION_TECHNICAL_FAILURE)
def _upload_pdf_to_test_or_live_pdf_bucket(pdf_data, filename, is_test_letter):

View File

@@ -8,7 +8,7 @@ from notifications_utils.statsd_decorators import statsd
from sqlalchemy import and_
from sqlalchemy.exc import SQLAlchemyError
from app import notify_celery
from app import notify_celery, zendesk_client
from app.celery.tasks import process_job
from app.config import QueueNames, TaskNames
from app.dao.invited_org_user_dao import delete_org_invitations_created_more_than_two_days_ago
@@ -19,7 +19,9 @@ from app.dao.notifications_dao import (
is_delivery_slow_for_provider,
dao_get_scheduled_notifications,
set_scheduled_notification_to_processed,
notifications_not_yet_sent
notifications_not_yet_sent,
dao_precompiled_letters_still_pending_virus_check,
dao_old_letters_with_created_status,
)
from app.dao.provider_details_dao import (
get_current_provider,
@@ -178,3 +180,45 @@ def replay_created_notifications():
for n in notifications_to_resend:
send_notification_to_queue(notification=n, research_mode=n.service.research_mode)
@notify_celery.task(name='check-precompiled-letter-state')
@statsd(namespace="tasks")
def check_precompiled_letter_state():
letters = dao_precompiled_letters_still_pending_virus_check()
if len(letters) > 0:
letter_ids = [str(letter.id) for letter in letters]
msg = "{} precompiled letters have been pending-virus-check for over 90 minutes. " \
"Notifications: {}".format(len(letters), letter_ids)
current_app.logger.exception(msg)
if current_app.config['NOTIFY_ENVIRONMENT'] in ['live', 'production', 'test']:
zendesk_client.create_ticket(
subject="[{}] Letters still pending virus check".format(current_app.config['NOTIFY_ENVIRONMENT']),
message=msg,
ticket_type=zendesk_client.TYPE_INCIDENT
)
@notify_celery.task(name='check-templated-letter-state')
@statsd(namespace="tasks")
def check_templated_letter_state():
letters = dao_old_letters_with_created_status()
if len(letters) > 0:
letter_ids = [str(letter.id) for letter in letters]
msg = "{} letters were created before 17.30 yesterday and still have 'created' status. " \
"Notifications: {}".format(len(letters), letter_ids)
current_app.logger.exception(msg)
if current_app.config['NOTIFY_ENVIRONMENT'] in ['live', 'production', 'test']:
zendesk_client.create_ticket(
subject="[{}] Letters still in 'created' status".format(current_app.config['NOTIFY_ENVIRONMENT']),
message=msg,
ticket_type=zendesk_client.TYPE_INCIDENT
)

View File

@@ -259,6 +259,11 @@ class Config(object):
# since we mark jobs as archived
'options': {'queue': QueueNames.PERIODIC},
},
'check-templated-letter-state': {
'task': 'check-templated-letter-state',
'schedule': crontab(day_of_week='mon-fri', hour=9, minute=0),
'options': {'queue', QueueNames.PERIODIC}
},
'raise-alert-if-letter-notifications-still-sending': {
'task': 'raise-alert-if-letter-notifications-still-sending',
'schedule': crontab(hour=16, minute=30),
@@ -276,6 +281,11 @@ class Config(object):
'schedule': crontab(hour=23, minute=00),
'options': {'queue': QueueNames.PERIODIC}
},
'check-precompiled-letter-state': {
'task': 'check-precompiled-letter-state',
'schedule': crontab(day_of_week='mon-fri', hour='9,15', minute=0),
'options': {'queue', QueueNames.PERIODIC}
},
}
CELERY_QUEUES = []

View File

@@ -14,7 +14,7 @@ from notifications_utils.recipients import (
try_validate_and_format_phone_number
)
from notifications_utils.statsd_decorators import statsd
from notifications_utils.timezones import convert_utc_to_bst
from notifications_utils.timezones import convert_bst_to_utc, convert_utc_to_bst
from sqlalchemy import (desc, func, asc)
from sqlalchemy.orm import joinedload
from sqlalchemy.orm.exc import NoResultFound
@@ -692,6 +692,32 @@ def notifications_not_yet_sent(should_be_sending_after_seconds, notification_typ
return notifications
def dao_old_letters_with_created_status():
yesterday_bst = convert_utc_to_bst(datetime.utcnow()) - timedelta(days=1)
last_processing_deadline = yesterday_bst.replace(hour=17, minute=30, second=0, microsecond=0)
notifications = Notification.query.filter(
Notification.created_at < convert_bst_to_utc(last_processing_deadline),
Notification.notification_type == LETTER_TYPE,
Notification.status == NOTIFICATION_CREATED
).order_by(
Notification.created_at
).all()
return notifications
def dao_precompiled_letters_still_pending_virus_check():
ninety_minutes_ago = datetime.utcnow() - timedelta(seconds=5400)
notifications = Notification.query.filter(
Notification.created_at < ninety_minutes_ago,
Notification.status == NOTIFICATION_PENDING_VIRUS_CHECK
).order_by(
Notification.created_at
).all()
return notifications
def guess_notification_type(search_term):
if set(search_term) & set(string.ascii_letters + '@'):
return EMAIL_TYPE

View File

@@ -23,6 +23,7 @@ from app.celery.letters_pdf_tasks import (
process_virus_scan_failed,
process_virus_scan_error,
replay_letters_in_error,
_move_invalid_letter_and_update_status,
_sanitise_precompiled_pdf
)
from app.letters.utils import get_letter_pdf_filename, ScanErrorType
@@ -46,6 +47,10 @@ from tests.conftest import set_config_values
def test_should_have_decorated_tasks_functions():
assert create_letters_pdf.__wrapped__.__name__ == 'create_letters_pdf'
assert collate_letter_pdfs_for_day.__wrapped__.__name__ == 'collate_letter_pdfs_for_day'
assert process_virus_scan_passed.__wrapped__.__name__ == 'process_virus_scan_passed'
assert process_virus_scan_failed.__wrapped__.__name__ == 'process_virus_scan_failed'
assert process_virus_scan_error.__wrapped__.__name__ == 'process_virus_scan_error'
@pytest.mark.parametrize('personalisation', [{'name': 'test'}, None])
@@ -517,6 +522,67 @@ def test_process_letter_task_check_virus_scan_passed_when_file_cannot_be_opened(
assert sample_letter_notification.billable_units == 0
@mock_s3
def test_process_virus_scan_passed_logs_error_and_sets_tech_failure_if_s3_error_uploading_to_live_bucket(
mocker,
sample_letter_notification,
):
mock_logger = mocker.patch('app.celery.tasks.current_app.logger.exception')
sample_letter_notification.status = NOTIFICATION_PENDING_VIRUS_CHECK
filename = 'NOTIFY.{}'.format(sample_letter_notification.reference)
source_bucket_name = current_app.config['LETTERS_SCAN_BUCKET_NAME']
conn = boto3.resource('s3', region_name='eu-west-1')
conn.create_bucket(Bucket=source_bucket_name)
s3 = boto3.client('s3', region_name='eu-west-1')
s3.put_object(Bucket=source_bucket_name, Key=filename, Body=b'pdf_content')
mocker.patch('app.celery.letters_pdf_tasks._get_page_count', return_value=1)
mocker.patch('app.celery.letters_pdf_tasks._sanitise_precompiled_pdf', return_value=b'pdf_content')
error_response = {
'Error': {
'Code': 'InvalidParameterValue',
'Message': 'some error message from amazon',
'Type': 'Sender'
}
}
mocker.patch('app.celery.letters_pdf_tasks._upload_pdf_to_test_or_live_pdf_bucket',
side_effect=ClientError(error_response, 'operation_name'))
process_virus_scan_passed(filename)
assert sample_letter_notification.status == NOTIFICATION_TECHNICAL_FAILURE
mock_logger.assert_called_once_with(
'Error uploading letter to live pdf bucket for notification: {}'.format(sample_letter_notification.id)
)
def test_move_invalid_letter_and_update_status_logs_error_and_sets_tech_failure_state_if_s3_error(
mocker,
sample_letter_notification,
):
error_response = {
'Error': {
'Code': 'InvalidParameterValue',
'Message': 'some error message from amazon',
'Type': 'Sender'
}
}
mocker.patch('app.celery.letters_pdf_tasks.move_scan_to_invalid_pdf_bucket',
side_effect=ClientError(error_response, 'operation_name'))
mock_logger = mocker.patch('app.celery.tasks.current_app.logger.exception')
_move_invalid_letter_and_update_status(sample_letter_notification, 'filename', mocker.Mock())
assert sample_letter_notification.status == NOTIFICATION_TECHNICAL_FAILURE
mock_logger.assert_called_once_with(
'Error when moving letter with id {} to invalid PDF bucket'.format(sample_letter_notification.id)
)
def test_process_letter_task_check_virus_scan_failed(sample_letter_notification, mocker):
filename = 'NOTIFY.{}'.format(sample_letter_notification.reference)
sample_letter_notification.status = NOTIFICATION_PENDING_VIRUS_CHECK

View File

@@ -13,7 +13,9 @@ from app.celery.scheduled_tasks import (
run_scheduled_jobs,
send_scheduled_notifications,
switch_current_sms_provider_on_slow_delivery,
replay_created_notifications
replay_created_notifications,
check_precompiled_letter_state,
check_templated_letter_state,
)
from app.config import QueueNames, TaskNames
from app.dao.jobs_dao import dao_get_job_by_id
@@ -26,6 +28,8 @@ from app.models import (
JOB_STATUS_IN_PROGRESS,
JOB_STATUS_ERROR,
JOB_STATUS_FINISHED,
NOTIFICATION_DELIVERED,
NOTIFICATION_PENDING_VIRUS_CHECK,
)
from app.v2.errors import JobIncompleteError
@@ -58,7 +62,7 @@ def prepare_current_provider(restore_provider_details):
db.session.commit()
def test_should_call_delete_codes_on_delete_verify_codes_task(notify_api, mocker):
def test_should_call_delete_codes_on_delete_verify_codes_task(notify_db_session, mocker):
mocker.patch('app.celery.scheduled_tasks.delete_codes_older_created_more_than_a_day_ago')
delete_verify_codes()
assert scheduled_tasks.delete_codes_older_created_more_than_a_day_ago.call_count == 1
@@ -334,3 +338,84 @@ def test_check_job_status_task_does_not_raise_error(sample_template):
job_status=JOB_STATUS_FINISHED)
check_job_status()
@freeze_time("2019-05-30 14:00:00")
def test_check_precompiled_letter_state(mocker, sample_letter_template):
mock_logger = mocker.patch('app.celery.tasks.current_app.logger.exception')
mock_create_ticket = mocker.patch('app.celery.nightly_tasks.zendesk_client.create_ticket')
create_notification(template=sample_letter_template,
status=NOTIFICATION_PENDING_VIRUS_CHECK,
created_at=datetime.utcnow() - timedelta(seconds=5400))
create_notification(template=sample_letter_template,
status=NOTIFICATION_DELIVERED,
created_at=datetime.utcnow() - timedelta(seconds=6000))
noti_1 = create_notification(template=sample_letter_template,
status=NOTIFICATION_PENDING_VIRUS_CHECK,
created_at=datetime.utcnow() - timedelta(seconds=5401))
noti_2 = create_notification(template=sample_letter_template,
status=NOTIFICATION_PENDING_VIRUS_CHECK,
created_at=datetime.utcnow() - timedelta(seconds=70000))
check_precompiled_letter_state()
message = "2 precompiled letters have been pending-virus-check for over 90 minutes. " \
"Notifications: ['{}', '{}']".format(noti_2.id, noti_1.id)
mock_logger.assert_called_once_with(message)
mock_create_ticket.assert_called_with(
message=message,
subject='[test] Letters still pending virus check',
ticket_type='incident'
)
@freeze_time("2019-05-30 14:00:00")
def test_check_templated_letter_state_during_bst(mocker, sample_letter_template):
mock_logger = mocker.patch('app.celery.tasks.current_app.logger.exception')
mock_create_ticket = mocker.patch('app.celery.nightly_tasks.zendesk_client.create_ticket')
noti_1 = create_notification(template=sample_letter_template, created_at=datetime(2019, 5, 1, 12, 0))
noti_2 = create_notification(template=sample_letter_template, created_at=datetime(2019, 5, 29, 16, 29))
create_notification(template=sample_letter_template, created_at=datetime(2019, 5, 29, 16, 30))
create_notification(template=sample_letter_template, created_at=datetime(2019, 5, 29, 17, 29))
create_notification(template=sample_letter_template, status='delivered', created_at=datetime(2019, 5, 28, 10, 0))
create_notification(template=sample_letter_template, created_at=datetime(2019, 5, 30, 10, 0))
check_templated_letter_state()
message = "2 letters were created before 17.30 yesterday and still have 'created' status. " \
"Notifications: ['{}', '{}']".format(noti_1.id, noti_2.id)
mock_logger.assert_called_once_with(message)
mock_create_ticket.assert_called_with(
message=message,
subject="[test] Letters still in 'created' status",
ticket_type='incident'
)
@freeze_time("2019-01-30 14:00:00")
def test_check_templated_letter_state_during_utc(mocker, sample_letter_template):
mock_logger = mocker.patch('app.celery.tasks.current_app.logger.exception')
mock_create_ticket = mocker.patch('app.celery.nightly_tasks.zendesk_client.create_ticket')
noti_1 = create_notification(template=sample_letter_template, created_at=datetime(2018, 12, 1, 12, 0))
noti_2 = create_notification(template=sample_letter_template, created_at=datetime(2019, 1, 29, 17, 29))
create_notification(template=sample_letter_template, created_at=datetime(2019, 1, 29, 17, 30))
create_notification(template=sample_letter_template, created_at=datetime(2019, 1, 29, 18, 29))
create_notification(template=sample_letter_template, status='delivered', created_at=datetime(2019, 1, 29, 10, 0))
create_notification(template=sample_letter_template, created_at=datetime(2019, 1, 30, 10, 0))
check_templated_letter_state()
message = "2 letters were created before 17.30 yesterday and still have 'created' status. " \
"Notifications: ['{}', '{}']".format(noti_1.id, noti_2.id)
mock_logger.assert_called_once_with(message)
mock_create_ticket.assert_called_with(
message=message,
subject="[test] Letters still in 'created' status",
ticket_type='incident'
)