Add new task to alert about created email / sms

This will log an error when email or SMS notifications have been
stuck in 'created' for too long - normally they should be 'sending'
in seconds, noting that we have a goal of < 10s wait time for most
notifications being processed our platform.

In the next commits we'll decouple similar functionality from the
existing 'timeout-sending-notifications' task.
This commit is contained in:
Ben Thorner
2021-11-24 13:47:48 +00:00
parent a8cad79def
commit f96ba5361a
5 changed files with 77 additions and 0 deletions

View File

@@ -30,6 +30,7 @@ from app.dao.jobs_dao import (
find_missing_row_for_job,
)
from app.dao.notifications_dao import (
dao_check_notifications_still_in_created,
dao_old_letters_with_created_status,
dao_precompiled_letters_still_pending_virus_check,
is_delivery_slow_for_providers,
@@ -118,6 +119,19 @@ def switch_current_sms_provider_on_slow_delivery():
dao_reduce_sms_provider_priority(provider_name, time_threshold=timedelta(minutes=10))
@notify_celery.task(name='raise-alert-if-email-sms-still-in-created')
def raise_alert_if_email_sms_still_in_created():
alert_above_age = current_app.config.get('CREATED_NOTIFICATIONS_ALERT_AGE')
still_in_created = dao_check_notifications_still_in_created(alert_above_age)
message = f"{still_in_created} notifications are still in 'created'."
if still_in_created == 0:
current_app.logger.info(message)
return
current_app.logger.error(message)
@notify_celery.task(name='tend-providers-back-to-middle')
def tend_providers_back_to_middle():
dao_adjust_provider_priority_back_to_resting_points()

View File

@@ -310,6 +310,11 @@ class Config(object):
'schedule': crontab(hour=15, minute=30),
'options': {'queue': QueueNames.PERIODIC}
},
'raise-alert-if-email-sms-still-in-created': {
'task': 'raise-alert-if-email-sms-still-in-created',
'schedule': crontab(minute=30),
'options': {'queue': QueueNames.PERIODIC},
},
# The collate-letter-pdf does assume it is called in an hour that BST does not make a
# difference to the truncate date which translates to the filename to process
'collate-letter-pdfs-to-be-sent': {
@@ -355,6 +360,7 @@ class Config(object):
STATSD_ENABLED = bool(STATSD_HOST)
SENDING_NOTIFICATIONS_TIMEOUT_PERIOD = 259200 # 3 days
CREATED_NOTIFICATIONS_ALERT_AGE = 3600 # 1 hour
SIMULATED_EMAIL_ADDRESSES = (
'simulate-delivered@notifications.service.gov.uk',

View File

@@ -512,6 +512,16 @@ def _timeout_notifications(current_statuses, new_status, timeout_start, updated_
return notifications
def dao_check_notifications_still_in_created(minimum_age_in_seconds):
min_created_at = datetime.utcnow() - timedelta(seconds=minimum_age_in_seconds)
return Notification.query.filter(
Notification.created_at < min_created_at,
Notification.status == NOTIFICATION_CREATED,
Notification.notification_type.in_([SMS_TYPE, EMAIL_TYPE])
).count()
def dao_timeout_notifications(timeout_period_in_seconds):
"""
Timeout SMS and email notifications by the following rules:

View File

@@ -19,6 +19,7 @@ from app.celery.scheduled_tasks import (
check_job_status,
delete_invitations,
delete_verify_codes,
raise_alert_if_email_sms_still_in_created,
remove_yesterdays_planned_tests_on_govuk_alerts,
replay_created_notifications,
run_scheduled_jobs,
@@ -108,6 +109,24 @@ def test_should_update_all_scheduled_jobs_and_put_on_queue(sample_template, mock
])
def test_raise_alert_if_email_sms_still_in_created(notify_api, mocker):
mock_check = mocker.patch('app.celery.scheduled_tasks.dao_check_notifications_still_in_created')
mock_logger = mocker.patch('app.celery.scheduled_tasks.current_app.logger')
mock_check.return_value = 0
raise_alert_if_email_sms_still_in_created()
mock_logger.info.assert_called_once_with("0 notifications are still in 'created'.")
assert mock_check.called_once_with(
notify_api.config['CREATED_NOTIFICATIONS_ALERT_AGE']
)
# try again with something to alert about
mock_check.return_value = 1
raise_alert_if_email_sms_still_in_created()
mock_logger.error.assert_called_once_with("1 notifications are still in 'created'.")
@freeze_time('2017-05-01 14:00:00')
def test_switch_current_sms_provider_on_slow_delivery_switches_when_one_provider_is_slow(
mocker,

View File

@@ -8,6 +8,7 @@ from sqlalchemy.exc import IntegrityError, SQLAlchemyError
from sqlalchemy.orm.exc import NoResultFound
from app.dao.notifications_dao import (
dao_check_notifications_still_in_created,
dao_create_notification,
dao_delete_notifications_by_id,
dao_get_last_notification_added_for_job_id,
@@ -664,6 +665,33 @@ def _notification_json(sample_template, job_id=None, id=None, status=None):
return data
def test_dao_check_notifications_still_in_created(
sample_template,
sample_email_template,
sample_letter_template
):
with freeze_time(datetime.utcnow() - timedelta(minutes=2)):
# old sending notification (ignored)
create_notification(sample_template, status='sending')
# old letter notification (ignored)
create_notification(sample_letter_template, status='created')
with freeze_time(datetime.utcnow() + timedelta(minutes=10)):
# new / future notification (ignored)
create_notification(sample_template, status='created')
# first prove all the above notifications are ignored
assert dao_check_notifications_still_in_created(1) == 0
with freeze_time(datetime.utcnow() - timedelta(minutes=2)):
# now add old, eligible notifications
create_notification(sample_template, status='created')
create_notification(sample_email_template, status='created')
# now prove the check only picks up on these ones
assert dao_check_notifications_still_in_created(1) == 2
def test_dao_timeout_notifications(sample_template):
with freeze_time(datetime.utcnow() - timedelta(minutes=2)):
created = create_notification(sample_template, status='created')