From f96ba5361af1a09002f95b082e2063fbd220b700 Mon Sep 17 00:00:00 2001 From: Ben Thorner Date: Wed, 24 Nov 2021 13:47:48 +0000 Subject: [PATCH] Add new task to alert about created email / sms This will log an error when email or SMS notifications have been stuck in 'created' for too long - normally they should be 'sending' in seconds, noting that we have a goal of < 10s wait time for most notifications being processed our platform. In the next commits we'll decouple similar functionality from the existing 'timeout-sending-notifications' task. --- app/celery/scheduled_tasks.py | 14 ++++++++++ app/config.py | 6 ++++ app/dao/notifications_dao.py | 10 +++++++ tests/app/celery/test_scheduled_tasks.py | 19 +++++++++++++ .../notification_dao/test_notification_dao.py | 28 +++++++++++++++++++ 5 files changed, 77 insertions(+) diff --git a/app/celery/scheduled_tasks.py b/app/celery/scheduled_tasks.py index 968a4231d..804194cb5 100644 --- a/app/celery/scheduled_tasks.py +++ b/app/celery/scheduled_tasks.py @@ -30,6 +30,7 @@ from app.dao.jobs_dao import ( find_missing_row_for_job, ) from app.dao.notifications_dao import ( + dao_check_notifications_still_in_created, dao_old_letters_with_created_status, dao_precompiled_letters_still_pending_virus_check, is_delivery_slow_for_providers, @@ -118,6 +119,19 @@ def switch_current_sms_provider_on_slow_delivery(): dao_reduce_sms_provider_priority(provider_name, time_threshold=timedelta(minutes=10)) +@notify_celery.task(name='raise-alert-if-email-sms-still-in-created') +def raise_alert_if_email_sms_still_in_created(): + alert_above_age = current_app.config.get('CREATED_NOTIFICATIONS_ALERT_AGE') + still_in_created = dao_check_notifications_still_in_created(alert_above_age) + message = f"{still_in_created} notifications are still in 'created'." + + if still_in_created == 0: + current_app.logger.info(message) + return + + current_app.logger.error(message) + + @notify_celery.task(name='tend-providers-back-to-middle') def tend_providers_back_to_middle(): dao_adjust_provider_priority_back_to_resting_points() diff --git a/app/config.py b/app/config.py index ab5a08b27..d953e1433 100644 --- a/app/config.py +++ b/app/config.py @@ -310,6 +310,11 @@ class Config(object): 'schedule': crontab(hour=15, minute=30), 'options': {'queue': QueueNames.PERIODIC} }, + 'raise-alert-if-email-sms-still-in-created': { + 'task': 'raise-alert-if-email-sms-still-in-created', + 'schedule': crontab(minute=30), + 'options': {'queue': QueueNames.PERIODIC}, + }, # The collate-letter-pdf does assume it is called in an hour that BST does not make a # difference to the truncate date which translates to the filename to process 'collate-letter-pdfs-to-be-sent': { @@ -355,6 +360,7 @@ class Config(object): STATSD_ENABLED = bool(STATSD_HOST) SENDING_NOTIFICATIONS_TIMEOUT_PERIOD = 259200 # 3 days + CREATED_NOTIFICATIONS_ALERT_AGE = 3600 # 1 hour SIMULATED_EMAIL_ADDRESSES = ( 'simulate-delivered@notifications.service.gov.uk', diff --git a/app/dao/notifications_dao.py b/app/dao/notifications_dao.py index 44f2bddee..1a823c5dd 100644 --- a/app/dao/notifications_dao.py +++ b/app/dao/notifications_dao.py @@ -512,6 +512,16 @@ def _timeout_notifications(current_statuses, new_status, timeout_start, updated_ return notifications +def dao_check_notifications_still_in_created(minimum_age_in_seconds): + min_created_at = datetime.utcnow() - timedelta(seconds=minimum_age_in_seconds) + + return Notification.query.filter( + Notification.created_at < min_created_at, + Notification.status == NOTIFICATION_CREATED, + Notification.notification_type.in_([SMS_TYPE, EMAIL_TYPE]) + ).count() + + def dao_timeout_notifications(timeout_period_in_seconds): """ Timeout SMS and email notifications by the following rules: diff --git a/tests/app/celery/test_scheduled_tasks.py b/tests/app/celery/test_scheduled_tasks.py index edd8cbeee..68ae6d073 100644 --- a/tests/app/celery/test_scheduled_tasks.py +++ b/tests/app/celery/test_scheduled_tasks.py @@ -19,6 +19,7 @@ from app.celery.scheduled_tasks import ( check_job_status, delete_invitations, delete_verify_codes, + raise_alert_if_email_sms_still_in_created, remove_yesterdays_planned_tests_on_govuk_alerts, replay_created_notifications, run_scheduled_jobs, @@ -108,6 +109,24 @@ def test_should_update_all_scheduled_jobs_and_put_on_queue(sample_template, mock ]) +def test_raise_alert_if_email_sms_still_in_created(notify_api, mocker): + mock_check = mocker.patch('app.celery.scheduled_tasks.dao_check_notifications_still_in_created') + mock_logger = mocker.patch('app.celery.scheduled_tasks.current_app.logger') + + mock_check.return_value = 0 + raise_alert_if_email_sms_still_in_created() + mock_logger.info.assert_called_once_with("0 notifications are still in 'created'.") + + assert mock_check.called_once_with( + notify_api.config['CREATED_NOTIFICATIONS_ALERT_AGE'] + ) + + # try again with something to alert about + mock_check.return_value = 1 + raise_alert_if_email_sms_still_in_created() + mock_logger.error.assert_called_once_with("1 notifications are still in 'created'.") + + @freeze_time('2017-05-01 14:00:00') def test_switch_current_sms_provider_on_slow_delivery_switches_when_one_provider_is_slow( mocker, diff --git a/tests/app/dao/notification_dao/test_notification_dao.py b/tests/app/dao/notification_dao/test_notification_dao.py index a237bf5fd..25e4ca261 100644 --- a/tests/app/dao/notification_dao/test_notification_dao.py +++ b/tests/app/dao/notification_dao/test_notification_dao.py @@ -8,6 +8,7 @@ from sqlalchemy.exc import IntegrityError, SQLAlchemyError from sqlalchemy.orm.exc import NoResultFound from app.dao.notifications_dao import ( + dao_check_notifications_still_in_created, dao_create_notification, dao_delete_notifications_by_id, dao_get_last_notification_added_for_job_id, @@ -664,6 +665,33 @@ def _notification_json(sample_template, job_id=None, id=None, status=None): return data +def test_dao_check_notifications_still_in_created( + sample_template, + sample_email_template, + sample_letter_template +): + with freeze_time(datetime.utcnow() - timedelta(minutes=2)): + # old sending notification (ignored) + create_notification(sample_template, status='sending') + # old letter notification (ignored) + create_notification(sample_letter_template, status='created') + + with freeze_time(datetime.utcnow() + timedelta(minutes=10)): + # new / future notification (ignored) + create_notification(sample_template, status='created') + + # first prove all the above notifications are ignored + assert dao_check_notifications_still_in_created(1) == 0 + + with freeze_time(datetime.utcnow() - timedelta(minutes=2)): + # now add old, eligible notifications + create_notification(sample_template, status='created') + create_notification(sample_email_template, status='created') + + # now prove the check only picks up on these ones + assert dao_check_notifications_still_in_created(1) == 2 + + def test_dao_timeout_notifications(sample_template): with freeze_time(datetime.utcnow() - timedelta(minutes=2)): created = create_notification(sample_template, status='created')