Stop 'timing out' old 'created' notifications

This is being replaced with a new alert and runbook [1]. It's not
always appropriate to change the status to 'technical-failure', and
the new alert means we'll act to fix the underlying issue promptly.

We'll look at tidying up the remaining code in the next commits.

[1]: https://github.com/alphagov/notifications-manuals/wiki/Support-Runbook#deal-with-email-or-sms-still-in-created
This commit is contained in:
Ben Thorner
2021-11-25 16:59:26 +00:00
parent 2acc4ee67d
commit 0318229216
4 changed files with 11 additions and 27 deletions

View File

@@ -30,7 +30,6 @@ from app.dao.notifications_dao import (
from app.dao.service_callback_api_dao import (
get_service_delivery_status_callback_api_for_service,
)
from app.exceptions import NotificationTechnicalFailureException
from app.models import (
EMAIL_TYPE,
KEY_TYPE_NORMAL,
@@ -123,10 +122,9 @@ def timeout_notifications():
# dao_timeout_notifications to return up to 100K notifications, so this task
# will operate on up to 500K - normally we only get around 20K.
for _ in range(0, 5):
technical_failure_notifications, temporary_failure_notifications = \
notifications = \
dao_timeout_notifications(current_app.config.get('SENDING_NOTIFICATIONS_TIMEOUT_PERIOD'))
notifications = technical_failure_notifications + temporary_failure_notifications
for notification in notifications:
# queue callback task only if the service_callback_api exists
service_callback_api = get_service_delivery_status_callback_api_for_service(service_id=notification.service_id) # noqa: E501
@@ -137,11 +135,6 @@ def timeout_notifications():
current_app.logger.info(
"Timeout period reached for {} notifications, status has been updated.".format(len(notifications)))
if technical_failure_notifications:
message = "{} notifications have been updated to technical-failure because they " \
"have timed out and are still in created.Notification ids: {}".format(
len(technical_failure_notifications), [str(x.id) for x in technical_failure_notifications])
raise NotificationTechnicalFailureException(message)
if len(notifications) < 100000:
return

View File

@@ -526,9 +526,6 @@ def dao_timeout_notifications(timeout_period_in_seconds):
"""
Timeout SMS and email notifications by the following rules:
we never sent the notification to the provider for some reason
created -> technical-failure
the notification was sent to the provider but there was not a delivery receipt
sending -> temporary-failure
pending -> temporary-failure
@@ -539,16 +536,13 @@ def dao_timeout_notifications(timeout_period_in_seconds):
updated_at = datetime.utcnow()
timeout = functools.partial(_timeout_notifications, timeout_start=timeout_start, updated_at=updated_at)
# Notifications still in created status are marked with a technical-failure:
technical_failure_notifications = timeout([NOTIFICATION_CREATED], NOTIFICATION_TECHNICAL_FAILURE)
# Notifications still in sending or pending status are marked with a temporary-failure:
temporary_failure_notifications = timeout([NOTIFICATION_SENDING, NOTIFICATION_PENDING],
NOTIFICATION_TEMPORARY_FAILURE)
db.session.commit()
return technical_failure_notifications, temporary_failure_notifications
return temporary_failure_notifications
def is_delivery_slow_for_providers(

View File

@@ -28,7 +28,6 @@ from app.celery.service_callback_tasks import (
create_delivery_status_callback_data,
)
from app.config import QueueNames
from app.exceptions import NotificationTechnicalFailureException
from app.models import EMAIL_TYPE, LETTER_TYPE, SMS_TYPE, FactProcessingTime
from tests.app.db import (
create_job,
@@ -184,11 +183,9 @@ def test_update_status_of_notifications_after_timeout(notify_api, sample_templat
status='pending',
created_at=datetime.utcnow() - timedelta(
seconds=current_app.config.get('SENDING_NOTIFICATIONS_TIMEOUT_PERIOD') + 10))
with pytest.raises(NotificationTechnicalFailureException) as e:
timeout_notifications()
assert str(not2.id) in str(e.value)
timeout_notifications()
assert not1.status == 'temporary-failure'
assert not2.status == 'technical-failure'
assert not2.status == 'created'
assert not3.status == 'temporary-failure'

View File

@@ -703,12 +703,12 @@ def test_dao_timeout_notifications(sample_template):
assert Notification.query.get(sending.id).status == 'sending'
assert Notification.query.get(pending.id).status == 'pending'
assert Notification.query.get(delivered.id).status == 'delivered'
technical_failure_notifications, temporary_failure_notifications = dao_timeout_notifications(1)
assert Notification.query.get(created.id).status == 'technical-failure'
temporary_failure_notifications = dao_timeout_notifications(1)
assert Notification.query.get(created.id).status == 'created'
assert Notification.query.get(sending.id).status == 'temporary-failure'
assert Notification.query.get(pending.id).status == 'temporary-failure'
assert Notification.query.get(delivered.id).status == 'delivered'
assert len(technical_failure_notifications + temporary_failure_notifications) == 3
assert len(temporary_failure_notifications) == 2
def test_dao_timeout_notifications_only_updates_for_older_notifications(sample_template):
@@ -722,8 +722,8 @@ def test_dao_timeout_notifications_only_updates_for_older_notifications(sample_t
assert Notification.query.get(sending.id).status == 'sending'
assert Notification.query.get(pending.id).status == 'pending'
assert Notification.query.get(delivered.id).status == 'delivered'
technical_failure_notifications, temporary_failure_notifications = dao_timeout_notifications(1)
assert len(technical_failure_notifications + temporary_failure_notifications) == 0
temporary_failure_notifications = dao_timeout_notifications(1)
assert len(temporary_failure_notifications) == 0
def test_dao_timeout_notifications_doesnt_affect_letters(sample_letter_template):
@@ -737,8 +737,8 @@ def test_dao_timeout_notifications_doesnt_affect_letters(sample_letter_template)
assert Notification.query.get(sending.id).status == 'sending'
assert Notification.query.get(pending.id).status == 'pending'
assert Notification.query.get(delivered.id).status == 'delivered'
technical_failure_notifications, temporary_failure_notifications = dao_timeout_notifications(1)
temporary_failure_notifications = dao_timeout_notifications(1)
assert len(temporary_failure_notifications) == 0
def test_should_return_notifications_excluding_jobs_by_default(sample_template, sample_job, sample_api_key):