retry sending broadcasts

Retry tasks if they fail to send a broadcast event. Note that each task
tries the regular proxy and the failover proxy for that provider. This
runs a bit differently than our other retries:

Retry with exponential backoff. Our other tasks retry with a fixed delay
of 5 minutes between tries. If we can't send a broadcast, we want to try
immediately. So instead, implement an exponential backoff (1, 2, 4, 8,
... seconds delay). We can't delay for longer than 310 seconds due to
visibility timeout settings in SQS, so cap the delay at that amount.

Normally we give up retrying after a set amount of retries (often 4
hours). As broadcast content is much more important than normal
notifications, we don't ever want to give up on sending them to phones...

...UNLESS WE DO!

Sometimes we do want to give up sending a broadcast though! Broadcasts
have an expiry time, when they stop showing up on peoples devices, so if
that has passed then we don't need to send the broadcast out.

Broadcast events can also be superceded by updates or cancels. Check
that the event is the most recent event for that broadcast message, if
not, give up, as we don't want to accidentally send out two conflicting
events for the same message.
This commit is contained in:
Leo Hemsted
2021-01-19 10:05:48 +00:00
parent d390eb2cac
commit ac34fb9c05
6 changed files with 251 additions and 52 deletions

View File

@@ -1,7 +1,9 @@
import uuid
from datetime import datetime
from unittest.mock import call, ANY
from freezegun import freeze_time
from celery.exceptions import MaxRetriesExceededError
import pytest
from app.models import (
@@ -12,7 +14,14 @@ from app.models import (
ServiceBroadcastProviderRestriction,
ServiceBroadcastSettings,
)
from app.celery.broadcast_message_tasks import send_broadcast_event, send_broadcast_provider_message, trigger_link_test
from app.clients.cbc_proxy import CBCProxyRetryableException
from app.celery.broadcast_message_tasks import (
check_provider_message_should_retry,
get_retry_delay,
send_broadcast_event,
send_broadcast_provider_message,
trigger_link_test,
)
from tests.app.db import (
create_template,
@@ -415,13 +424,11 @@ def test_send_broadcast_provider_message_errors(mocker, sample_service, provider
mock_create_broadcast = mocker.patch(
f'app.clients.cbc_proxy.CBCProxy{provider_capitalised}.create_and_send_broadcast',
side_effect=Exception('oh no'),
side_effect=CBCProxyRetryableException('oh no'),
)
mock_retry = mocker.patch('app.celery.broadcast_message_tasks.send_broadcast_provider_message.retry')
with pytest.raises(Exception) as ex:
send_broadcast_provider_message(provider=provider, broadcast_event_id=str(event.id))
assert ex.match('oh no')
send_broadcast_provider_message(provider=provider, broadcast_event_id=str(event.id))
mock_create_broadcast.assert_called_once_with(
identifier=ANY,
@@ -439,6 +446,56 @@ def test_send_broadcast_provider_message_errors(mocker, sample_service, provider
expires=event.transmitted_finishes_at_as_cap_datetime_string,
channel="test"
)
mock_retry.assert_called_once_with(
countdown=1,
exc=mock_create_broadcast.side_effect,
queue='broadcast-tasks'
)
@pytest.mark.parametrize('num_retries, expected_countdown', [
(0, 1),
(5, 32),
(20, 300),
])
def test_send_broadcast_provider_message_delays_retry_exponentially(
mocker,
sample_service,
num_retries,
expected_countdown
):
template = create_template(sample_service, BROADCAST_TYPE)
broadcast_message = create_broadcast_message(template, status=BroadcastStatusType.BROADCASTING)
event = create_broadcast_event(broadcast_message)
mock_create_broadcast = mocker.patch(
'app.clients.cbc_proxy.CBCProxyEE.create_and_send_broadcast',
side_effect=CBCProxyRetryableException('oh no'),
)
mock_retry = mocker.patch('app.celery.broadcast_message_tasks.send_broadcast_provider_message.retry')
# patch celery request context as shown here: https://stackoverflow.com/a/59870468
mock_celery_task_request_context = mocker.patch("celery.app.task.Task.request")
mock_celery_task_request_context.retries = num_retries
send_broadcast_provider_message(provider='ee', broadcast_event_id=str(event.id))
mock_create_broadcast.assert_called_once_with(
identifier=ANY,
message_number=mocker.ANY,
headline="GOV.UK Notify Broadcast",
description='this is an emergency broadcast message',
areas=[],
sent=event.sent_at_as_cap_datetime_string,
expires=event.transmitted_finishes_at_as_cap_datetime_string,
channel='test',
)
mock_retry.assert_called_once_with(
countdown=expected_countdown,
exc=mock_create_broadcast.side_effect,
queue='broadcast-tasks'
)
@pytest.mark.parametrize("provider,provider_capitalised", [
@@ -471,3 +528,90 @@ def test_trigger_link_tests_invokes_cbc_proxy_client(
assert len(mock_send_link_test.mock_calls[0][1][1]) == 8
else:
assert not mock_send_link_test.mock_calls[0][1][1]
@pytest.mark.parametrize('retry_count, expected_delay', [
(0, 1),
(1, 2),
(2, 4),
(8, 256),
(9, 300),
(10, 300),
(1000, 300),
])
def test_get_retry_delay_has_capped_backoff(retry_count, expected_delay):
assert get_retry_delay(retry_count) == expected_delay
@freeze_time('2021-01-01 12:00')
def test_check_provider_message_should_retry_doesnt_raise_if_event_hasnt_expired_yet(sample_template):
broadcast_message = create_broadcast_message(sample_template)
current_event = create_broadcast_event(
broadcast_message,
transmitted_starts_at=datetime(2021, 1, 1, 0, 0),
transmitted_finishes_at=datetime(2021, 1, 1, 12, 1),
)
provider_message = create_broadcast_provider_message(current_event, 'ee')
check_provider_message_should_retry(provider_message)
@freeze_time('2021-01-01 12:00')
def test_check_provider_message_should_retry_raises_if_event_has_expired(sample_template):
broadcast_message = create_broadcast_message(sample_template)
current_event = create_broadcast_event(
broadcast_message,
transmitted_starts_at=datetime(2021, 1, 1, 0, 0),
transmitted_finishes_at=datetime(2021, 1, 1, 11, 59),
)
provider_message = create_broadcast_provider_message(current_event, 'ee')
with pytest.raises(MaxRetriesExceededError) as exc:
check_provider_message_should_retry(provider_message)
assert 'The expiry time of 2021-01-01 11:59:00 has already passed' in str(exc.value)
@freeze_time('2021-01-01 12:00')
def test_check_provider_message_should_retry_raises_if_a_newer_event_exists(sample_template):
broadcast_message = create_broadcast_message(sample_template)
# event approved at midnight
past_event = create_broadcast_event(
broadcast_message,
message_type='alert',
sent_at=datetime(2021, 1, 1, 0, 0),
transmitted_starts_at=datetime(2021, 1, 1, 0, 0),
transmitted_finishes_at=datetime(2021, 1, 2, 0, 0),
)
# event updated at 5am (this is the event we're currently trying to send)
current_event = create_broadcast_event(
broadcast_message,
message_type='update',
sent_at=datetime(2021, 1, 1, 5, 0),
transmitted_starts_at=datetime(2021, 1, 1, 0, 0),
transmitted_finishes_at=datetime(2021, 1, 2, 0, 0),
)
# event updated at 7am
future_event = create_broadcast_event(
broadcast_message,
message_type='update',
sent_at=datetime(2021, 1, 1, 7, 0),
transmitted_starts_at=datetime(2021, 1, 1, 0, 0),
transmitted_finishes_at=datetime(2021, 1, 2, 0, 0),
)
# event cancelled at 10am
futurest_event = create_broadcast_event(
broadcast_message,
message_type='cancel',
sent_at=datetime(2021, 1, 1, 10, 0),
transmitted_starts_at=datetime(2021, 1, 1, 0, 0),
transmitted_finishes_at=datetime(2021, 1, 2, 0, 0),
)
provider_message = create_broadcast_provider_message(current_event, 'ee')
# even though the task is going on until midnight tomorrow, we shouldn't send the update now, because the cancel
# message will be in the pipeline somewhere.
with pytest.raises(MaxRetriesExceededError) as exc:
check_provider_message_should_retry(provider_message)
assert f'This event has been superceeded by cancel broadcast_event {futurest_event.id}' in str(exc.value)

View File

@@ -7,7 +7,7 @@ from unittest.mock import Mock, call
import pytest
from app.clients.cbc_proxy import (
CBCProxyClient, CBCProxyException, CBCProxyEE, CBCProxyCanary, CBCProxyVodafone, CBCProxyThree, CBCProxyO2
CBCProxyClient, CBCProxyRetryableException, CBCProxyEE, CBCProxyCanary, CBCProxyVodafone, CBCProxyThree, CBCProxyO2
)
from app.utils import DATETIME_FORMAT
@@ -433,7 +433,7 @@ def test_cbc_proxy_create_and_send_tries_failover_lambda_on_invoke_error_and_rai
'StatusCode': 400,
}
with pytest.raises(CBCProxyException) as e:
with pytest.raises(CBCProxyRetryableException) as e:
cbc_proxy.create_and_send_broadcast(
identifier='my-identifier',
message_number='0000007b',
@@ -482,7 +482,7 @@ def test_cbc_proxy_create_and_send_tries_failover_lambda_on_function_error_and_r
}
}
with pytest.raises(CBCProxyException) as e:
with pytest.raises(CBCProxyRetryableException) as e:
cbc_proxy.create_and_send_broadcast(
identifier='my-identifier',
message_number='0000007b',

View File

@@ -1049,7 +1049,7 @@ def create_broadcast_message(
starts_at=starts_at,
finishes_at=finishes_at,
created_by_id=created_by.id if created_by else service.created_by_id,
areas=areas or {},
areas=areas or {'areas': [], 'simple_polygons': []},
content=content,
stubbed=stubbed
)
@@ -1077,7 +1077,7 @@ def create_broadcast_event(
transmitted_areas=transmitted_areas or broadcast_message.areas,
transmitted_sender=transmitted_sender or 'www.notifications.service.gov.uk',
transmitted_starts_at=transmitted_starts_at,
transmitted_finishes_at=transmitted_finishes_at or datetime.utcnow(),
transmitted_finishes_at=transmitted_finishes_at or datetime.utcnow() + timedelta(hours=24),
)
db.session.add(b_e)
db.session.commit()
@@ -1105,4 +1105,4 @@ def create_broadcast_provider_message(
broadcast_provider_message_id=broadcast_provider_message_id)
db.session.add(provider_message_number)
db.session.commit()
return provider_message, provider_message_number
return provider_message