Split up authorisation vs. sequencing checks

While both of these are integrity errors (since we should never
reach this point in the code + data), this just means the original
method comment is still relevant to what immediately follows it.
This commit is contained in:
Ben Thorner
2021-04-19 16:06:13 +01:00
parent 936c9ebdfe
commit a2af8b052a
2 changed files with 28 additions and 25 deletions

View File

@@ -37,25 +37,7 @@ def get_retry_delay(retry_count):
return min(delay, 240) return min(delay, 240)
def check_provider_message_should_send(broadcast_event, provider): def check_event_is_authorised_to_be_sent(broadcast_event, provider):
"""
If any previous event hasn't sent yet for that provider, then we shouldn't send the current event. Instead, fail and
raise a P1 - so that a notify team member can assess the state of the previous messages, and if necessary, can
replay the `send_broadcast_provider_message` task if the previous message has now been sent.
Note: This is called before the new broadcast_provider_message is created.
# Help, I've come across this code following a pagerduty alert, what should I do?
1. Find the failing broadcast_provider_message associated with the previous event that caused this to trip.
2. If that provider message is still failing to send, fix the issue causing that. The task to send that previous
message might still be retrying in the background - look for logs related to that task.
3. If that provider message has sent succesfully, you might need to send this task off depending on context. This
might not always be true though, for example, it may not be necessary to send a cancel if the original alert has
already expired.
4. If you need to re-send this task off again, you'll need to run the following command on paas:
`send_broadcast_provider_message.apply_async(args=(broadcast_event_id, provider), queue=QueueNames.BROADCASTS)`
"""
if not broadcast_event.service.active: if not broadcast_event.service.active:
raise BroadcastIntegrityError( raise BroadcastIntegrityError(
f'Cannot send broadcast_event {broadcast_event.id} ' + f'Cannot send broadcast_event {broadcast_event.id} ' +
@@ -74,6 +56,26 @@ def check_provider_message_should_send(broadcast_event, provider):
f'to provider {provider}: the broadcast message is stubbed' f'to provider {provider}: the broadcast message is stubbed'
) )
def check_event_makes_sense_in_sequence(broadcast_event, provider):
"""
If any previous event hasn't sent yet for that provider, then we shouldn't send the current event. Instead, fail and
raise a P1 - so that a notify team member can assess the state of the previous messages, and if necessary, can
replay the `send_broadcast_provider_message` task if the previous message has now been sent.
Note: This is called before the new broadcast_provider_message is created.
# Help, I've come across this code following a pagerduty alert, what should I do?
1. Find the failing broadcast_provider_message associated with the previous event that caused this to trip.
2. If that provider message is still failing to send, fix the issue causing that. The task to send that previous
message might still be retrying in the background - look for logs related to that task.
3. If that provider message has sent succesfully, you might need to send this task off depending on context. This
might not always be true though, for example, it may not be necessary to send a cancel if the original alert has
already expired.
4. If you need to re-send this task off again, you'll need to run the following command on paas:
`send_broadcast_provider_message.apply_async(args=(broadcast_event_id, provider), queue=QueueNames.BROADCASTS)`
"""
current_provider_message = broadcast_event.get_provider_message(provider) current_provider_message = broadcast_event.get_provider_message(provider)
# if this is the first time a task is being executed, it won't have a provider message yet # if this is the first time a task is being executed, it won't have a provider message yet
if current_provider_message and current_provider_message.status != BroadcastProviderMessageStatus.SENDING: if current_provider_message and current_provider_message.status != BroadcastProviderMessageStatus.SENDING:
@@ -168,7 +170,8 @@ def send_broadcast_provider_message(self, broadcast_event_id, provider):
broadcast_event = dao_get_broadcast_event_by_id(broadcast_event_id) broadcast_event = dao_get_broadcast_event_by_id(broadcast_event_id)
check_provider_message_should_send(broadcast_event, provider) check_event_is_authorised_to_be_sent(broadcast_event, provider)
check_event_makes_sense_in_sequence(broadcast_event, provider)
# the broadcast_provider_message may already exist if we retried previously # the broadcast_provider_message may already exist if we retried previously
broadcast_provider_message = broadcast_event.get_provider_message(provider) broadcast_provider_message = broadcast_event.get_provider_message(provider)

View File

@@ -8,7 +8,7 @@ from freezegun import freeze_time
from app.celery.broadcast_message_tasks import ( from app.celery.broadcast_message_tasks import (
BroadcastIntegrityError, BroadcastIntegrityError,
check_provider_message_should_send, check_event_makes_sense_in_sequence,
get_retry_delay, get_retry_delay,
send_broadcast_event, send_broadcast_event,
send_broadcast_provider_message, send_broadcast_provider_message,
@@ -600,14 +600,14 @@ def test_get_retry_delay_has_capped_backoff(retry_count, expected_delay):
@freeze_time('2021-01-01 12:00') @freeze_time('2021-01-01 12:00')
def test_check_provider_message_should_send_doesnt_raise_if_event_hasnt_expired_yet(sample_template): def test_check_event_makes_sense_in_sequence_doesnt_raise_if_event_hasnt_expired_yet(sample_template):
broadcast_message = create_broadcast_message(sample_template) broadcast_message = create_broadcast_message(sample_template)
current_event = create_broadcast_event( current_event = create_broadcast_event(
broadcast_message, broadcast_message,
transmitted_starts_at=datetime(2021, 1, 1, 0, 0), transmitted_starts_at=datetime(2021, 1, 1, 0, 0),
transmitted_finishes_at=datetime(2021, 1, 1, 12, 1), transmitted_finishes_at=datetime(2021, 1, 1, 12, 1),
) )
check_provider_message_should_send(current_event, 'ee') check_event_makes_sense_in_sequence(current_event, 'ee')
@freeze_time('2021-01-01 12:00') @freeze_time('2021-01-01 12:00')
@@ -688,7 +688,7 @@ def test_send_broadcast_provider_message_raises_if_older_event_hasnt_started_sen
@freeze_time('2021-01-01 12:00') @freeze_time('2021-01-01 12:00')
def test_check_provider_message_should_send_doesnt_raise_if_newer_event_not_acked_yet(sample_template): def test_check_event_makes_sense_in_sequence_doesnt_raise_if_newer_event_not_acked_yet(sample_template):
broadcast_message = create_broadcast_message(sample_template) broadcast_message = create_broadcast_message(sample_template)
# event approved at midnight # event approved at midnight
current_event = create_broadcast_event( current_event = create_broadcast_event(
@@ -705,7 +705,7 @@ def test_check_provider_message_should_send_doesnt_raise_if_newer_event_not_acke
# this doesn't raise, because the alert event got an ack. The cancel doesn't have an event yet # this doesn't raise, because the alert event got an ack. The cancel doesn't have an event yet
# but this task is only interested in the current task (the update) so doesn't worry about that # but this task is only interested in the current task (the update) so doesn't worry about that
check_provider_message_should_send(current_event, 'ee') check_event_makes_sense_in_sequence(current_event, 'ee')
@pytest.mark.parametrize('existing_message_status', [ @pytest.mark.parametrize('existing_message_status', [