mirror of
https://github.com/GSA/notifications-api.git
synced 2026-06-19 20:50:35 -04:00
make 500s change priorities quicker
it's not acceptable for a constantly failing provider to take 50 minutes to drain (5x reducing priority by 10). But similarly, we need _some_ delay, or a handful of concurrent failures will completely turn off a provider, rendering the whole excercise kinda pointless. Setting the delay before it tries to reduce priority again to one minute is nice because it means that if one request times out and returns 502, then any other requests that are in flight at that time will time out before the one minute is up and not switch, but any requests made after the switch that take sixty seconds to time out will affect it.
This commit is contained in:
@@ -122,7 +122,7 @@ def switch_current_sms_provider_on_slow_delivery():
|
||||
for provider_name, is_slow in slow_delivery_notifications.items():
|
||||
if is_slow:
|
||||
current_app.logger.warning('Slow delivery notifications detected for provider {}'.format(provider_name))
|
||||
dao_reduce_sms_provider_priority(provider_name)
|
||||
dao_reduce_sms_provider_priority(provider_name, time_threshold=timedelta(minutes=10))
|
||||
|
||||
|
||||
@notify_celery.task(name='check-job-status')
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import datetime
|
||||
|
||||
from notifications_utils.timezones import convert_utc_to_bst
|
||||
from sqlalchemy import asc, desc, func
|
||||
@@ -35,7 +35,13 @@ def dao_get_provider_versions(provider_id):
|
||||
|
||||
|
||||
@transactional
|
||||
def dao_reduce_sms_provider_priority(identifier):
|
||||
def dao_reduce_sms_provider_priority(identifier, *, time_threshold):
|
||||
"""
|
||||
Will reduce a chosen sms provider's priority, and increase the other provider's priority by 10 points each.
|
||||
If either provider has been updated in the last `time_threshold`, then it won't take any action.
|
||||
"""
|
||||
amount_to_reduce_by = 10
|
||||
|
||||
# get current priority of both providers
|
||||
q = ProviderDetails.query.filter(
|
||||
ProviderDetails.notification_type == 'sms',
|
||||
@@ -46,8 +52,8 @@ def dao_reduce_sms_provider_priority(identifier):
|
||||
other_identifier = get_alternative_sms_provider(identifier)
|
||||
|
||||
# if something updated recently, don't update again. If the updated_at is null, treat it as min time
|
||||
if any((provider.updated_at or datetime.min) > datetime.utcnow() - timedelta(minutes=10) for provider in q):
|
||||
current_app.logger.info("Not adjusting providers, providers updated less than 10 minutes ago.")
|
||||
if any((provider.updated_at or datetime.min) > datetime.utcnow() - time_threshold for provider in q):
|
||||
current_app.logger.info("Not adjusting providers, providers updated less than {} ago.".format(time_threshold))
|
||||
return
|
||||
|
||||
reduced_provider = providers[identifier]
|
||||
@@ -56,8 +62,8 @@ def dao_reduce_sms_provider_priority(identifier):
|
||||
pre_increase_priority = increased_provider.priority
|
||||
|
||||
# always keep values between 0 and 100
|
||||
reduced_provider.priority = max(0, reduced_provider.priority - 10)
|
||||
increased_provider.priority = min(100, increased_provider.priority + 10)
|
||||
reduced_provider.priority = max(0, reduced_provider.priority - amount_to_reduce_by)
|
||||
increased_provider.priority = min(100, increased_provider.priority + amount_to_reduce_by)
|
||||
|
||||
current_app.logger.info('Adjusting provider priority - {} going from {} to {}'.format(
|
||||
reduced_provider.identifier,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import random
|
||||
from urllib import parse
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from flask import current_app
|
||||
from notifications_utils.recipients import (
|
||||
@@ -66,7 +66,7 @@ def send_sms_to_provider(notification):
|
||||
except Exception as e:
|
||||
notification.billable_units = template.fragment_count
|
||||
dao_update_notification(notification)
|
||||
dao_reduce_sms_provider_priority(provider.get_name())
|
||||
dao_reduce_sms_provider_priority(provider.get_name(), time_threshold=timedelta(minutes=1))
|
||||
raise e
|
||||
else:
|
||||
notification.billable_units = template.fragment_count
|
||||
|
||||
@@ -58,7 +58,7 @@ def test_should_call_delete_codes_on_delete_verify_codes_task(notify_db_session,
|
||||
assert scheduled_tasks.delete_codes_older_created_more_than_a_day_ago.call_count == 1
|
||||
|
||||
|
||||
def test_should_call_delete_invotations_on_delete_invitations_task(notify_api, mocker):
|
||||
def test_should_call_delete_invotations_on_delete_invitations_task(notify_db_session, mocker):
|
||||
mocker.patch('app.celery.scheduled_tasks.delete_invitations_created_more_than_two_days_ago')
|
||||
delete_invitations()
|
||||
assert scheduled_tasks.delete_invitations_created_more_than_two_days_ago.call_count == 1
|
||||
@@ -119,7 +119,7 @@ def test_switch_current_sms_provider_on_slow_delivery_switches_when_one_provider
|
||||
created_at=datetime(2017, 5, 1, 13, 50),
|
||||
delivery_time=timedelta(minutes=4)
|
||||
)
|
||||
mock_reduce.assert_called_once_with('firetext')
|
||||
mock_reduce.assert_called_once_with('firetext', time_threshold=timedelta(minutes=10))
|
||||
|
||||
|
||||
@freeze_time('2017-05-01 14:00:00')
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import pytest
|
||||
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timedelta
|
||||
from freezegun import freeze_time
|
||||
from sqlalchemy.sql import desc
|
||||
|
||||
@@ -153,7 +153,7 @@ def test_reduce_sms_provider_priority_switches_provider(
|
||||
ProviderDetails.query.filter(ProviderDetails.notification_type == 'sms').update({'updated_at': datetime.min})
|
||||
|
||||
# switch away from mmg. currently both 50/50
|
||||
dao_reduce_sms_provider_priority('mmg')
|
||||
dao_reduce_sms_provider_priority('mmg', time_threshold=timedelta(minutes=10))
|
||||
|
||||
assert firetext.priority == expected_priorities['firetext']
|
||||
assert mmg.priority == expected_priorities['mmg']
|
||||
@@ -177,7 +177,7 @@ def test_reduce_sms_provider_priority_adds_rows_to_history_table(
|
||||
desc(ProviderDetailsHistory.version)
|
||||
).all()
|
||||
|
||||
dao_reduce_sms_provider_priority(mmg.identifier)
|
||||
dao_reduce_sms_provider_priority(mmg.identifier, time_threshold=timedelta(minutes=10))
|
||||
|
||||
updated_provider_history_rows = ProviderDetailsHistory.query.filter(
|
||||
ProviderDetailsHistory.id == mmg.id
|
||||
@@ -197,9 +197,11 @@ def test_reduce_sms_provider_priority_does_nothing_if_providers_have_recently_ch
|
||||
mock_is_slow = mocker.patch('app.celery.scheduled_tasks.is_delivery_slow_for_providers')
|
||||
mock_reduce = mocker.patch('app.celery.scheduled_tasks.dao_reduce_sms_provider_priority')
|
||||
ProviderDetails.query.filter(ProviderDetails.identifier == 'firetext').update({'updated_at': datetime.min})
|
||||
ProviderDetails.query.filter(ProviderDetails.identifier == 'mmg').update({'updated_at': datetime.utcnow()})
|
||||
ProviderDetails.query.filter(ProviderDetails.identifier == 'mmg').update(
|
||||
{'updated_at': datetime.utcnow() - timedelta(minutes=4, seconds=59)}
|
||||
)
|
||||
|
||||
dao_reduce_sms_provider_priority('firetext')
|
||||
dao_reduce_sms_provider_priority('firetext', time_threshold=timedelta(minutes=5))
|
||||
|
||||
assert mock_is_slow.called is False
|
||||
assert mock_reduce.called is False
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import uuid
|
||||
from collections import namedtuple
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timedelta
|
||||
from unittest.mock import ANY
|
||||
|
||||
import pytest
|
||||
@@ -560,12 +560,12 @@ def test_should_update_billable_units_and_status_according_to_research_mode_and_
|
||||
assert notification.status == expected_status
|
||||
|
||||
|
||||
def test_should_set_notification_billable_units_if_sending_to_provider_fails(
|
||||
def test_should_set_notification_billable_units_and_reduces_provider_priority_if_sending_to_provider_fails(
|
||||
sample_notification,
|
||||
mocker,
|
||||
):
|
||||
mocker.patch('app.mmg_client.send_sms', side_effect=Exception())
|
||||
mocker.patch('app.delivery.send_to_providers.dao_reduce_sms_provider_priority')
|
||||
mock_reduce = mocker.patch('app.delivery.send_to_providers.dao_reduce_sms_provider_priority')
|
||||
|
||||
sample_notification.billable_units = 0
|
||||
assert sample_notification.sent_by is None
|
||||
@@ -574,6 +574,7 @@ def test_should_set_notification_billable_units_if_sending_to_provider_fails(
|
||||
send_to_providers.send_sms_to_provider(sample_notification)
|
||||
|
||||
assert sample_notification.billable_units == 1
|
||||
mock_reduce.assert_called_once_with('mmg', time_threshold=timedelta(minutes=1))
|
||||
|
||||
|
||||
def test_should_send_sms_to_international_providers(
|
||||
|
||||
Reference in New Issue
Block a user