mirror of
https://github.com/GSA/notifications-api.git
synced 2026-02-04 10:21:14 -05:00
make 500s change priorities quicker
it's not acceptable for a constantly failing provider to take 50 minutes to drain (5x reducing priority by 10). But similarly, we need _some_ delay, or a handful of concurrent failures will completely turn off a provider, rendering the whole excercise kinda pointless. Setting the delay before it tries to reduce priority again to one minute is nice because it means that if one request times out and returns 502, then any other requests that are in flight at that time will time out before the one minute is up and not switch, but any requests made after the switch that take sixty seconds to time out will affect it.
This commit is contained in:
@@ -122,7 +122,7 @@ def switch_current_sms_provider_on_slow_delivery():
|
||||
for provider_name, is_slow in slow_delivery_notifications.items():
|
||||
if is_slow:
|
||||
current_app.logger.warning('Slow delivery notifications detected for provider {}'.format(provider_name))
|
||||
dao_reduce_sms_provider_priority(provider_name)
|
||||
dao_reduce_sms_provider_priority(provider_name, time_threshold=timedelta(minutes=10))
|
||||
|
||||
|
||||
@notify_celery.task(name='check-job-status')
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import datetime
|
||||
|
||||
from notifications_utils.timezones import convert_utc_to_bst
|
||||
from sqlalchemy import asc, desc, func
|
||||
@@ -35,7 +35,13 @@ def dao_get_provider_versions(provider_id):
|
||||
|
||||
|
||||
@transactional
|
||||
def dao_reduce_sms_provider_priority(identifier):
|
||||
def dao_reduce_sms_provider_priority(identifier, *, time_threshold):
|
||||
"""
|
||||
Will reduce a chosen sms provider's priority, and increase the other provider's priority by 10 points each.
|
||||
If either provider has been updated in the last `time_threshold`, then it won't take any action.
|
||||
"""
|
||||
amount_to_reduce_by = 10
|
||||
|
||||
# get current priority of both providers
|
||||
q = ProviderDetails.query.filter(
|
||||
ProviderDetails.notification_type == 'sms',
|
||||
@@ -46,8 +52,8 @@ def dao_reduce_sms_provider_priority(identifier):
|
||||
other_identifier = get_alternative_sms_provider(identifier)
|
||||
|
||||
# if something updated recently, don't update again. If the updated_at is null, treat it as min time
|
||||
if any((provider.updated_at or datetime.min) > datetime.utcnow() - timedelta(minutes=10) for provider in q):
|
||||
current_app.logger.info("Not adjusting providers, providers updated less than 10 minutes ago.")
|
||||
if any((provider.updated_at or datetime.min) > datetime.utcnow() - time_threshold for provider in q):
|
||||
current_app.logger.info("Not adjusting providers, providers updated less than {} ago.".format(time_threshold))
|
||||
return
|
||||
|
||||
reduced_provider = providers[identifier]
|
||||
@@ -56,8 +62,8 @@ def dao_reduce_sms_provider_priority(identifier):
|
||||
pre_increase_priority = increased_provider.priority
|
||||
|
||||
# always keep values between 0 and 100
|
||||
reduced_provider.priority = max(0, reduced_provider.priority - 10)
|
||||
increased_provider.priority = min(100, increased_provider.priority + 10)
|
||||
reduced_provider.priority = max(0, reduced_provider.priority - amount_to_reduce_by)
|
||||
increased_provider.priority = min(100, increased_provider.priority + amount_to_reduce_by)
|
||||
|
||||
current_app.logger.info('Adjusting provider priority - {} going from {} to {}'.format(
|
||||
reduced_provider.identifier,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import random
|
||||
from urllib import parse
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from flask import current_app
|
||||
from notifications_utils.recipients import (
|
||||
@@ -66,7 +66,7 @@ def send_sms_to_provider(notification):
|
||||
except Exception as e:
|
||||
notification.billable_units = template.fragment_count
|
||||
dao_update_notification(notification)
|
||||
dao_reduce_sms_provider_priority(provider.get_name())
|
||||
dao_reduce_sms_provider_priority(provider.get_name(), time_threshold=timedelta(minutes=1))
|
||||
raise e
|
||||
else:
|
||||
notification.billable_units = template.fragment_count
|
||||
|
||||
Reference in New Issue
Block a user