From 31d1abd6d19bcbec4ef9af34d791d905562c3755 Mon Sep 17 00:00:00 2001 From: Leo Hemsted Date: Mon, 9 Dec 2019 15:55:36 +0000 Subject: [PATCH] add task to move sms providers back towards shared load we generally aim to share the load between the two providers equally (more or less). When one provider has struggled, we deprioritise them, this commit adds a function that gradually restores balance. It checks every five minutes, if it's been more than an hour since the providers were last changed then it adjusts them towards a 50/50 split. Except it's not quite 50/50 due to #reasons (we want to slightly favour MMG), it's actually 60/40. That's defined in a new dict in config.py. --- app/celery/scheduled_tasks.py | 11 ++++++++- app/config.py | 11 +++++++++ app/dao/provider_details_dao.py | 43 ++++++++++++++++++++++++++++++++- 3 files changed, 63 insertions(+), 2 deletions(-) diff --git a/app/celery/scheduled_tasks.py b/app/celery/scheduled_tasks.py index 3e58dfc02..52831ebc1 100644 --- a/app/celery/scheduled_tasks.py +++ b/app/celery/scheduled_tasks.py @@ -33,7 +33,10 @@ from app.dao.notifications_dao import ( letters_missing_from_sending_bucket, is_delivery_slow_for_providers, ) -from app.dao.provider_details_dao import dao_reduce_sms_provider_priority +from app.dao.provider_details_dao import ( + dao_reduce_sms_provider_priority, + dao_adjust_provider_priority_back_to_resting_points +) from app.dao.users_dao import delete_codes_older_created_more_than_a_day_ago from app.dao.services_dao import dao_find_services_sending_to_tv_numbers, dao_find_services_with_high_failure_rates from app.models import ( @@ -126,6 +129,12 @@ def switch_current_sms_provider_on_slow_delivery(): dao_reduce_sms_provider_priority(provider_name, time_threshold=timedelta(minutes=10)) +@notify_celery.task(name='tend-providers-back-to-middle') +@statsd(namespace='tasks') +def tend_providers_back_to_middle(): + dao_adjust_provider_priority_back_to_resting_points() + + @notify_celery.task(name='check-job-status') @statsd(namespace="tasks") def check_job_status(): diff --git a/app/config.py b/app/config.py index f248cf9ed..49136a50c 100644 --- a/app/config.py +++ b/app/config.py @@ -135,6 +135,12 @@ class Config(object): CHECK_PROXY_HEADER = False + # these should always add up to 100% + SMS_PROVIDER_RESTING_POINTS = { + 'mmg': 60, + 'firetext': 40 + } + NOTIFY_SERVICE_ID = 'd6aa2c68-a2d9-4437-ab19-3ae8eb202553' NOTIFY_USER_ID = '6af522d0-2915-4e52-83a3-3690455a5fe6' INVITATION_EMAIL_TEMPLATE_ID = '4f46df42-f795-4cc4-83bb-65ca312f49cc' @@ -198,6 +204,11 @@ class Config(object): 'schedule': crontab(), 'options': {'queue': QueueNames.PERIODIC} }, + 'tend-providers-back-to-middle': { + 'task': 'tend-providers-back-to-middle', + 'schedule': crontab(minute='*/5'), + 'options': {'queue': QueueNames.PERIODIC} + }, 'check-for-missing-rows-in-completed-jobs': { 'task': 'check-for-missing-rows-in-completed-jobs', 'schedule': crontab(minute='*/10'), diff --git a/app/dao/provider_details_dao.py b/app/dao/provider_details_dao.py index 1c4977bd8..d2da2efd4 100644 --- a/app/dao/provider_details_dao.py +++ b/app/dao/provider_details_dao.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import datetime, timedelta from notifications_utils.timezones import convert_utc_to_bst from sqlalchemy import asc, desc, func @@ -87,6 +87,47 @@ def dao_reduce_sms_provider_priority(identifier, *, time_threshold): _update_provider_details_without_commit(increased_provider) +@transactional +def dao_adjust_provider_priority_back_to_resting_points(): + """ + Provided that neither SMS provider has been modified in the last hour, move both providers by 10 percentage points + each towards their defined resting points (set in SMS_PROVIDER_RESTING_POINTS in config.py). + """ + amount_to_reduce_by = 10 + time_threshold = timedelta(hours=1) + + # get current priority of both providers + providers = ProviderDetails.query.filter( + ProviderDetails.notification_type == 'sms', + ProviderDetails.active + ).with_for_update().all() + + # if something updated recently, don't update again. If the updated_at is null, treat it as min time + if any((provider.updated_at or datetime.min) > datetime.utcnow() - time_threshold for provider in providers): + current_app.logger.info("Not adjusting providers, providers updated less than {} ago.".format(time_threshold)) + return + + # Automatic update so set as notify user + notify_user = get_user_by_id(current_app.config['NOTIFY_USER_ID']) + for provider in providers: + target = current_app.config['SMS_PROVIDER_RESTING_POINTS'][provider.identifier] + current = provider.priority + + if current != target: + if current > target: + provider.priority = max(provider.priority - amount_to_reduce_by, target) + else: + provider.priority = min(provider.priority + amount_to_reduce_by, target) + + provider.created_by_id = notify_user.id + _update_provider_details_without_commit(provider) + current_app.logger.info('Adjusting provider priority - {} going from {} to {}'.format( + provider.identifier, + current, + provider.priority, + )) + + def get_provider_details_by_notification_type(notification_type, supports_international=False): filters = [ProviderDetails.notification_type == notification_type]