From 1b7b26bf24136c9843fda664d71d237e31e450b5 Mon Sep 17 00:00:00 2001 From: Pea Tyczynska Date: Thu, 5 Dec 2019 16:07:06 +0000 Subject: [PATCH] Query directly for services with high failure rate --- app/celery/scheduled_tasks.py | 10 +- app/dao/services_dao.py | 46 +++++-- app/service/statistics.py | 19 --- app/service/utils.py | 22 ---- tests/app/celery/test_scheduled_tasks.py | 11 +- tests/app/dao/test_services_dao.py | 152 ++++++++++------------- tests/app/service/test_statistics.py | 23 +--- tests/app/service/test_utils.py | 37 ------ 8 files changed, 116 insertions(+), 204 deletions(-) diff --git a/app/celery/scheduled_tasks.py b/app/celery/scheduled_tasks.py index 7c2a88260..225772537 100644 --- a/app/celery/scheduled_tasks.py +++ b/app/celery/scheduled_tasks.py @@ -35,7 +35,7 @@ from app.dao.notifications_dao import ( ) from app.dao.provider_details_dao import dao_reduce_sms_provider_priority from app.dao.users_dao import delete_codes_older_created_more_than_a_day_ago -from app.dao.services_dao import dao_find_services_sending_to_tv_numbers +from app.dao.services_dao import dao_find_services_sending_to_tv_numbers, dao_find_services_with_high_failure_rates from app.models import ( Job, JOB_STATUS_IN_PROGRESS, @@ -46,8 +46,6 @@ from app.models import ( from app.notifications.process_notifications import send_notification_to_queue from app.v2.errors import JobIncompleteError -from app.service.utils import get_services_with_high_failure_rates - @notify_celery.task(name="run-scheduled-jobs") @statsd(namespace="tasks") @@ -265,7 +263,7 @@ def check_for_services_with_high_failure_rates_or_sending_to_tv_numbers(): end_date = datetime.utcnow() message = "" - services_with_failures = get_services_with_high_failure_rates(start_date=start_date, end_date=end_date) + services_with_failures = dao_find_services_with_high_failure_rates(start_date=start_date, end_date=end_date) services_sending_to_tv_numbers = dao_find_services_sending_to_tv_numbers(start_date=start_date, end_date=end_date) if services_with_failures: @@ -273,8 +271,8 @@ def check_for_services_with_high_failure_rates_or_sending_to_tv_numbers(): len(services_with_failures) ) for service in services_with_failures: - service_dashboard = current_app.config['ADMIN_BASE_URL'] + "/services/" + service["id"] - message += "service: {} failure rate: {},\n".format(service_dashboard, service["permanent_failure_rate"]) + service_dashboard = current_app.config['ADMIN_BASE_URL'] + "/services/" + service.service_id + message += "service: {} failure rate: {},\n".format(service_dashboard, service.permanent_failure_rate) elif services_sending_to_tv_numbers: message += "{} service(s) have sent over 100 sms messages to tv numbers in last 24 hours:\n".format( len(services_sending_to_tv_numbers) diff --git a/app/dao/services_dao.py b/app/dao/services_dao.py index 03ddfd22a..76e75f33a 100644 --- a/app/dao/services_dao.py +++ b/app/dao/services_dao.py @@ -4,6 +4,7 @@ from datetime import date, datetime, timedelta from notifications_utils.statsd_decorators import statsd from sqlalchemy.sql.expression import asc, case, and_, func from sqlalchemy.orm import joinedload +from sqlalchemy import cast, Float from flask import current_app from app import db @@ -44,6 +45,7 @@ from app.models import ( KEY_TYPE_TEST, NHS_ORGANISATION_TYPES, NON_CROWN_ORGANISATION_TYPES, + NOTIFICATION_PERMANENT_FAILURE, SMS_TYPE, LETTER_TYPE, ) @@ -544,12 +546,10 @@ def dao_find_services_sending_to_tv_numbers(start_date, end_date, threshold=100) ).all() -def dao_find_real_sms_notification_count_by_status_for_live_services(start_date, end_date): - # only works within services' retention period - return db.session.query( - Notification.service_id.label('service_id'), - Notification.status.label('status'), - func.count(Notification.id).label('count') +def dao_find_services_with_high_failure_rates(start_date, end_date, threshold=100): + subquery = db.session.query( + func.count(Notification.id).label('total_count'), + Notification.service_id.label('service_id') ).filter( Notification.service_id == Service.id, Notification.created_at >= start_date, @@ -561,5 +561,35 @@ def dao_find_real_sms_notification_count_by_status_for_live_services(start_date, Service.active == True, ).group_by( Notification.service_id, - Notification.status - ).all() + ).having( + func.count(Notification.id) >= threshold + ) + + subquery = subquery.subquery() + + query = db.session.query( + Notification.service_id.label('service_id'), + func.count(Notification.id).label('permanent_failure_count'), + subquery.c.total_count.label('total_count'), + (cast(func.count(Notification.id), Float) / cast(subquery.c.total_count, Float)).label('permanent_failure_rate') + ).join( + subquery, + subquery.c.service_id == Notification.service_id + ).filter( + Notification.service_id == Service.id, + Notification.created_at >= start_date, + Notification.created_at <= end_date, + Notification.key_type != KEY_TYPE_TEST, + Notification.notification_type == SMS_TYPE, + Notification.status == NOTIFICATION_PERMANENT_FAILURE, + Service.restricted == False, # noqa + Service.research_mode == False, + Service.active == True, + ).group_by( + Notification.service_id, + subquery.c.total_count + ).having( + cast(func.count(Notification.id), Float) / cast(subquery.c.total_count, Float) >= 0.25 + ) + + return query.all() diff --git a/app/service/statistics.py b/app/service/statistics.py index 3c51ee343..5c6accaa5 100644 --- a/app/service/statistics.py +++ b/app/service/statistics.py @@ -21,18 +21,6 @@ def format_statistics(statistics): return counts -def get_rate_of_permanent_failures_for_service(statistics, threshold=100): - counts = {"permanent_failure": 0, "all_other_statuses": 0} - for row in statistics: - _count_if_status_is_permanent_failure_from_row(counts, row) - - if counts['permanent_failure'] + counts['all_other_statuses'] >= threshold: - rate = counts['permanent_failure'] / (counts['permanent_failure'] + counts['all_other_statuses']) - else: - rate = 0 - return rate - - def format_admin_stats(statistics): counts = create_stats_dict() @@ -106,13 +94,6 @@ def _update_statuses_from_row(update_dict, row): update_dict['failed'] += row.count -def _count_if_status_is_permanent_failure_from_row(update_dict, row): - if row.status == 'permanent-failure': - update_dict['permanent_failure'] += row.count - else: - update_dict['all_other_statuses'] += row.count - - def create_empty_monthly_notification_status_stats_dict(year): utc_month_starts = get_months_for_financial_year(year) # nested dicts - data[month][template type][status] = count diff --git a/app/service/utils.py b/app/service/utils.py index 0b7865928..16521fbbd 100644 --- a/app/service/utils.py +++ b/app/service/utils.py @@ -2,15 +2,11 @@ import itertools from notifications_utils.recipients import allowed_to_send_to -from app.dao.services_dao import dao_find_real_sms_notification_count_by_status_for_live_services - from app.models import ( ServiceWhitelist, MOBILE_TYPE, EMAIL_TYPE, KEY_TYPE_TEST, KEY_TYPE_TEAM, KEY_TYPE_NORMAL) -from app.service import statistics - def get_recipients_from_request(request_json, key, type): return [(type, recipient) for recipient in request_json.get(key)] @@ -56,21 +52,3 @@ def service_allowed_to_send_to(recipient, service, key_type, allow_whitelisted_r whitelist_members ) ) - - -def get_services_with_high_failure_rates(start_date, end_date, rate=0.25, threshold=100): - stats = dao_find_real_sms_notification_count_by_status_for_live_services( - start_date=start_date, - end_date=end_date, - ) - results = [] - for service_id, rows in itertools.groupby(stats, lambda x: x.service_id): - rows = list(rows) - - permanent_failure_rate = statistics.get_rate_of_permanent_failures_for_service(rows, threshold=threshold) - if permanent_failure_rate >= rate: - results.append({ - 'id': str(rows[0].service_id), - 'permanent_failure_rate': permanent_failure_rate - }) - return results diff --git a/tests/app/celery/test_scheduled_tasks.py b/tests/app/celery/test_scheduled_tasks.py index e6703e4d5..cd4848c4d 100644 --- a/tests/app/celery/test_scheduled_tasks.py +++ b/tests/app/celery/test_scheduled_tasks.py @@ -505,11 +505,18 @@ MockServicesSendingToTVNumbers = namedtuple( 'notification_count', ] ) +MockServicesWithHighFailureRate = namedtuple( + 'ServicesWithHighFailureRate', + [ + 'service_id', + 'permanent_failure_rate', + ] +) @pytest.mark.parametrize("failure_rates, sms_to_tv_numbers, expected_message", [ [ - [{"id": "123", "permanent_failure_rate": 0.3}], + [MockServicesWithHighFailureRate("123", 0.3)], [], "1 service(s) have had high permanent-failure rates for sms messages in last " "24 hours:\nservice: {} failure rate: 0.3,\n".format( @@ -531,7 +538,7 @@ def test_check_for_services_with_high_failure_rates_or_sending_to_tv_numbers( mock_logger = mocker.patch('app.celery.tasks.current_app.logger.exception') mock_create_ticket = mocker.patch('app.celery.scheduled_tasks.zendesk_client.create_ticket') mock_failure_rates = mocker.patch( - 'app.celery.scheduled_tasks.get_services_with_high_failure_rates', return_value=failure_rates + 'app.celery.scheduled_tasks.dao_find_services_with_high_failure_rates', return_value=failure_rates ) mock_sms_to_tv_numbers = mocker.patch( 'app.celery.scheduled_tasks.dao_find_services_sending_to_tv_numbers', return_value=sms_to_tv_numbers diff --git a/tests/app/dao/test_services_dao.py b/tests/app/dao/test_services_dao.py index df03a7a6f..5769f94f4 100644 --- a/tests/app/dao/test_services_dao.py +++ b/tests/app/dao/test_services_dao.py @@ -8,79 +8,49 @@ from sqlalchemy.exc import IntegrityError from sqlalchemy.orm.exc import NoResultFound from app import db -from app.dao.inbound_numbers_dao import ( - dao_set_inbound_number_to_service, - dao_get_available_inbound_numbers, - dao_set_inbound_number_active_flag -) +from app.dao.inbound_numbers_dao import (dao_get_available_inbound_numbers, + dao_set_inbound_number_active_flag, + dao_set_inbound_number_to_service) from app.dao.organisation_dao import dao_add_service_to_organisation -from app.dao.service_permissions_dao import dao_add_service_permission, dao_remove_service_permission -from app.dao.services_dao import ( - dao_create_service, - dao_add_user_to_service, - dao_remove_user_from_service, - dao_fetch_all_services, - dao_fetch_live_services_data, - dao_fetch_service_by_id, - dao_fetch_all_services_by_user, - dao_find_real_sms_notification_count_by_status_for_live_services, - dao_find_services_sending_to_tv_numbers, - dao_update_service, - delete_service_and_all_associated_db_objects, - dao_fetch_stats_for_service, - dao_fetch_todays_stats_for_service, - fetch_todays_total_message_count, - dao_fetch_todays_stats_for_all_services, - dao_suspend_service, - dao_resume_service, - dao_fetch_active_users_for_service, - dao_fetch_service_by_inbound_number, - get_services_by_partial_name, -) -from app.dao.service_user_dao import dao_get_service_user, dao_update_service_user -from app.dao.users_dao import save_model_user, create_user_code -from app.models import ( - VerifyCode, - ApiKey, - Template, - TemplateHistory, - Job, - Notification, - NotificationHistory, - Permission, - User, - InvitedUser, - Service, - ServicePermission, - ServiceUser, - KEY_TYPE_NORMAL, - KEY_TYPE_TEAM, - KEY_TYPE_TEST, - EMAIL_TYPE, - SMS_TYPE, - INTERNATIONAL_SMS_TYPE, - LETTER_TYPE, - user_folder_permissions, - Organisation -) -from tests.app.db import ( - create_ft_billing, - create_inbound_number, - create_organisation, - create_user, - create_service, - create_service_with_inbound_number, - create_service_with_defined_sms_sender, - create_template, - create_template_folder, - create_notification, - create_api_key, - create_invited_user, - create_email_branding, - create_letter_branding, - create_notification_history, - create_annual_billing, -) +from app.dao.service_permissions_dao import (dao_add_service_permission, + dao_remove_service_permission) +from app.dao.service_user_dao import (dao_get_service_user, + dao_update_service_user) +from app.dao.services_dao import (dao_add_user_to_service, dao_create_service, + dao_fetch_active_users_for_service, + dao_fetch_all_services, + dao_fetch_all_services_by_user, + dao_fetch_live_services_data, + dao_fetch_service_by_id, + dao_fetch_service_by_inbound_number, + dao_fetch_stats_for_service, + dao_fetch_todays_stats_for_all_services, + dao_fetch_todays_stats_for_service, + dao_find_services_sending_to_tv_numbers, + dao_find_services_with_high_failure_rates, + dao_remove_user_from_service, + dao_resume_service, dao_suspend_service, + dao_update_service, + delete_service_and_all_associated_db_objects, + fetch_todays_total_message_count, + get_services_by_partial_name) +from app.dao.users_dao import create_user_code, save_model_user +from app.models import (EMAIL_TYPE, INTERNATIONAL_SMS_TYPE, KEY_TYPE_NORMAL, + KEY_TYPE_TEAM, KEY_TYPE_TEST, LETTER_TYPE, SMS_TYPE, + ApiKey, InvitedUser, Job, Notification, + NotificationHistory, Organisation, Permission, Service, + ServicePermission, ServiceUser, Template, + TemplateHistory, User, VerifyCode, + user_folder_permissions) +from tests.app.db import (create_annual_billing, create_api_key, + create_email_branding, create_ft_billing, + create_inbound_number, create_invited_user, + create_letter_branding, create_notification, + create_notification_history, create_organisation, + create_service, + create_service_with_defined_sms_sender, + create_service_with_inbound_number, create_template, + create_template_folder, create_user) def test_should_have_decorated_services_dao_functions(): @@ -1106,9 +1076,9 @@ def create_email_sms_letter_template(): @freeze_time("2019-12-02 12:00:00.000000") def test_dao_find_services_sending_to_tv_numbers(notify_db_session, fake_uuid): service_1 = create_service(service_name="Service 1", service_id=fake_uuid) - service_3 = create_service(service_name="Service 3", restricted=True) # restricted - service_4 = create_service(service_name="Service 4", research_mode=True) # research mode - service_5 = create_service(service_name="Service 5", active=False) # not active + service_3 = create_service(service_name="Service 3", restricted=True) # restricted is excluded + service_4 = create_service(service_name="Service 4", research_mode=True) # research mode is excluded + service_5 = create_service(service_name="Service 5", active=False) # not active is excluded services = [service_1, service_3, service_4, service_5] tv_number = "447700900001" @@ -1120,13 +1090,13 @@ def test_dao_find_services_sending_to_tv_numbers(notify_db_session, fake_uuid): for x in range(0, 5): create_notification(template, normalised_to=tv_number, status="permanent-failure") - service_6 = create_service(service_name="Service 6") # notifications too old + service_6 = create_service(service_name="Service 6") # notifications too old are excluded with freeze_time("2019-11-30 15:00:00.000000"): template_6 = create_template(service_6) for x in range(0, 5): create_notification(template_6, normalised_to=tv_number, status="permanent-failure") - service_2 = create_service(service_name="Service 2") # below threshold + service_2 = create_service(service_name="Service 2") # below threshold is excluded template_2 = create_template(service_2) create_notification(template_2, normalised_to=tv_number, status="permanent-failure") for x in range(0, 5): @@ -1141,11 +1111,11 @@ def test_dao_find_services_sending_to_tv_numbers(notify_db_session, fake_uuid): assert str(result[0].service_id) == fake_uuid -def test_dao_find_real_sms_notification_count_by_status_for_live_services(notify_db_session, fake_uuid): +def test_dao_find_services_with_high_failure_rates(notify_db_session, fake_uuid): service_1 = create_service(service_name="Service 1", service_id=fake_uuid) - service_3 = create_service(service_name="Service 3", restricted=True) # restricted - service_4 = create_service(service_name="Service 4", research_mode=True) # research mode - service_5 = create_service(service_name="Service 5", active=False) # not active + service_3 = create_service(service_name="Service 3", restricted=True) # restricted is excluded + service_4 = create_service(service_name="Service 4", research_mode=True) # research mode is excluded + service_5 = create_service(service_name="Service 5", active=False) # not active is excluded services = [service_1, service_3, service_4, service_5] for service in services: @@ -1153,21 +1123,27 @@ def test_dao_find_real_sms_notification_count_by_status_for_live_services(notify for x in range(0, 3): create_notification(template, status="permanent-failure") create_notification(template, status="delivered") + create_notification(template, status="sending") create_notification(template, status="temporary-failure") - service_6 = create_service(service_name="Service 6") # notifications too old + service_6 = create_service(service_name="Service 6") with freeze_time("2019-11-30 15:00:00.000000"): template_6 = create_template(service_6) - for x in range(0, 2): - create_notification(template_6, status="permanent-failure") + for x in range(0, 4): + create_notification(template_6, status="permanent-failure") # notifications too old are excluded - service_2 = create_service(service_name="Service 2") # below threshold + service_2 = create_service(service_name="Service 2") template_2 = create_template(service_2) - create_notification(template_2, status="permanent-failure", key_type='test') # test key type + for x in range(0, 4): + create_notification(template_2, status="permanent-failure", key_type='test') # test key type is excluded + create_notification(template_2, status="permanent-failure") # below threshold is excluded start_date = (datetime.utcnow() - timedelta(days=1)) end_date = datetime.utcnow() - result = dao_find_real_sms_notification_count_by_status_for_live_services(start_date, end_date) - assert len(result) == 3 + result = dao_find_services_with_high_failure_rates(start_date, end_date, threshold=3) + # assert len(result) == 3 + # assert str(result[0].service_id) == fake_uuid + assert len(result) == 1 assert str(result[0].service_id) == fake_uuid + assert result[0].permanent_failure_rate == 0.25 diff --git a/tests/app/service/test_statistics.py b/tests/app/service/test_statistics.py index 90947f1ae..ba40dc796 100644 --- a/tests/app/service/test_statistics.py +++ b/tests/app/service/test_statistics.py @@ -11,8 +11,7 @@ from app.service.statistics import ( create_stats_dict, create_zeroed_stats_dicts, format_admin_stats, - format_statistics, - get_rate_of_permanent_failures_for_service + format_statistics ) StatsRow = collections.namedtuple('row', ('notification_type', 'status', 'count')) @@ -74,26 +73,6 @@ def test_format_statistics(stats, email_counts, sms_counts, letter_counts): } -@pytest.mark.idparametrize("statistics, expected_result", { - 'counts_rate_for_sms': ([ - StatsRow('sms', 'permanent-failure', 100), - StatsRow('sms', 'delivered', 300), - ], 0.25), - 'only_counts_permanent_failure_as_failed': ([ - StatsRow('sms', 'permanent-failure', 100), - StatsRow('sms', 'temporary-failure', 100), - StatsRow('sms', 'delivered', 300), - ], 0.2), - 'below_threshold': ([ - StatsRow('sms', 'permanent-failure', 5), - StatsRow('sms', 'delivered', 3), - ], 0), -}) -def test_get_rate_of_permanent_failures_for_service(statistics, expected_result): - rate = get_rate_of_permanent_failures_for_service(statistics) - assert rate == expected_result - - def test_create_zeroed_stats_dicts(): assert create_zeroed_stats_dicts() == { 'sms': {'requested': 0, 'delivered': 0, 'failed': 0}, diff --git a/tests/app/service/test_utils.py b/tests/app/service/test_utils.py index 2e107dada..71b2770bb 100644 --- a/tests/app/service/test_utils.py +++ b/tests/app/service/test_utils.py @@ -1,8 +1,5 @@ from app.dao.date_util import get_current_financial_year_start_year from freezegun import freeze_time -from app.service.utils import get_services_with_high_failure_rates -from collections import namedtuple -from datetime import datetime, timedelta # see get_financial_year for conversion of financial years. @@ -16,37 +13,3 @@ def test_get_current_financial_year_start_year_before_march(): def test_get_current_financial_year_start_year_after_april(): current_fy = get_current_financial_year_start_year() assert current_fy == 2017 - - -MockServicesNotificationCounts = namedtuple( - 'ServicesSendingToTVNumbers', - [ - 'service_id', - 'status', - 'count', - ] -) - - -@freeze_time("2019-12-02 12:00:00.000000") -def test_get_services_with_high_failure_rates(mocker, notify_db_session): - mock_query_results = [ - MockServicesNotificationCounts('123', 'delivered', 150), - MockServicesNotificationCounts('123', 'permanent-failure', 50), # these will show up - MockServicesNotificationCounts('456', 'delivered', 150), - MockServicesNotificationCounts('456', 'permanent-failure', 5), # ratio too low - MockServicesNotificationCounts('789', 'permanent-failure', 5), # below threshold - MockServicesNotificationCounts('444', 'delivered', 100), - MockServicesNotificationCounts('444', 'permanent-failure', 100), # these will show up - ] - mocker.patch( - 'app.service.utils.dao_find_real_sms_notification_count_by_status_for_live_services', - return_value=mock_query_results - ) - start_date = (datetime.utcnow() - timedelta(days=1)) - end_date = datetime.utcnow() - - assert get_services_with_high_failure_rates(start_date, end_date) == [ - {'id': '123', 'permanent_failure_rate': 0.25}, - {'id': '444', 'permanent_failure_rate': 0.5} - ]