From cfbb080f573f3a318d447fb5d735e6bf69138c92 Mon Sep 17 00:00:00 2001 From: Pea Tyczynska Date: Tue, 3 Dec 2019 16:18:07 +0000 Subject: [PATCH] Simplify failure rate by building separate query --- app/celery/scheduled_tasks.py | 7 +--- app/dao/services_dao.py | 24 ++++++++++-- app/service/statistics.py | 3 +- app/service/utils.py | 26 ++++++------- tests/app/celery/test_scheduled_tasks.py | 6 ++- .../dao/test_fact_notification_status_dao.py | 2 +- tests/app/dao/test_services_dao.py | 39 +++++++++++++++++-- tests/app/service/test_statistics.py | 6 --- tests/app/service/test_utils.py | 1 - 9 files changed, 77 insertions(+), 37 deletions(-) diff --git a/app/celery/scheduled_tasks.py b/app/celery/scheduled_tasks.py index fba91b8c4..d6397754e 100644 --- a/app/celery/scheduled_tasks.py +++ b/app/celery/scheduled_tasks.py @@ -266,11 +266,7 @@ def check_for_services_with_high_failure_rates_or_sending_to_tv_numbers(): message = "" services_with_failures = get_services_with_high_failure_rates(start_date=start_date, end_date=end_date) - services_sending_to_tv_numbers = dao_find_services_sending_to_tv_numbers( - threshold=100, - start_date=start_date, - end_date=end_date - ) + services_sending_to_tv_numbers = dao_find_services_sending_to_tv_numbers(start_date=start_date, end_date=end_date) if services_with_failures: message += "{} service(s) have had high permanent-failure rates for sms messages in last 24 hours:\n".format( @@ -291,6 +287,7 @@ def check_for_services_with_high_failure_rates_or_sending_to_tv_numbers(): current_app.logger.exception(message) if current_app.config['NOTIFY_ENVIRONMENT'] in ['live', 'production', 'test']: + message += "\nThings to do: contact service? revoke their key?" zendesk_client.create_ticket( subject="[{}] High failure rates for sms spotted for services".format( current_app.config['NOTIFY_ENVIRONMENT'] diff --git a/app/dao/services_dao.py b/app/dao/services_dao.py index 3ec010093..03ddfd22a 100644 --- a/app/dao/services_dao.py +++ b/app/dao/services_dao.py @@ -524,9 +524,7 @@ def dao_fetch_active_users_for_service(service_id): def dao_find_services_sending_to_tv_numbers(start_date, end_date, threshold=100): - return db.session.query( - Service.name.label('service_name'), Notification.service_id.label('service_id'), func.count(Notification.id).label('notification_count') ).filter( @@ -541,7 +539,27 @@ def dao_find_services_sending_to_tv_numbers(start_date, end_date, threshold=100) Service.active == True, ).group_by( Notification.service_id, - Service.name ).having( func.count(Notification.id) > threshold ).all() + + +def dao_find_real_sms_notification_count_by_status_for_live_services(start_date, end_date): + # only works within services' retention period + return db.session.query( + Notification.service_id.label('service_id'), + Notification.status.label('status'), + func.count(Notification.id).label('count') + ).filter( + Notification.service_id == Service.id, + Notification.created_at >= start_date, + Notification.created_at <= end_date, + Notification.key_type != KEY_TYPE_TEST, + Notification.notification_type == SMS_TYPE, + Service.restricted == False, # noqa + Service.research_mode == False, + Service.active == True, + ).group_by( + Notification.service_id, + Notification.status + ).all() diff --git a/app/service/statistics.py b/app/service/statistics.py index 295be4fb1..3c51ee343 100644 --- a/app/service/statistics.py +++ b/app/service/statistics.py @@ -24,8 +24,7 @@ def format_statistics(statistics): def get_rate_of_permanent_failures_for_service(statistics, threshold=100): counts = {"permanent_failure": 0, "all_other_statuses": 0} for row in statistics: - if row.notification_type == 'sms': - _count_if_status_is_permanent_failure_from_row(counts, row) + _count_if_status_is_permanent_failure_from_row(counts, row) if counts['permanent_failure'] + counts['all_other_statuses'] >= threshold: rate = counts['permanent_failure'] / (counts['permanent_failure'] + counts['all_other_statuses']) diff --git a/app/service/utils.py b/app/service/utils.py index 932df29e0..0b7865928 100644 --- a/app/service/utils.py +++ b/app/service/utils.py @@ -2,6 +2,8 @@ import itertools from notifications_utils.recipients import allowed_to_send_to +from app.dao.services_dao import dao_find_real_sms_notification_count_by_status_for_live_services + from app.models import ( ServiceWhitelist, MOBILE_TYPE, EMAIL_TYPE, @@ -9,8 +11,6 @@ from app.models import ( from app.service import statistics -from app.dao.fact_notification_status_dao import fetch_stats_for_all_services_by_date_range - def get_recipients_from_request(request_json, key, type): return [(type, recipient) for recipient in request_json.get(key)] @@ -59,20 +59,18 @@ def service_allowed_to_send_to(recipient, service, key_type, allow_whitelisted_r def get_services_with_high_failure_rates(start_date, end_date, rate=0.25, threshold=100): - stats = fetch_stats_for_all_services_by_date_range( - start_date=start_date.date(), - end_date=end_date.date(), - include_from_test_key=False, + stats = dao_find_real_sms_notification_count_by_status_for_live_services( + start_date=start_date, + end_date=end_date, ) results = [] for service_id, rows in itertools.groupby(stats, lambda x: x.service_id): rows = list(rows) - if not rows[0].restricted and not rows[0].research_mode and rows[0].active: - permanent_failure_rate = statistics.get_rate_of_permanent_failures_for_service(rows, threshold=threshold) - if permanent_failure_rate >= rate: - results.append({ - 'id': str(rows[0].service_id), - 'name': rows[0].name, - 'permanent_failure_rate': permanent_failure_rate - }) + + permanent_failure_rate = statistics.get_rate_of_permanent_failures_for_service(rows, threshold=threshold) + if permanent_failure_rate >= rate: + results.append({ + 'id': str(rows[0].service_id), + 'permanent_failure_rate': permanent_failure_rate + }) return results diff --git a/tests/app/celery/test_scheduled_tasks.py b/tests/app/celery/test_scheduled_tasks.py index ee4ec14ff..79143e673 100644 --- a/tests/app/celery/test_scheduled_tasks.py +++ b/tests/app/celery/test_scheduled_tasks.py @@ -510,7 +510,7 @@ MockServicesSendingToTVNumbers = namedtuple( @pytest.mark.parametrize("failure_rates, sms_to_tv_numbers, expected_message", [ [ - [{"id": "123", "name": "Service 1", "permanent_failure_rate": 0.3}], + [{"id": "123", "permanent_failure_rate": 0.3}], [], "1 service(s) have had high permanent-failure rates for sms messages in last " "24 hours:\nservice id: 123 failure rate: 0.3,\n" @@ -534,13 +534,15 @@ def test_check_for_services_with_high_failure_rates_or_sending_to_tv_numbers( 'app.celery.scheduled_tasks.dao_find_services_sending_to_tv_numbers', return_value=sms_to_tv_numbers ) + zendesk_actions = "\nThings to do: contact service? revoke their key?" + check_for_services_with_high_failure_rates_or_sending_to_tv_numbers() assert mock_failure_rates.called assert mock_sms_to_tv_numbers.called mock_logger.assert_called_once_with(expected_message) mock_create_ticket.assert_called_with( - message=expected_message, + message=expected_message + zendesk_actions, subject="[test] High failure rates for sms spotted for services", ticket_type='incident' ) diff --git a/tests/app/dao/test_fact_notification_status_dao.py b/tests/app/dao/test_fact_notification_status_dao.py index 65687f9d8..8e2ce9afd 100644 --- a/tests/app/dao/test_fact_notification_status_dao.py +++ b/tests/app/dao/test_fact_notification_status_dao.py @@ -14,7 +14,7 @@ from app.dao.fact_notification_status_dao import ( fetch_notification_status_totals_for_all_services, fetch_notification_statuses_for_job, fetch_stats_for_all_services_by_date_range, fetch_monthly_template_usage_for_service, - get_total_sent_notifications_for_day_and_type, + get_total_sent_notifications_for_day_and_type ) from app.models import ( FactNotificationStatus, diff --git a/tests/app/dao/test_services_dao.py b/tests/app/dao/test_services_dao.py index 4be4e1787..df03a7a6f 100644 --- a/tests/app/dao/test_services_dao.py +++ b/tests/app/dao/test_services_dao.py @@ -23,6 +23,7 @@ from app.dao.services_dao import ( dao_fetch_live_services_data, dao_fetch_service_by_id, dao_fetch_all_services_by_user, + dao_find_real_sms_notification_count_by_status_for_live_services, dao_find_services_sending_to_tv_numbers, dao_update_service, delete_service_and_all_associated_db_objects, @@ -1103,8 +1104,8 @@ def create_email_sms_letter_template(): @freeze_time("2019-12-02 12:00:00.000000") -def test_dao_find_services_sending_to_tv_numbers(notify_db_session): - service_1 = create_service(service_name="Service 1") +def test_dao_find_services_sending_to_tv_numbers(notify_db_session, fake_uuid): + service_1 = create_service(service_name="Service 1", service_id=fake_uuid) service_3 = create_service(service_name="Service 3", restricted=True) # restricted service_4 = create_service(service_name="Service 4", research_mode=True) # research mode service_5 = create_service(service_name="Service 5", active=False) # not active @@ -1137,4 +1138,36 @@ def test_dao_find_services_sending_to_tv_numbers(notify_db_session): result = dao_find_services_sending_to_tv_numbers(start_date, end_date, threshold=4) assert len(result) == 1 - assert result[0].service_name == "Service 1" + assert str(result[0].service_id) == fake_uuid + + +def test_dao_find_real_sms_notification_count_by_status_for_live_services(notify_db_session, fake_uuid): + service_1 = create_service(service_name="Service 1", service_id=fake_uuid) + service_3 = create_service(service_name="Service 3", restricted=True) # restricted + service_4 = create_service(service_name="Service 4", research_mode=True) # research mode + service_5 = create_service(service_name="Service 5", active=False) # not active + services = [service_1, service_3, service_4, service_5] + + for service in services: + template = create_template(service) + for x in range(0, 3): + create_notification(template, status="permanent-failure") + create_notification(template, status="delivered") + create_notification(template, status="temporary-failure") + + service_6 = create_service(service_name="Service 6") # notifications too old + with freeze_time("2019-11-30 15:00:00.000000"): + template_6 = create_template(service_6) + for x in range(0, 2): + create_notification(template_6, status="permanent-failure") + + service_2 = create_service(service_name="Service 2") # below threshold + template_2 = create_template(service_2) + create_notification(template_2, status="permanent-failure", key_type='test') # test key type + + start_date = (datetime.utcnow() - timedelta(days=1)) + end_date = datetime.utcnow() + + result = dao_find_real_sms_notification_count_by_status_for_live_services(start_date, end_date) + assert len(result) == 3 + assert str(result[0].service_id) == fake_uuid diff --git a/tests/app/service/test_statistics.py b/tests/app/service/test_statistics.py index abf3369ec..90947f1ae 100644 --- a/tests/app/service/test_statistics.py +++ b/tests/app/service/test_statistics.py @@ -79,12 +79,6 @@ def test_format_statistics(stats, email_counts, sms_counts, letter_counts): StatsRow('sms', 'permanent-failure', 100), StatsRow('sms', 'delivered', 300), ], 0.25), - 'ignores_other_notification_types': ([ - StatsRow('sms', 'permanent-failure', 100), - StatsRow('sms', 'delivered', 300), - StatsRow('email', 'delivered', 300), - StatsRow('letter', 'created', 300), - ], 0.25), 'only_counts_permanent_failure_as_failed': ([ StatsRow('sms', 'permanent-failure', 100), StatsRow('sms', 'temporary-failure', 100), diff --git a/tests/app/service/test_utils.py b/tests/app/service/test_utils.py index 32a8877e0..b8be1ad78 100644 --- a/tests/app/service/test_utils.py +++ b/tests/app/service/test_utils.py @@ -45,6 +45,5 @@ def test_get_services_with_high_failure_rates(notify_db_session): assert get_services_with_high_failure_rates(start_date, end_date, threshold=3) == [{ 'id': str(service_1.id), - 'name': service_1.name, 'permanent_failure_rate': 0.25 }]