Query directly for services with high failure rate

This commit is contained in:
Pea Tyczynska
2019-12-05 16:07:06 +00:00
parent b8de67ae54
commit 1b7b26bf24
8 changed files with 116 additions and 204 deletions

View File

@@ -35,7 +35,7 @@ from app.dao.notifications_dao import (
)
from app.dao.provider_details_dao import dao_reduce_sms_provider_priority
from app.dao.users_dao import delete_codes_older_created_more_than_a_day_ago
from app.dao.services_dao import dao_find_services_sending_to_tv_numbers
from app.dao.services_dao import dao_find_services_sending_to_tv_numbers, dao_find_services_with_high_failure_rates
from app.models import (
Job,
JOB_STATUS_IN_PROGRESS,
@@ -46,8 +46,6 @@ from app.models import (
from app.notifications.process_notifications import send_notification_to_queue
from app.v2.errors import JobIncompleteError
from app.service.utils import get_services_with_high_failure_rates
@notify_celery.task(name="run-scheduled-jobs")
@statsd(namespace="tasks")
@@ -265,7 +263,7 @@ def check_for_services_with_high_failure_rates_or_sending_to_tv_numbers():
end_date = datetime.utcnow()
message = ""
services_with_failures = get_services_with_high_failure_rates(start_date=start_date, end_date=end_date)
services_with_failures = dao_find_services_with_high_failure_rates(start_date=start_date, end_date=end_date)
services_sending_to_tv_numbers = dao_find_services_sending_to_tv_numbers(start_date=start_date, end_date=end_date)
if services_with_failures:
@@ -273,8 +271,8 @@ def check_for_services_with_high_failure_rates_or_sending_to_tv_numbers():
len(services_with_failures)
)
for service in services_with_failures:
service_dashboard = current_app.config['ADMIN_BASE_URL'] + "/services/" + service["id"]
message += "service: {} failure rate: {},\n".format(service_dashboard, service["permanent_failure_rate"])
service_dashboard = current_app.config['ADMIN_BASE_URL'] + "/services/" + service.service_id
message += "service: {} failure rate: {},\n".format(service_dashboard, service.permanent_failure_rate)
elif services_sending_to_tv_numbers:
message += "{} service(s) have sent over 100 sms messages to tv numbers in last 24 hours:\n".format(
len(services_sending_to_tv_numbers)

View File

@@ -4,6 +4,7 @@ from datetime import date, datetime, timedelta
from notifications_utils.statsd_decorators import statsd
from sqlalchemy.sql.expression import asc, case, and_, func
from sqlalchemy.orm import joinedload
from sqlalchemy import cast, Float
from flask import current_app
from app import db
@@ -44,6 +45,7 @@ from app.models import (
KEY_TYPE_TEST,
NHS_ORGANISATION_TYPES,
NON_CROWN_ORGANISATION_TYPES,
NOTIFICATION_PERMANENT_FAILURE,
SMS_TYPE,
LETTER_TYPE,
)
@@ -544,12 +546,10 @@ def dao_find_services_sending_to_tv_numbers(start_date, end_date, threshold=100)
).all()
def dao_find_real_sms_notification_count_by_status_for_live_services(start_date, end_date):
# only works within services' retention period
return db.session.query(
Notification.service_id.label('service_id'),
Notification.status.label('status'),
func.count(Notification.id).label('count')
def dao_find_services_with_high_failure_rates(start_date, end_date, threshold=100):
subquery = db.session.query(
func.count(Notification.id).label('total_count'),
Notification.service_id.label('service_id')
).filter(
Notification.service_id == Service.id,
Notification.created_at >= start_date,
@@ -561,5 +561,35 @@ def dao_find_real_sms_notification_count_by_status_for_live_services(start_date,
Service.active == True,
).group_by(
Notification.service_id,
Notification.status
).all()
).having(
func.count(Notification.id) >= threshold
)
subquery = subquery.subquery()
query = db.session.query(
Notification.service_id.label('service_id'),
func.count(Notification.id).label('permanent_failure_count'),
subquery.c.total_count.label('total_count'),
(cast(func.count(Notification.id), Float) / cast(subquery.c.total_count, Float)).label('permanent_failure_rate')
).join(
subquery,
subquery.c.service_id == Notification.service_id
).filter(
Notification.service_id == Service.id,
Notification.created_at >= start_date,
Notification.created_at <= end_date,
Notification.key_type != KEY_TYPE_TEST,
Notification.notification_type == SMS_TYPE,
Notification.status == NOTIFICATION_PERMANENT_FAILURE,
Service.restricted == False, # noqa
Service.research_mode == False,
Service.active == True,
).group_by(
Notification.service_id,
subquery.c.total_count
).having(
cast(func.count(Notification.id), Float) / cast(subquery.c.total_count, Float) >= 0.25
)
return query.all()

View File

@@ -21,18 +21,6 @@ def format_statistics(statistics):
return counts
def get_rate_of_permanent_failures_for_service(statistics, threshold=100):
counts = {"permanent_failure": 0, "all_other_statuses": 0}
for row in statistics:
_count_if_status_is_permanent_failure_from_row(counts, row)
if counts['permanent_failure'] + counts['all_other_statuses'] >= threshold:
rate = counts['permanent_failure'] / (counts['permanent_failure'] + counts['all_other_statuses'])
else:
rate = 0
return rate
def format_admin_stats(statistics):
counts = create_stats_dict()
@@ -106,13 +94,6 @@ def _update_statuses_from_row(update_dict, row):
update_dict['failed'] += row.count
def _count_if_status_is_permanent_failure_from_row(update_dict, row):
if row.status == 'permanent-failure':
update_dict['permanent_failure'] += row.count
else:
update_dict['all_other_statuses'] += row.count
def create_empty_monthly_notification_status_stats_dict(year):
utc_month_starts = get_months_for_financial_year(year)
# nested dicts - data[month][template type][status] = count

View File

@@ -2,15 +2,11 @@ import itertools
from notifications_utils.recipients import allowed_to_send_to
from app.dao.services_dao import dao_find_real_sms_notification_count_by_status_for_live_services
from app.models import (
ServiceWhitelist,
MOBILE_TYPE, EMAIL_TYPE,
KEY_TYPE_TEST, KEY_TYPE_TEAM, KEY_TYPE_NORMAL)
from app.service import statistics
def get_recipients_from_request(request_json, key, type):
return [(type, recipient) for recipient in request_json.get(key)]
@@ -56,21 +52,3 @@ def service_allowed_to_send_to(recipient, service, key_type, allow_whitelisted_r
whitelist_members
)
)
def get_services_with_high_failure_rates(start_date, end_date, rate=0.25, threshold=100):
stats = dao_find_real_sms_notification_count_by_status_for_live_services(
start_date=start_date,
end_date=end_date,
)
results = []
for service_id, rows in itertools.groupby(stats, lambda x: x.service_id):
rows = list(rows)
permanent_failure_rate = statistics.get_rate_of_permanent_failures_for_service(rows, threshold=threshold)
if permanent_failure_rate >= rate:
results.append({
'id': str(rows[0].service_id),
'permanent_failure_rate': permanent_failure_rate
})
return results

View File

@@ -505,11 +505,18 @@ MockServicesSendingToTVNumbers = namedtuple(
'notification_count',
]
)
MockServicesWithHighFailureRate = namedtuple(
'ServicesWithHighFailureRate',
[
'service_id',
'permanent_failure_rate',
]
)
@pytest.mark.parametrize("failure_rates, sms_to_tv_numbers, expected_message", [
[
[{"id": "123", "permanent_failure_rate": 0.3}],
[MockServicesWithHighFailureRate("123", 0.3)],
[],
"1 service(s) have had high permanent-failure rates for sms messages in last "
"24 hours:\nservice: {} failure rate: 0.3,\n".format(
@@ -531,7 +538,7 @@ def test_check_for_services_with_high_failure_rates_or_sending_to_tv_numbers(
mock_logger = mocker.patch('app.celery.tasks.current_app.logger.exception')
mock_create_ticket = mocker.patch('app.celery.scheduled_tasks.zendesk_client.create_ticket')
mock_failure_rates = mocker.patch(
'app.celery.scheduled_tasks.get_services_with_high_failure_rates', return_value=failure_rates
'app.celery.scheduled_tasks.dao_find_services_with_high_failure_rates', return_value=failure_rates
)
mock_sms_to_tv_numbers = mocker.patch(
'app.celery.scheduled_tasks.dao_find_services_sending_to_tv_numbers', return_value=sms_to_tv_numbers

View File

@@ -8,79 +8,49 @@ from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm.exc import NoResultFound
from app import db
from app.dao.inbound_numbers_dao import (
dao_set_inbound_number_to_service,
dao_get_available_inbound_numbers,
dao_set_inbound_number_active_flag
)
from app.dao.inbound_numbers_dao import (dao_get_available_inbound_numbers,
dao_set_inbound_number_active_flag,
dao_set_inbound_number_to_service)
from app.dao.organisation_dao import dao_add_service_to_organisation
from app.dao.service_permissions_dao import dao_add_service_permission, dao_remove_service_permission
from app.dao.services_dao import (
dao_create_service,
dao_add_user_to_service,
dao_remove_user_from_service,
dao_fetch_all_services,
dao_fetch_live_services_data,
dao_fetch_service_by_id,
dao_fetch_all_services_by_user,
dao_find_real_sms_notification_count_by_status_for_live_services,
dao_find_services_sending_to_tv_numbers,
dao_update_service,
delete_service_and_all_associated_db_objects,
dao_fetch_stats_for_service,
dao_fetch_todays_stats_for_service,
fetch_todays_total_message_count,
dao_fetch_todays_stats_for_all_services,
dao_suspend_service,
dao_resume_service,
dao_fetch_active_users_for_service,
dao_fetch_service_by_inbound_number,
get_services_by_partial_name,
)
from app.dao.service_user_dao import dao_get_service_user, dao_update_service_user
from app.dao.users_dao import save_model_user, create_user_code
from app.models import (
VerifyCode,
ApiKey,
Template,
TemplateHistory,
Job,
Notification,
NotificationHistory,
Permission,
User,
InvitedUser,
Service,
ServicePermission,
ServiceUser,
KEY_TYPE_NORMAL,
KEY_TYPE_TEAM,
KEY_TYPE_TEST,
EMAIL_TYPE,
SMS_TYPE,
INTERNATIONAL_SMS_TYPE,
LETTER_TYPE,
user_folder_permissions,
Organisation
)
from tests.app.db import (
create_ft_billing,
create_inbound_number,
create_organisation,
create_user,
create_service,
create_service_with_inbound_number,
create_service_with_defined_sms_sender,
create_template,
create_template_folder,
create_notification,
create_api_key,
create_invited_user,
create_email_branding,
create_letter_branding,
create_notification_history,
create_annual_billing,
)
from app.dao.service_permissions_dao import (dao_add_service_permission,
dao_remove_service_permission)
from app.dao.service_user_dao import (dao_get_service_user,
dao_update_service_user)
from app.dao.services_dao import (dao_add_user_to_service, dao_create_service,
dao_fetch_active_users_for_service,
dao_fetch_all_services,
dao_fetch_all_services_by_user,
dao_fetch_live_services_data,
dao_fetch_service_by_id,
dao_fetch_service_by_inbound_number,
dao_fetch_stats_for_service,
dao_fetch_todays_stats_for_all_services,
dao_fetch_todays_stats_for_service,
dao_find_services_sending_to_tv_numbers,
dao_find_services_with_high_failure_rates,
dao_remove_user_from_service,
dao_resume_service, dao_suspend_service,
dao_update_service,
delete_service_and_all_associated_db_objects,
fetch_todays_total_message_count,
get_services_by_partial_name)
from app.dao.users_dao import create_user_code, save_model_user
from app.models import (EMAIL_TYPE, INTERNATIONAL_SMS_TYPE, KEY_TYPE_NORMAL,
KEY_TYPE_TEAM, KEY_TYPE_TEST, LETTER_TYPE, SMS_TYPE,
ApiKey, InvitedUser, Job, Notification,
NotificationHistory, Organisation, Permission, Service,
ServicePermission, ServiceUser, Template,
TemplateHistory, User, VerifyCode,
user_folder_permissions)
from tests.app.db import (create_annual_billing, create_api_key,
create_email_branding, create_ft_billing,
create_inbound_number, create_invited_user,
create_letter_branding, create_notification,
create_notification_history, create_organisation,
create_service,
create_service_with_defined_sms_sender,
create_service_with_inbound_number, create_template,
create_template_folder, create_user)
def test_should_have_decorated_services_dao_functions():
@@ -1106,9 +1076,9 @@ def create_email_sms_letter_template():
@freeze_time("2019-12-02 12:00:00.000000")
def test_dao_find_services_sending_to_tv_numbers(notify_db_session, fake_uuid):
service_1 = create_service(service_name="Service 1", service_id=fake_uuid)
service_3 = create_service(service_name="Service 3", restricted=True) # restricted
service_4 = create_service(service_name="Service 4", research_mode=True) # research mode
service_5 = create_service(service_name="Service 5", active=False) # not active
service_3 = create_service(service_name="Service 3", restricted=True) # restricted is excluded
service_4 = create_service(service_name="Service 4", research_mode=True) # research mode is excluded
service_5 = create_service(service_name="Service 5", active=False) # not active is excluded
services = [service_1, service_3, service_4, service_5]
tv_number = "447700900001"
@@ -1120,13 +1090,13 @@ def test_dao_find_services_sending_to_tv_numbers(notify_db_session, fake_uuid):
for x in range(0, 5):
create_notification(template, normalised_to=tv_number, status="permanent-failure")
service_6 = create_service(service_name="Service 6") # notifications too old
service_6 = create_service(service_name="Service 6") # notifications too old are excluded
with freeze_time("2019-11-30 15:00:00.000000"):
template_6 = create_template(service_6)
for x in range(0, 5):
create_notification(template_6, normalised_to=tv_number, status="permanent-failure")
service_2 = create_service(service_name="Service 2") # below threshold
service_2 = create_service(service_name="Service 2") # below threshold is excluded
template_2 = create_template(service_2)
create_notification(template_2, normalised_to=tv_number, status="permanent-failure")
for x in range(0, 5):
@@ -1141,11 +1111,11 @@ def test_dao_find_services_sending_to_tv_numbers(notify_db_session, fake_uuid):
assert str(result[0].service_id) == fake_uuid
def test_dao_find_real_sms_notification_count_by_status_for_live_services(notify_db_session, fake_uuid):
def test_dao_find_services_with_high_failure_rates(notify_db_session, fake_uuid):
service_1 = create_service(service_name="Service 1", service_id=fake_uuid)
service_3 = create_service(service_name="Service 3", restricted=True) # restricted
service_4 = create_service(service_name="Service 4", research_mode=True) # research mode
service_5 = create_service(service_name="Service 5", active=False) # not active
service_3 = create_service(service_name="Service 3", restricted=True) # restricted is excluded
service_4 = create_service(service_name="Service 4", research_mode=True) # research mode is excluded
service_5 = create_service(service_name="Service 5", active=False) # not active is excluded
services = [service_1, service_3, service_4, service_5]
for service in services:
@@ -1153,21 +1123,27 @@ def test_dao_find_real_sms_notification_count_by_status_for_live_services(notify
for x in range(0, 3):
create_notification(template, status="permanent-failure")
create_notification(template, status="delivered")
create_notification(template, status="sending")
create_notification(template, status="temporary-failure")
service_6 = create_service(service_name="Service 6") # notifications too old
service_6 = create_service(service_name="Service 6")
with freeze_time("2019-11-30 15:00:00.000000"):
template_6 = create_template(service_6)
for x in range(0, 2):
create_notification(template_6, status="permanent-failure")
for x in range(0, 4):
create_notification(template_6, status="permanent-failure") # notifications too old are excluded
service_2 = create_service(service_name="Service 2") # below threshold
service_2 = create_service(service_name="Service 2")
template_2 = create_template(service_2)
create_notification(template_2, status="permanent-failure", key_type='test') # test key type
for x in range(0, 4):
create_notification(template_2, status="permanent-failure", key_type='test') # test key type is excluded
create_notification(template_2, status="permanent-failure") # below threshold is excluded
start_date = (datetime.utcnow() - timedelta(days=1))
end_date = datetime.utcnow()
result = dao_find_real_sms_notification_count_by_status_for_live_services(start_date, end_date)
assert len(result) == 3
result = dao_find_services_with_high_failure_rates(start_date, end_date, threshold=3)
# assert len(result) == 3
# assert str(result[0].service_id) == fake_uuid
assert len(result) == 1
assert str(result[0].service_id) == fake_uuid
assert result[0].permanent_failure_rate == 0.25

View File

@@ -11,8 +11,7 @@ from app.service.statistics import (
create_stats_dict,
create_zeroed_stats_dicts,
format_admin_stats,
format_statistics,
get_rate_of_permanent_failures_for_service
format_statistics
)
StatsRow = collections.namedtuple('row', ('notification_type', 'status', 'count'))
@@ -74,26 +73,6 @@ def test_format_statistics(stats, email_counts, sms_counts, letter_counts):
}
@pytest.mark.idparametrize("statistics, expected_result", {
'counts_rate_for_sms': ([
StatsRow('sms', 'permanent-failure', 100),
StatsRow('sms', 'delivered', 300),
], 0.25),
'only_counts_permanent_failure_as_failed': ([
StatsRow('sms', 'permanent-failure', 100),
StatsRow('sms', 'temporary-failure', 100),
StatsRow('sms', 'delivered', 300),
], 0.2),
'below_threshold': ([
StatsRow('sms', 'permanent-failure', 5),
StatsRow('sms', 'delivered', 3),
], 0),
})
def test_get_rate_of_permanent_failures_for_service(statistics, expected_result):
rate = get_rate_of_permanent_failures_for_service(statistics)
assert rate == expected_result
def test_create_zeroed_stats_dicts():
assert create_zeroed_stats_dicts() == {
'sms': {'requested': 0, 'delivered': 0, 'failed': 0},

View File

@@ -1,8 +1,5 @@
from app.dao.date_util import get_current_financial_year_start_year
from freezegun import freeze_time
from app.service.utils import get_services_with_high_failure_rates
from collections import namedtuple
from datetime import datetime, timedelta
# see get_financial_year for conversion of financial years.
@@ -16,37 +13,3 @@ def test_get_current_financial_year_start_year_before_march():
def test_get_current_financial_year_start_year_after_april():
current_fy = get_current_financial_year_start_year()
assert current_fy == 2017
MockServicesNotificationCounts = namedtuple(
'ServicesSendingToTVNumbers',
[
'service_id',
'status',
'count',
]
)
@freeze_time("2019-12-02 12:00:00.000000")
def test_get_services_with_high_failure_rates(mocker, notify_db_session):
mock_query_results = [
MockServicesNotificationCounts('123', 'delivered', 150),
MockServicesNotificationCounts('123', 'permanent-failure', 50), # these will show up
MockServicesNotificationCounts('456', 'delivered', 150),
MockServicesNotificationCounts('456', 'permanent-failure', 5), # ratio too low
MockServicesNotificationCounts('789', 'permanent-failure', 5), # below threshold
MockServicesNotificationCounts('444', 'delivered', 100),
MockServicesNotificationCounts('444', 'permanent-failure', 100), # these will show up
]
mocker.patch(
'app.service.utils.dao_find_real_sms_notification_count_by_status_for_live_services',
return_value=mock_query_results
)
start_date = (datetime.utcnow() - timedelta(days=1))
end_date = datetime.utcnow()
assert get_services_with_high_failure_rates(start_date, end_date) == [
{'id': '123', 'permanent_failure_rate': 0.25},
{'id': '444', 'permanent_failure_rate': 0.5}
]