Simplify failure rate by building separate query

This commit is contained in:
Pea Tyczynska
2019-12-03 16:18:07 +00:00
parent 53efd87e28
commit cfbb080f57
9 changed files with 77 additions and 37 deletions

View File

@@ -266,11 +266,7 @@ def check_for_services_with_high_failure_rates_or_sending_to_tv_numbers():
message = ""
services_with_failures = get_services_with_high_failure_rates(start_date=start_date, end_date=end_date)
services_sending_to_tv_numbers = dao_find_services_sending_to_tv_numbers(
threshold=100,
start_date=start_date,
end_date=end_date
)
services_sending_to_tv_numbers = dao_find_services_sending_to_tv_numbers(start_date=start_date, end_date=end_date)
if services_with_failures:
message += "{} service(s) have had high permanent-failure rates for sms messages in last 24 hours:\n".format(
@@ -291,6 +287,7 @@ def check_for_services_with_high_failure_rates_or_sending_to_tv_numbers():
current_app.logger.exception(message)
if current_app.config['NOTIFY_ENVIRONMENT'] in ['live', 'production', 'test']:
message += "\nThings to do: contact service? revoke their key?"
zendesk_client.create_ticket(
subject="[{}] High failure rates for sms spotted for services".format(
current_app.config['NOTIFY_ENVIRONMENT']

View File

@@ -524,9 +524,7 @@ def dao_fetch_active_users_for_service(service_id):
def dao_find_services_sending_to_tv_numbers(start_date, end_date, threshold=100):
return db.session.query(
Service.name.label('service_name'),
Notification.service_id.label('service_id'),
func.count(Notification.id).label('notification_count')
).filter(
@@ -541,7 +539,27 @@ def dao_find_services_sending_to_tv_numbers(start_date, end_date, threshold=100)
Service.active == True,
).group_by(
Notification.service_id,
Service.name
).having(
func.count(Notification.id) > threshold
).all()
def dao_find_real_sms_notification_count_by_status_for_live_services(start_date, end_date):
    """Count non-test SMS notifications per service and status.

    Only notifications created between ``start_date`` and ``end_date``
    (both inclusive) are counted, and only for services that are active,
    not restricted and not in research mode. Notifications sent with a
    test key are excluded.

    Returns a list of rows exposing ``service_id``, ``status`` and ``count``.
    Rows are ordered by ``service_id`` so that all rows of one service are
    adjacent; consumers that group consecutive rows (for example with
    ``itertools.groupby``) rely on that, and SQL ``GROUP BY`` alone does not
    guarantee any ordering.
    """
    # only works within services' retention period
    return db.session.query(
        Notification.service_id.label('service_id'),
        Notification.status.label('status'),
        func.count(Notification.id).label('count')
    ).filter(
        Notification.service_id == Service.id,
        Notification.created_at >= start_date,
        Notification.created_at <= end_date,
        Notification.key_type != KEY_TYPE_TEST,
        Notification.notification_type == SMS_TYPE,
        Service.restricted == False,  # noqa
        Service.research_mode == False,
        Service.active == True,
    ).group_by(
        Notification.service_id,
        Notification.status
    ).order_by(
        # Keep each service's rows contiguous for downstream grouping.
        Notification.service_id
    ).all()

View File

@@ -24,8 +24,7 @@ def format_statistics(statistics):
def get_rate_of_permanent_failures_for_service(statistics, threshold=100):
counts = {"permanent_failure": 0, "all_other_statuses": 0}
for row in statistics:
if row.notification_type == 'sms':
_count_if_status_is_permanent_failure_from_row(counts, row)
_count_if_status_is_permanent_failure_from_row(counts, row)
if counts['permanent_failure'] + counts['all_other_statuses'] >= threshold:
rate = counts['permanent_failure'] / (counts['permanent_failure'] + counts['all_other_statuses'])

View File

@@ -2,6 +2,8 @@ import itertools
from notifications_utils.recipients import allowed_to_send_to
from app.dao.services_dao import dao_find_real_sms_notification_count_by_status_for_live_services
from app.models import (
ServiceWhitelist,
MOBILE_TYPE, EMAIL_TYPE,
@@ -9,8 +11,6 @@ from app.models import (
from app.service import statistics
from app.dao.fact_notification_status_dao import fetch_stats_for_all_services_by_date_range
def get_recipients_from_request(request_json, key, type):
    """Pair every recipient stored under ``key`` with the given ``type``.

    Looks ``key`` up in ``request_json`` and returns a list of
    ``(type, recipient)`` tuples, one per recipient, in their original order.
    """
    # ``type`` shadows the builtin but is part of the public signature.
    recipients = request_json.get(key)
    pairs = []
    for recipient in recipients:
        pairs.append((type, recipient))
    return pairs
@@ -59,20 +59,18 @@ def service_allowed_to_send_to(recipient, service, key_type, allow_whitelisted_r
def get_services_with_high_failure_rates(start_date, end_date, rate=0.25, threshold=100):
    """Return services whose SMS permanent-failure rate is at least ``rate``.

    Per-service, per-status SMS counts for the period are fetched with
    ``dao_find_real_sms_notification_count_by_status_for_live_services``,
    which already restricts the rows to live services and non-test keys, so
    no further service filtering is done here.

    :param start_date: start of the period, passed through to the query.
    :param end_date: end of the period, passed through to the query.
    :param rate: minimum permanent-failure rate for a service to be reported.
    :param threshold: forwarded to
        ``statistics.get_rate_of_permanent_failures_for_service``.
    :return: list of dicts with keys ``'id'`` (service id as a string) and
        ``'permanent_failure_rate'``.
    """
    stats = dao_find_real_sms_notification_count_by_status_for_live_services(
        start_date=start_date,
        end_date=end_date,
    )

    # Group explicitly instead of using itertools.groupby: groupby only merges
    # *adjacent* rows, so it would split a service into several groups if the
    # rows did not arrive sorted by service_id.
    rows_by_service = {}
    for row in stats:
        rows_by_service.setdefault(row.service_id, []).append(row)

    results = []
    for service_id, rows in rows_by_service.items():
        permanent_failure_rate = statistics.get_rate_of_permanent_failures_for_service(rows, threshold=threshold)
        if permanent_failure_rate >= rate:
            results.append({
                'id': str(service_id),
                'permanent_failure_rate': permanent_failure_rate
            })
    return results