Query directly for services with high failure rate

This commit is contained in:
Pea Tyczynska
2019-12-05 16:07:06 +00:00
parent b8de67ae54
commit 1b7b26bf24
8 changed files with 116 additions and 204 deletions

View File

@@ -35,7 +35,7 @@ from app.dao.notifications_dao import (
)
from app.dao.provider_details_dao import dao_reduce_sms_provider_priority
from app.dao.users_dao import delete_codes_older_created_more_than_a_day_ago
from app.dao.services_dao import dao_find_services_sending_to_tv_numbers
from app.dao.services_dao import dao_find_services_sending_to_tv_numbers, dao_find_services_with_high_failure_rates
from app.models import (
Job,
JOB_STATUS_IN_PROGRESS,
@@ -46,8 +46,6 @@ from app.models import (
from app.notifications.process_notifications import send_notification_to_queue
from app.v2.errors import JobIncompleteError
from app.service.utils import get_services_with_high_failure_rates
@notify_celery.task(name="run-scheduled-jobs")
@statsd(namespace="tasks")
@@ -265,7 +263,7 @@ def check_for_services_with_high_failure_rates_or_sending_to_tv_numbers():
end_date = datetime.utcnow()
message = ""
services_with_failures = get_services_with_high_failure_rates(start_date=start_date, end_date=end_date)
services_with_failures = dao_find_services_with_high_failure_rates(start_date=start_date, end_date=end_date)
services_sending_to_tv_numbers = dao_find_services_sending_to_tv_numbers(start_date=start_date, end_date=end_date)
if services_with_failures:
@@ -273,8 +271,8 @@ def check_for_services_with_high_failure_rates_or_sending_to_tv_numbers():
len(services_with_failures)
)
for service in services_with_failures:
service_dashboard = current_app.config['ADMIN_BASE_URL'] + "/services/" + service["id"]
message += "service: {} failure rate: {},\n".format(service_dashboard, service["permanent_failure_rate"])
service_dashboard = current_app.config['ADMIN_BASE_URL'] + "/services/" + service.service_id
message += "service: {} failure rate: {},\n".format(service_dashboard, service.permanent_failure_rate)
elif services_sending_to_tv_numbers:
message += "{} service(s) have sent over 100 sms messages to tv numbers in last 24 hours:\n".format(
len(services_sending_to_tv_numbers)

View File

@@ -4,6 +4,7 @@ from datetime import date, datetime, timedelta
from notifications_utils.statsd_decorators import statsd
from sqlalchemy.sql.expression import asc, case, and_, func
from sqlalchemy.orm import joinedload
from sqlalchemy import cast, Float
from flask import current_app
from app import db
@@ -44,6 +45,7 @@ from app.models import (
KEY_TYPE_TEST,
NHS_ORGANISATION_TYPES,
NON_CROWN_ORGANISATION_TYPES,
NOTIFICATION_PERMANENT_FAILURE,
SMS_TYPE,
LETTER_TYPE,
)
@@ -544,12 +546,10 @@ def dao_find_services_sending_to_tv_numbers(start_date, end_date, threshold=100)
).all()
def dao_find_real_sms_notification_count_by_status_for_live_services(start_date, end_date):
# only works within services' retention period
return db.session.query(
Notification.service_id.label('service_id'),
Notification.status.label('status'),
func.count(Notification.id).label('count')
def dao_find_services_with_high_failure_rates(start_date, end_date, threshold=100):
subquery = db.session.query(
func.count(Notification.id).label('total_count'),
Notification.service_id.label('service_id')
).filter(
Notification.service_id == Service.id,
Notification.created_at >= start_date,
@@ -561,5 +561,35 @@ def dao_find_real_sms_notification_count_by_status_for_live_services(start_date,
Service.active == True,
).group_by(
Notification.service_id,
Notification.status
).all()
).having(
func.count(Notification.id) >= threshold
)
subquery = subquery.subquery()
query = db.session.query(
Notification.service_id.label('service_id'),
func.count(Notification.id).label('permanent_failure_count'),
subquery.c.total_count.label('total_count'),
(cast(func.count(Notification.id), Float) / cast(subquery.c.total_count, Float)).label('permanent_failure_rate')
).join(
subquery,
subquery.c.service_id == Notification.service_id
).filter(
Notification.service_id == Service.id,
Notification.created_at >= start_date,
Notification.created_at <= end_date,
Notification.key_type != KEY_TYPE_TEST,
Notification.notification_type == SMS_TYPE,
Notification.status == NOTIFICATION_PERMANENT_FAILURE,
Service.restricted == False, # noqa
Service.research_mode == False,
Service.active == True,
).group_by(
Notification.service_id,
subquery.c.total_count
).having(
cast(func.count(Notification.id), Float) / cast(subquery.c.total_count, Float) >= 0.25
)
return query.all()

View File

@@ -21,18 +21,6 @@ def format_statistics(statistics):
return counts
def get_rate_of_permanent_failures_for_service(statistics, threshold=100):
    """Return the share of notifications that ended in permanent failure.

    :param statistics: iterable of rows exposing ``status`` and ``count``
        attributes (one row per status).
    :param threshold: minimum total number of notifications (all statuses
        combined) required before a rate is calculated.
    :return: ``permanent failures / total`` when the total reaches
        ``threshold``; ``0`` when it does not, or when there are no
        notifications at all.
    """
    permanent_failures = 0
    total = 0
    for row in statistics:
        total += row.count
        if row.status == 'permanent-failure':
            permanent_failures += row.count

    # Guard the empty case explicitly: with threshold <= 0 and no rows the
    # division below would otherwise raise ZeroDivisionError.
    if total == 0 or total < threshold:
        return 0
    return permanent_failures / total
def format_admin_stats(statistics):
counts = create_stats_dict()
@@ -106,13 +94,6 @@ def _update_statuses_from_row(update_dict, row):
update_dict['failed'] += row.count
def _count_if_status_is_permanent_failure_from_row(update_dict, row):
    """Add ``row.count`` to the matching tally in ``update_dict``.

    Rows whose ``status`` is ``'permanent-failure'`` are added to
    ``update_dict['permanent_failure']``; rows with any other status are
    added to ``update_dict['all_other_statuses']``. The dict is mutated in
    place and nothing is returned.
    """
    bucket = 'permanent_failure' if row.status == 'permanent-failure' else 'all_other_statuses'
    update_dict[bucket] += row.count
def create_empty_monthly_notification_status_stats_dict(year):
utc_month_starts = get_months_for_financial_year(year)
# nested dicts - data[month][template type][status] = count

View File

@@ -2,15 +2,11 @@ import itertools
from notifications_utils.recipients import allowed_to_send_to
from app.dao.services_dao import dao_find_real_sms_notification_count_by_status_for_live_services
from app.models import (
ServiceWhitelist,
MOBILE_TYPE, EMAIL_TYPE,
KEY_TYPE_TEST, KEY_TYPE_TEAM, KEY_TYPE_NORMAL)
from app.service import statistics
def get_recipients_from_request(request_json, key, type):
    """Pair every recipient listed under ``key`` with the given ``type``.

    Returns a list of ``(type, recipient)`` tuples, in the order the
    recipients appear in ``request_json.get(key)``.
    """
    pairs = []
    for recipient in request_json.get(key):
        pairs.append((type, recipient))
    return pairs
@@ -56,21 +52,3 @@ def service_allowed_to_send_to(recipient, service, key_type, allow_whitelisted_r
whitelist_members
)
)
def get_services_with_high_failure_rates(start_date, end_date, rate=0.25, threshold=100):
    """Return services whose permanent-failure rate is at least ``rate``.

    Per-status notification counts are fetched with
    ``dao_find_real_sms_notification_count_by_status_for_live_services`` for
    the given date range, grouped per service, and each service's rate is
    computed by ``statistics.get_rate_of_permanent_failures_for_service``.

    :param start_date: start of the period, passed through to the DAO.
    :param end_date: end of the period, passed through to the DAO.
    :param rate: minimum permanent-failure rate for a service to be reported.
    :param threshold: passed through to the rate calculation as the minimum
        notification volume.
    :return: list of dicts with keys ``'id'`` (service id as a string) and
        ``'permanent_failure_rate'``.
    """
    stats = dao_find_real_sms_notification_count_by_status_for_live_services(
        start_date=start_date,
        end_date=end_date,
    )

    # Group with a dict rather than itertools.groupby: groupby only merges
    # *adjacent* rows, and nothing here guarantees the rows arrive ordered by
    # service_id. Split groups would each be judged against the threshold on
    # their own and could be dropped or reported more than once.
    # NOTE(review): assumes row.service_id is hashable (e.g. a UUID) - confirm.
    rows_by_service = {}
    for row in stats:
        rows_by_service.setdefault(row.service_id, []).append(row)

    results = []
    for service_id, rows in rows_by_service.items():
        permanent_failure_rate = statistics.get_rate_of_permanent_failures_for_service(rows, threshold=threshold)
        if permanent_failure_rate >= rate:
            results.append({
                'id': str(service_id),
                'permanent_failure_rate': permanent_failure_rate
            })
    return results