Merge pull request #2667 from alphagov/warn-team-about-high-failure-rates

Warn team about high failure rates
This commit is contained in:
Pea M. Tyczynska
2019-12-09 11:28:25 +00:00
committed by GitHub
6 changed files with 311 additions and 76 deletions

View File

@@ -2,6 +2,7 @@ from datetime import datetime, timedelta
from unittest.mock import call
import pytest
from collections import namedtuple
from freezegun import freeze_time
from mock import mock
@@ -16,9 +17,10 @@ from app.celery.scheduled_tasks import (
check_precompiled_letter_state,
check_templated_letter_state,
check_for_missing_rows_in_completed_jobs,
check_for_services_with_high_failure_rates_or_sending_to_tv_numbers,
switch_current_sms_provider_on_slow_delivery,
)
from app.config import QueueNames, TaskNames
from app.config import QueueNames, TaskNames, Config
from app.dao.jobs_dao import dao_get_job_by_id
from app.dao.notifications_dao import dao_get_scheduled_notifications
from app.dao.provider_details_dao import get_provider_details_by_identifier
@@ -494,3 +496,64 @@ def test_check_for_missing_rows_in_completed_jobs_uses_sender_id(mocker, sample_
mock_process_row.assert_called_once_with(
mock.ANY, mock.ANY, job, job.service, sender_id=fake_uuid
)
MockServicesSendingToTVNumbers = namedtuple(
'ServicesSendingToTVNumbers',
[
'service_id',
'notification_count',
]
)
MockServicesWithHighFailureRate = namedtuple(
'ServicesWithHighFailureRate',
[
'service_id',
'permanent_failure_rate',
]
)
@pytest.mark.parametrize("failure_rates, sms_to_tv_numbers, expected_message", [
[
[MockServicesWithHighFailureRate("123", 0.3)],
[],
"1 service(s) have had high permanent-failure rates for sms messages in last "
"24 hours:\nservice: {} failure rate: 0.3,\n".format(
Config.ADMIN_BASE_URL + "/services/" + "123"
)
],
[
[],
[MockServicesSendingToTVNumbers("123", 300)],
"1 service(s) have sent over 100 sms messages to tv numbers in last 24 hours:\n"
"service: {} count of sms to tv numbers: 300,\n".format(
Config.ADMIN_BASE_URL + "/services/" + "123"
)
]
])
def test_check_for_services_with_high_failure_rates_or_sending_to_tv_numbers(
mocker, notify_db_session, failure_rates, sms_to_tv_numbers, expected_message
):
mock_logger = mocker.patch('app.celery.tasks.current_app.logger.exception')
mock_create_ticket = mocker.patch('app.celery.scheduled_tasks.zendesk_client.create_ticket')
mock_failure_rates = mocker.patch(
'app.celery.scheduled_tasks.dao_find_services_with_high_failure_rates', return_value=failure_rates
)
mock_sms_to_tv_numbers = mocker.patch(
'app.celery.scheduled_tasks.dao_find_services_sending_to_tv_numbers', return_value=sms_to_tv_numbers
)
zendesk_actions = "\nYou can find instructions for this ticket in our manual:\n"
"https://github.com/alphagov/notifications-manuals/wiki/Support-Runbook#Deal-with-services-with-high-failure-rates-or-sending-sms-to-tv-numbers" # noqa
check_for_services_with_high_failure_rates_or_sending_to_tv_numbers()
assert mock_failure_rates.called
assert mock_sms_to_tv_numbers.called
mock_logger.assert_called_once_with(expected_message)
mock_create_ticket.assert_called_with(
message=expected_message + zendesk_actions,
subject="[test] High failure rates for sms spotted for services",
ticket_type='incident'
)

View File

@@ -1,5 +1,5 @@
import uuid
from datetime import datetime
from datetime import datetime, timedelta
from unittest import mock
import pytest
@@ -8,77 +8,49 @@ from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm.exc import NoResultFound
from app import db
from app.dao.inbound_numbers_dao import (
dao_set_inbound_number_to_service,
dao_get_available_inbound_numbers,
dao_set_inbound_number_active_flag
)
from app.dao.inbound_numbers_dao import (dao_get_available_inbound_numbers,
dao_set_inbound_number_active_flag,
dao_set_inbound_number_to_service)
from app.dao.organisation_dao import dao_add_service_to_organisation
from app.dao.service_permissions_dao import dao_add_service_permission, dao_remove_service_permission
from app.dao.services_dao import (
dao_create_service,
dao_add_user_to_service,
dao_remove_user_from_service,
dao_fetch_all_services,
dao_fetch_live_services_data,
dao_fetch_service_by_id,
dao_fetch_all_services_by_user,
dao_update_service,
delete_service_and_all_associated_db_objects,
dao_fetch_stats_for_service,
dao_fetch_todays_stats_for_service,
fetch_todays_total_message_count,
dao_fetch_todays_stats_for_all_services,
dao_suspend_service,
dao_resume_service,
dao_fetch_active_users_for_service,
dao_fetch_service_by_inbound_number,
get_services_by_partial_name,
)
from app.dao.service_user_dao import dao_get_service_user, dao_update_service_user
from app.dao.users_dao import save_model_user, create_user_code
from app.models import (
VerifyCode,
ApiKey,
Template,
TemplateHistory,
Job,
Notification,
NotificationHistory,
Permission,
User,
InvitedUser,
Service,
ServicePermission,
ServiceUser,
KEY_TYPE_NORMAL,
KEY_TYPE_TEAM,
KEY_TYPE_TEST,
EMAIL_TYPE,
SMS_TYPE,
INTERNATIONAL_SMS_TYPE,
LETTER_TYPE,
user_folder_permissions,
Organisation
)
from tests.app.db import (
create_ft_billing,
create_inbound_number,
create_organisation,
create_user,
create_service,
create_service_with_inbound_number,
create_service_with_defined_sms_sender,
create_template,
create_template_folder,
create_notification,
create_api_key,
create_invited_user,
create_email_branding,
create_letter_branding,
create_notification_history,
create_annual_billing,
)
from app.dao.service_permissions_dao import (dao_add_service_permission,
dao_remove_service_permission)
from app.dao.service_user_dao import (dao_get_service_user,
dao_update_service_user)
from app.dao.services_dao import (dao_add_user_to_service, dao_create_service,
dao_fetch_active_users_for_service,
dao_fetch_all_services,
dao_fetch_all_services_by_user,
dao_fetch_live_services_data,
dao_fetch_service_by_id,
dao_fetch_service_by_inbound_number,
dao_fetch_stats_for_service,
dao_fetch_todays_stats_for_all_services,
dao_fetch_todays_stats_for_service,
dao_find_services_sending_to_tv_numbers,
dao_find_services_with_high_failure_rates,
dao_remove_user_from_service,
dao_resume_service, dao_suspend_service,
dao_update_service,
delete_service_and_all_associated_db_objects,
fetch_todays_total_message_count,
get_services_by_partial_name)
from app.dao.users_dao import create_user_code, save_model_user
from app.models import (EMAIL_TYPE, INTERNATIONAL_SMS_TYPE, KEY_TYPE_NORMAL,
KEY_TYPE_TEAM, KEY_TYPE_TEST, LETTER_TYPE, SMS_TYPE,
ApiKey, InvitedUser, Job, Notification,
NotificationHistory, Organisation, Permission, Service,
ServicePermission, ServiceUser, Template,
TemplateHistory, User, VerifyCode,
user_folder_permissions)
from tests.app.db import (create_annual_billing, create_api_key,
create_email_branding, create_ft_billing,
create_inbound_number, create_invited_user,
create_letter_branding, create_notification,
create_notification_history, create_organisation,
create_service,
create_service_with_defined_sms_sender,
create_service_with_inbound_number, create_template,
create_template_folder, create_user)
def test_should_have_decorated_services_dao_functions():
@@ -1101,3 +1073,83 @@ def create_email_sms_letter_template():
template_two = create_template(service=service, template_name='2', template_type='sms')
template_three = create_template(service=service, template_name='3', template_type='letter')
return template_one, template_three, template_two
@freeze_time("2019-12-02 12:00:00.000000")
def test_dao_find_services_sending_to_tv_numbers(notify_db_session, fake_uuid):
service_1 = create_service(service_name="Service 1", service_id=fake_uuid)
service_3 = create_service(service_name="Service 3", restricted=True) # restricted is excluded
service_4 = create_service(service_name="Service 4", research_mode=True) # research mode is excluded
service_5 = create_service(service_name="Service 5", active=False) # not active is excluded
services = [service_1, service_3, service_4, service_5]
tv_number = "447700900001"
normal_number = "447711900001"
normal_number_resembling_tv_number = "447227700900"
for service in services:
template = create_template(service)
for x in range(0, 5):
create_notification(template, normalised_to=tv_number, status="permanent-failure")
service_6 = create_service(service_name="Service 6") # notifications too old are excluded
with freeze_time("2019-11-30 15:00:00.000000"):
template_6 = create_template(service_6)
for x in range(0, 5):
create_notification(template_6, normalised_to=tv_number, status="permanent-failure")
service_2 = create_service(service_name="Service 2") # below threshold is excluded
template_2 = create_template(service_2)
create_notification(template_2, normalised_to=tv_number, status="permanent-failure")
for x in range(0, 5):
# test key type is excluded
create_notification(template_2, normalised_to=tv_number, status="permanent-failure", key_type='test')
for x in range(0, 5):
# normal numbers are not counted by the query
create_notification(template_2, normalised_to=normal_number, status="delivered")
create_notification(template_2, normalised_to=normal_number_resembling_tv_number, status="delivered")
start_date = (datetime.utcnow() - timedelta(days=1))
end_date = datetime.utcnow()
result = dao_find_services_sending_to_tv_numbers(start_date, end_date, threshold=4)
assert len(result) == 1
assert str(result[0].service_id) == fake_uuid
def test_dao_find_services_with_high_failure_rates(notify_db_session, fake_uuid):
service_1 = create_service(service_name="Service 1", service_id=fake_uuid)
service_3 = create_service(service_name="Service 3", restricted=True) # restricted is excluded
service_4 = create_service(service_name="Service 4", research_mode=True) # research mode is excluded
service_5 = create_service(service_name="Service 5", active=False) # not active is excluded
services = [service_1, service_3, service_4, service_5]
for service in services:
template = create_template(service)
for x in range(0, 3):
create_notification(template, status="permanent-failure")
create_notification(template, status="delivered")
create_notification(template, status="sending")
create_notification(template, status="temporary-failure")
service_6 = create_service(service_name="Service 6")
with freeze_time("2019-11-30 15:00:00.000000"):
template_6 = create_template(service_6)
for x in range(0, 4):
create_notification(template_6, status="permanent-failure") # notifications too old are excluded
service_2 = create_service(service_name="Service 2")
template_2 = create_template(service_2)
for x in range(0, 4):
create_notification(template_2, status="permanent-failure", key_type='test') # test key type is excluded
create_notification(template_2, status="permanent-failure") # below threshold is excluded
start_date = (datetime.utcnow() - timedelta(days=1))
end_date = datetime.utcnow()
result = dao_find_services_with_high_failure_rates(start_date, end_date, threshold=3)
# assert len(result) == 3
# assert str(result[0].service_id) == fake_uuid
assert len(result) == 1
assert str(result[0].service_id) == fake_uuid
assert result[0].permanent_failure_rate == 0.25

View File

@@ -6,12 +6,12 @@ from freezegun import freeze_time
import pytest
from app.service.statistics import (
format_admin_stats,
format_statistics,
add_monthly_notification_status_stats,
create_empty_monthly_notification_status_stats_dict,
create_stats_dict,
create_zeroed_stats_dicts,
create_empty_monthly_notification_status_stats_dict,
add_monthly_notification_status_stats
format_admin_stats,
format_statistics
)
StatsRow = collections.namedtuple('row', ('notification_type', 'status', 'count'))