# Failed-notification counts that trigger an alert the first time they are
# crossed, highest first, paired with the percentage quoted in the alert text.
_DB_FAILURE_ALERT_LEVELS = ((7500, 75), (5000, 50), (2500, 25))
# Count at (or above) which an alert is sent on every run, not just the first.
_DB_FAILURE_CEILING = 10000


def _db_failure_alert_message(failed_count, last_value, environment):
    """Return the alert text due for this run, or "" when no alert is due.

    :param failed_count: current number of failed notifications in the db.
    :param last_value: highest count recorded by a previous run (0 if none).
    :param environment: value of the ENVIRONMENT variable, quoted in the text.
    """
    if failed_count >= _DB_FAILURE_CEILING:
        return "We are over 100% in the db for failed notifications"
    for threshold, percent in _DB_FAILURE_ALERT_LEVELS:
        # Only alert when this run is the one that crossed the level; a count
        # that merely stays above it must not re-alert on every run.
        if failed_count >= threshold and last_value < threshold:
            return (
                f"We crossed above {percent}% in the db for failed "
                f"notifications on {environment}"
            )
    return ""


@notify_celery.task(name="check-db-notification-fails")
def check_db_notification_fails():
    """Email an alert when the failed-notification count crosses a level.

    The highest count seen so far is kept in redis under
    ``LAST_DB_NOTIFICATION_COUNT``. Crossing 25%, 50% or 75% of 10,000 alerts
    once; being at or above 10,000 alerts on every run. No email is sent when
    ENVIRONMENT is "development".
    """
    environment = os.getenv("ENVIRONMENT")

    stored = redis_store.get("LAST_DB_NOTIFICATION_COUNT")
    # The stored value may come back as bytes/str rather than int; comparing
    # that to an int would raise TypeError, so coerce it. A missing or
    # unparsable value is treated as "nothing recorded yet".
    try:
        last_value = int(stored) if stored else 0
    except (TypeError, ValueError):
        last_value = 0

    failed_count = dao_get_failed_notification_count()
    # The stored value is a high-water mark: it is only ever raised.
    if failed_count > last_value:
        redis_store.set("LAST_DB_NOTIFICATION_COUNT", failed_count)

    # TODO send to slack as well
    message = _db_failure_alert_message(failed_count, last_value, environment)

    # We don't have permissions to send email in development
    if message and environment != "development":
        provider = provider_to_use(EMAIL_TYPE, False)
        # NOTE(review): both address arguments are hard-coded to an
        # individual's mailbox; confirm whether a shared alerts address
        # should be used instead.
        provider.send_email(
            "ken.kehl@gsa.gov",
            "ken.kehl@gsa.gov",
            "DB Notification Failures Level Breached",
            body=message,
        )
def dao_get_failed_notification_count():
    """Return the number of notifications whose status is NOTIFICATION_FAILED."""
    failed_notifications = Notification.query.filter_by(status=NOTIFICATION_FAILED)
    return failed_notifications.count()
"MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, diff --git a/tests/app/celery/test_scheduled_tasks.py b/tests/app/celery/test_scheduled_tasks.py index 97b9f903e..8ec7d3a13 100644 --- a/tests/app/celery/test_scheduled_tasks.py +++ b/tests/app/celery/test_scheduled_tasks.py @@ -8,6 +8,7 @@ from notifications_utils.clients.zendesk.zendesk_client import NotifySupportTick from app.celery import scheduled_tasks from app.celery.scheduled_tasks import ( + 
def _stub_db_notification_fails_dependencies(mocker, failed_count, last_value=None):
    """Patch everything check_db_notification_fails reaches outside the test.

    Pins ENVIRONMENT so the "development" short-circuit cannot hide a send,
    and stubs redis get/set so the tests neither read nor write shared state.

    Returns the ``provider_to_use``, ``redis_store.get`` and
    ``redis_store.set`` mocks, in that order.
    """
    mocker.patch.dict("os.environ", {"ENVIRONMENT": "test"})
    mocker.patch(
        "app.celery.scheduled_tasks.dao_get_failed_notification_count",
        return_value=failed_count,
    )
    mock_redis_get = mocker.patch(
        "app.celery.scheduled_tasks.redis_store.get", return_value=last_value
    )
    mock_redis_set = mocker.patch("app.celery.scheduled_tasks.redis_store.set")
    mock_provider = mocker.patch("app.celery.scheduled_tasks.provider_to_use")
    return mock_provider, mock_redis_get, mock_redis_set


def test_should_check_db_notification_fails_task_over_100_percent(
    notify_db_session, mocker
):
    mock_provider, _, _ = _stub_db_notification_fails_dependencies(
        mocker, failed_count=100000
    )

    check_db_notification_fails()

    assert mock_provider.call_count == 1
    mock_provider.return_value.send_email.assert_called_once()


def test_should_check_db_notification_fails_task_less_than_25_percent(
    notify_db_session, mocker
):
    mock_provider, _, _ = _stub_db_notification_fails_dependencies(
        mocker, failed_count=10
    )

    check_db_notification_fails()

    assert mock_provider.call_count == 0


def test_should_check_db_notification_fails_task_over_50_percent(
    notify_db_session, mocker
):
    # This tests that we only send an alert the 1st time we cross over 50%. We don't want
    # to be sending the same alert every hour, especially as it might be quite normal for the db
    # fails to be at 25 or 50 for long periods of time.
    mock_provider, mock_redis_get, mock_redis_set = (
        _stub_db_notification_fails_dependencies(
            mocker, failed_count=5001, last_value=0
        )
    )

    check_db_notification_fails()
    assert mock_provider.call_count == 1
    # The new high-water mark is recorded on the run that crosses the level.
    mock_redis_set.assert_called_once_with("LAST_DB_NOTIFICATION_COUNT", 5001)

    # Same count on the next run: no second alert and no second write.
    mock_redis_get.return_value = 5001
    check_db_notification_fails()
    assert mock_provider.call_count == 1
    mock_redis_set.assert_called_once_with("LAST_DB_NOTIFICATION_COUNT", 5001)