mirror of
https://github.com/GSA/notifications-api.git
synced 2026-02-04 02:11:11 -05:00
Merge branch 'main' of https://github.com/GSA/notifications-api into 673-data-dictionary
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
import os
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from flask import current_app
|
||||
@@ -5,7 +6,7 @@ from notifications_utils.clients.zendesk.zendesk_client import NotifySupportTick
|
||||
from sqlalchemy import between
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
|
||||
from app import notify_celery, zendesk_client
|
||||
from app import notify_celery, redis_store, zendesk_client
|
||||
from app.celery.tasks import (
|
||||
get_recipient_csv_and_template_and_sender_id,
|
||||
process_incomplete_jobs,
|
||||
@@ -23,12 +24,16 @@ from app.dao.jobs_dao import (
|
||||
find_jobs_with_missing_rows,
|
||||
find_missing_row_for_job,
|
||||
)
|
||||
from app.dao.notifications_dao import notifications_not_yet_sent
|
||||
from app.dao.notifications_dao import (
|
||||
dao_get_failed_notification_count,
|
||||
notifications_not_yet_sent,
|
||||
)
|
||||
from app.dao.services_dao import (
|
||||
dao_find_services_sending_to_tv_numbers,
|
||||
dao_find_services_with_high_failure_rates,
|
||||
)
|
||||
from app.dao.users_dao import delete_codes_older_created_more_than_a_day_ago
|
||||
from app.delivery.send_to_providers import provider_to_use
|
||||
from app.models import (
|
||||
EMAIL_TYPE,
|
||||
JOB_STATUS_ERROR,
|
||||
@@ -39,6 +44,8 @@ from app.models import (
|
||||
)
|
||||
from app.notifications.process_notifications import send_notification_to_queue
|
||||
|
||||
MAX_NOTIFICATION_FAILS = 10000
|
||||
|
||||
|
||||
@notify_celery.task(name="run-scheduled-jobs")
|
||||
def run_scheduled_jobs():
|
||||
@@ -91,6 +98,78 @@ def expire_or_delete_invitations():
|
||||
raise
|
||||
|
||||
|
||||
@notify_celery.task(name="check-db-notification-fails")
def check_db_notification_fails():
    """
    Alert on the number of failed notifications in the db.

    We use redis to keep track of the fail count seen on a previous run.

    If the number of fails is at or above 100% of the limit, we send an alert
    every time this runs, because it is urgent to fix it.

    If the number crosses 25%, 50%, 75% or 90% of the limit, we only send an
    alert on the breach itself. I.e., if the last number was at 23% and the
    current number is 27%, send an email. But if the last number was 26% and
    the current is 27%, don't.
    """
    # redis returns a string (or None), so normalise to int before comparing
    # against failed_count — comparing str/bytes to int raises TypeError.
    raw_last = redis_store.get("LAST_DB_NOTIFICATION_COUNT")
    last_value = int(raw_last) if raw_last else 0

    failed_count = dao_get_failed_notification_count()
    if failed_count > last_value:
        # Only ratchet the stored value upwards so a count hovering around a
        # threshold doesn't re-trigger the same breach alert on every run.
        redis_store.set("LAST_DB_NOTIFICATION_COUNT", failed_count)

    curr_env = os.getenv("ENVIRONMENT")
    message = ""
    if failed_count >= MAX_NOTIFICATION_FAILS:
        # Over the hard limit: alert on every run until it is fixed.
        message = f"We are over 100% in the db for failed notifications on {curr_env}"
    else:
        # Check thresholds highest-first so only the topmost newly-crossed
        # level fires. The message is a plain string (previously these
        # branches built a tuple, which produced a mangled email body).
        for fraction, percent in ((0.9, 90), (0.75, 75), (0.5, 50), (0.25, 25)):
            limit = MAX_NOTIFICATION_FAILS * fraction
            if failed_count >= limit and last_value < limit:
                message = (
                    f"We crossed above {percent}% in the db "
                    f"for failed notifications on {curr_env}"
                )
                break

    # suppress any spam coming from development tier
    if message and curr_env != "development":
        provider = provider_to_use(EMAIL_TYPE, False)
        # NOTE(review): "test_sender" looks like a placeholder local part for
        # the from-address — confirm the intended sender mailbox.
        from_address = '"{}" <{}@{}>'.format(
            "Failed Notification Count Alert",
            "test_sender",
            current_app.config["NOTIFY_EMAIL_DOMAIN"],
        )
        provider.send_email(
            from_address,
            "tts-notify-alerts@gsa.gov",
            "DB Notification Failures Level Breached",
            body=message,
        )
|
||||
|
||||
|
||||
@notify_celery.task(name="check-job-status")
|
||||
def check_job_status():
|
||||
"""
|
||||
|
||||
@@ -199,6 +199,11 @@ class Config(object):
|
||||
"schedule": timedelta(minutes=66),
|
||||
"options": {"queue": QueueNames.PERIODIC},
|
||||
},
|
||||
"check-db-notification-fails": {
|
||||
"task": "check-db-notification-fails",
|
||||
"schedule": crontab(minute="18, 48"),
|
||||
"options": {"queue": QueueNames.PERIODIC},
|
||||
},
|
||||
"check-job-status": {
|
||||
"task": "check-job-status",
|
||||
"schedule": crontab(),
|
||||
|
||||
@@ -20,6 +20,7 @@ from app.models import (
|
||||
EMAIL_TYPE,
|
||||
KEY_TYPE_TEST,
|
||||
NOTIFICATION_CREATED,
|
||||
NOTIFICATION_FAILED,
|
||||
NOTIFICATION_PENDING,
|
||||
NOTIFICATION_PENDING_VIRUS_CHECK,
|
||||
NOTIFICATION_PERMANENT_FAILURE,
|
||||
@@ -202,6 +203,11 @@ def dao_get_notification_count_for_service(*, service_id):
|
||||
return notification_count
|
||||
|
||||
|
||||
def dao_get_failed_notification_count():
    """Return the total number of notifications whose status is failed."""
    return Notification.query.filter_by(status=NOTIFICATION_FAILED).count()
|
||||
|
||||
|
||||
def get_notification_with_personalisation(service_id, notification_id, key_type):
|
||||
filter_dict = {"service_id": service_id, "id": notification_id}
|
||||
if key_type:
|
||||
|
||||
13
docs/all.md
13
docs/all.md
@@ -477,7 +477,18 @@ Appending `--help` to any command will give you more information about parameter
|
||||
|
||||
To run a command on cloud.gov, use this format:
|
||||
|
||||
`cf run-task CLOUD-GOV-APP --commmand "YOUR COMMAND HERE" --name YOUR-COMMAND`
|
||||
`cf run-task CLOUD-GOV-APP --command "YOUR COMMAND HERE" --name YOUR-COMMAND-NAME`
|
||||
|
||||
**NOTE:** Do not include `poetry run` in the command you provide for `cf run-task`! cloud.gov is already aware
|
||||
of the Python virtual environment and Python dependencies; it's all handled through the Python brokerpak we use
|
||||
to deploy the application.
|
||||
|
||||
For example, if you want to update the templates in one of the remote environments after a change to the JSON
|
||||
file, you would run this:
|
||||
|
||||
```sh
|
||||
cf run-task CLOUD-GOV-APP --command "flask command update-templates" --name YOUR-COMMAND-NAME
|
||||
```
|
||||
|
||||
[Here's more documentation](https://docs.cloudfoundry.org/devguide/using-tasks.html) about Cloud Foundry tasks.
|
||||
|
||||
|
||||
32
poetry.lock
generated
32
poetry.lock
generated
@@ -2506,26 +2506,26 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "newrelic"
|
||||
version = "9.2.0"
|
||||
version = "9.3.0"
|
||||
description = "New Relic Python Agent"
|
||||
optional = false
|
||||
python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*"
|
||||
files = [
|
||||
{file = "newrelic-9.2.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:2b942eb9cfe8f62268e091d399d95a6762ef5fb90636839d61a30391efbcfbf0"},
|
||||
{file = "newrelic-9.2.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:56df42fc26400d8ee1e324bfff40439399149b15fbeb8ffd532a96e54576e69c"},
|
||||
{file = "newrelic-9.2.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:2cfa86a5c3388490335385e0c8c155ee1f06d738282721bd05a8c7ceed33fd92"},
|
||||
{file = "newrelic-9.2.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:4139ef79e6abb7458edcf67f4ac0ce0b7dacbb58357f4d41716971fb15f778b6"},
|
||||
{file = "newrelic-9.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d36cf6ad3cf1df3989dba9c8a5dbc36150dd1852ffeccdf6b957f21a2d5869c0"},
|
||||
{file = "newrelic-9.2.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:420f07d0ea1bfde21507e6a59c5714f3a0451d01ab08f5ef79dd42b4552ef5ac"},
|
||||
{file = "newrelic-9.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1bddb5793117d91e130f2f93a86f5c3d9f29823329d35a38f8f64b738b3335e"},
|
||||
{file = "newrelic-9.2.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5b1edfc48c3d483d990bab28f268d64914f76180e057424914bb29e5bcfa927"},
|
||||
{file = "newrelic-9.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f6c5065c434f8fb65e0e5a96a0e385be9baf5625e024cb9c87f345d966a9e5f"},
|
||||
{file = "newrelic-9.2.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b151f1830590cebb53ddf45462101a0ef59d91e28a46a1a652626dc0d4c0148d"},
|
||||
{file = "newrelic-9.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9db7dc6541a519acb327f9409c96f42faefb60748f0827c1dacce7613f88864"},
|
||||
{file = "newrelic-9.2.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2a69115893ec53b1815b1041e231265abcf60d7d4c07ccc335b6146459e07ed"},
|
||||
{file = "newrelic-9.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2911fb601b2a66eb0ab328342e0253889d94102c0b823f9dc5f6124917f9fbac"},
|
||||
{file = "newrelic-9.2.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88c799cb29e6b9c20fed50709edea6bc2b05fc5e40d0fd4d3fe2a37ae4352043"},
|
||||
{file = "newrelic-9.2.0.tar.gz", hash = "sha256:32395e1c6a97cc96427abaf4b274ccf00709613a68d18b51fc2c0711cda89bc7"},
|
||||
{file = "newrelic-9.3.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:f25980a8c86bda75344b5b22edd5d6ad41777776e1ed8a495eb6e38e9813b02c"},
|
||||
{file = "newrelic-9.3.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:adefb6620c5a5d75b4bf3ec565cc4d91abcb5cc4e5569f5f82ab29fa3d5aa2d9"},
|
||||
{file = "newrelic-9.3.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:27056ab8a3cf39787fc1f93f55243749dd25786f65b15032b6fbb3e8534f4c2a"},
|
||||
{file = "newrelic-9.3.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:663fa1c074661f93abf681c8f6028de64744c67f004b722835de1372b6bc4d19"},
|
||||
{file = "newrelic-9.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:83346c8f0bcb8f07f74c88f6073e4d44a2e2b3eeec5b2ebe8c450ae695d02b88"},
|
||||
{file = "newrelic-9.3.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2187078d7b0054b30f39dbf891cb2caa71a7046f6d0258fb8c0fcfce70777774"},
|
||||
{file = "newrelic-9.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe0f5edd4eba3d62742b3b0730bb4f826be015dd7fbb9c455b01c410421661a2"},
|
||||
{file = "newrelic-9.3.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8577a0f733174bee70a147f71aa061fb44a593a1be841feffe12dff480c6e02e"},
|
||||
{file = "newrelic-9.3.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:234594655ac0fbe938d34ce5d5d38549d0f5cc11d0552170903ad09574bb4499"},
|
||||
{file = "newrelic-9.3.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ce38949404e974b566b21487394f5ea36a1fb80ba36cc4a6e8fb968d2e150ab"},
|
||||
{file = "newrelic-9.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a750ffed8aedacdafcb548b3d3f45630a96862db630c9f72520ebbfe91e4e9e0"},
|
||||
{file = "newrelic-9.3.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d01d0f0c22b1290dbd2756ef120cfbe154179aae35e1dfc579f8bfd574066105"},
|
||||
{file = "newrelic-9.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:53e860c6eacfdef879f23fbbf7d76d8bbb90b725a1c422f62439c6edfceebc21"},
|
||||
{file = "newrelic-9.3.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4caad3017cc978f3130fe2f3933f233c214a4850a595458b733282b3b7f7e886"},
|
||||
{file = "newrelic-9.3.0.tar.gz", hash = "sha256:c2dd685527433f6b6fbffe58f83852b46c24b9713ebb8ee7af647e04c2de3ee4"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
|
||||
@@ -8,6 +8,7 @@ from notifications_utils.clients.zendesk.zendesk_client import NotifySupportTick
|
||||
|
||||
from app.celery import scheduled_tasks
|
||||
from app.celery.scheduled_tasks import (
|
||||
check_db_notification_fails,
|
||||
check_for_missing_rows_in_completed_jobs,
|
||||
check_for_services_with_high_failure_rates_or_sending_to_tv_numbers,
|
||||
check_job_status,
|
||||
@@ -53,6 +54,51 @@ def test_should_call_expire_or_delete_invotations_on_expire_or_delete_invitation
|
||||
)
|
||||
|
||||
|
||||
def test_should_check_db_notification_fails_task_over_100_percent(
|
||||
notify_db_session, mocker
|
||||
):
|
||||
mock_dao = mocker.patch(
|
||||
"app.celery.scheduled_tasks.dao_get_failed_notification_count"
|
||||
)
|
||||
mock_provider = mocker.patch("app.celery.scheduled_tasks.provider_to_use")
|
||||
mock_dao.return_value = 100000
|
||||
check_db_notification_fails()
|
||||
assert mock_provider.call_count == 1
|
||||
|
||||
|
||||
def test_should_check_db_notification_fails_task_less_than_25_percent(
|
||||
notify_db_session, mocker
|
||||
):
|
||||
mock_dao = mocker.patch(
|
||||
"app.celery.scheduled_tasks.dao_get_failed_notification_count"
|
||||
)
|
||||
mock_provider = mocker.patch("app.celery.scheduled_tasks.provider_to_use")
|
||||
mock_dao.return_value = 10
|
||||
check_db_notification_fails()
|
||||
assert mock_provider.call_count == 0
|
||||
|
||||
|
||||
def test_should_check_db_notification_fails_task_over_50_percent(
|
||||
notify_db_session, mocker
|
||||
):
|
||||
# This tests that we only send an alert the 1st time we cross over 50%. We don't want
|
||||
# to be sending the same alert every hour, especially as it might be quite normal for the db
|
||||
# fails to be at 25 or 50 for long periods of time.
|
||||
mock_dao = mocker.patch(
|
||||
"app.celery.scheduled_tasks.dao_get_failed_notification_count"
|
||||
)
|
||||
mock_provider = mocker.patch("app.celery.scheduled_tasks.provider_to_use")
|
||||
mock_redis = mocker.patch("app.celery.scheduled_tasks.redis_store.get")
|
||||
mock_dao.return_value = 5001
|
||||
mock_redis.return_value = 0
|
||||
check_db_notification_fails()
|
||||
assert mock_provider.call_count == 1
|
||||
|
||||
mock_redis.return_value = 5001
|
||||
check_db_notification_fails()
|
||||
assert mock_provider.call_count == 1
|
||||
|
||||
|
||||
def test_should_update_scheduled_jobs_and_put_on_queue(mocker, sample_template):
|
||||
mocked = mocker.patch("app.celery.tasks.process_job.apply_async")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user