Merge pull request #637 from GSA/notify-api-634
Create internal alarms/notifications for 50, 75, 90% of 10K db max
@@ -1,3 +1,4 @@
+import os
 from datetime import datetime, timedelta
 
 from flask import current_app
@@ -5,7 +6,7 @@ from notifications_utils.clients.zendesk.zendesk_client import NotifySupportTick
 from sqlalchemy import between
 from sqlalchemy.exc import SQLAlchemyError
 
-from app import notify_celery, zendesk_client
+from app import notify_celery, redis_store, zendesk_client
 from app.celery.tasks import (
     get_recipient_csv_and_template_and_sender_id,
     process_incomplete_jobs,
@@ -23,12 +24,16 @@ from app.dao.jobs_dao import (
     find_jobs_with_missing_rows,
     find_missing_row_for_job,
 )
-from app.dao.notifications_dao import notifications_not_yet_sent
+from app.dao.notifications_dao import (
+    dao_get_failed_notification_count,
+    notifications_not_yet_sent,
+)
 from app.dao.services_dao import (
     dao_find_services_sending_to_tv_numbers,
     dao_find_services_with_high_failure_rates,
 )
 from app.dao.users_dao import delete_codes_older_created_more_than_a_day_ago
+from app.delivery.send_to_providers import provider_to_use
 from app.models import (
     EMAIL_TYPE,
     JOB_STATUS_ERROR,
@@ -39,6 +44,8 @@ from app.models import (
 )
 from app.notifications.process_notifications import send_notification_to_queue
 
+MAX_NOTIFICATION_FAILS = 10000
+
 
 @notify_celery.task(name="run-scheduled-jobs")
 def run_scheduled_jobs():
@@ -91,6 +98,78 @@ def expire_or_delete_invitations():
         raise
 
 
+@notify_celery.task(name="check-db-notification-fails")
+def check_db_notification_fails():
+    """
+    We use redis to keep track of the previous fail count.
+
+    If the number of fails is at or above 100% of the limit, send an alert every
+    time this task runs, because it is urgent to fix it.
+
+    If the number is above the 25%, 50%, 75% or 90% threshold, only send an alert
+    when that threshold is first crossed. I.e., if the last count was at 23% and
+    the current count is at 27%, send an email; but if the last count was at 26%
+    and the current is at 27%, don't.
+    """
+    # The previous count is cached in redis so we can tell when a threshold is crossed.
+    last_value = int(redis_store.get("LAST_DB_NOTIFICATION_COUNT") or 0)
+
+    failed_count = dao_get_failed_notification_count()
+    if failed_count > last_value:
+        redis_store.set("LAST_DB_NOTIFICATION_COUNT", failed_count)
+    message = ""
+    curr_env = os.getenv("ENVIRONMENT")
+    if failed_count >= MAX_NOTIFICATION_FAILS:
+        message = f"We are over 100% in the db for failed notifications on {curr_env}"
+    elif (
+        failed_count >= MAX_NOTIFICATION_FAILS * 0.9
+        and last_value < MAX_NOTIFICATION_FAILS * 0.9
+    ):
+        message = f"We crossed above 90% in the db for failed notifications on {curr_env}"
+    elif (
+        failed_count >= MAX_NOTIFICATION_FAILS * 0.75
+        and last_value < MAX_NOTIFICATION_FAILS * 0.75
+    ):
+        message = f"We crossed above 75% in the db for failed notifications on {curr_env}"
+    elif (
+        failed_count >= MAX_NOTIFICATION_FAILS * 0.5
+        and last_value < MAX_NOTIFICATION_FAILS * 0.5
+    ):
+        message = f"We crossed above 50% in the db for failed notifications on {curr_env}"
+    elif (
+        failed_count >= MAX_NOTIFICATION_FAILS * 0.25
+        and last_value < MAX_NOTIFICATION_FAILS * 0.25
+    ):
+        message = f"We crossed above 25% in the db for failed notifications on {curr_env}"
+    # suppress any spam coming from the development tier
+    if message and curr_env != "development":
+        provider = provider_to_use(EMAIL_TYPE, False)
+        from_address = '"{}" <{}@{}>'.format(
+            "Failed Notification Count Alert",
+            "test_sender",
+            current_app.config["NOTIFY_EMAIL_DOMAIN"],
+        )
+        provider.send_email(
+            from_address,
+            "tts-notify-alerts@gsa.gov",
+            "DB Notification Failures Level Breached",
+            body=message,
+        )
+
+
 @notify_celery.task(name="check-job-status")
 def check_job_status():
     """
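The alerting rule described in the docstring above (alert on every run once the 10K limit is reached, otherwise only when a percentage threshold is first crossed) can be exercised in isolation. The sketch below is an illustration only and is not code from this commit: breach_message, THRESHOLDS and the env argument are made-up names used to show the crossing behaviour.

# Illustrative sketch only: a standalone reimplementation of the breach check
# above, with made-up names, so the threshold-crossing behaviour is easy to test.
MAX_NOTIFICATION_FAILS = 10000
THRESHOLDS = (0.9, 0.75, 0.5, 0.25)


def breach_message(failed_count, last_value, env="demo"):
    """Return an alert string, or "" when no new threshold has been crossed."""
    if failed_count >= MAX_NOTIFICATION_FAILS:
        # At or over the hard limit: alert on every run.
        return f"We are over 100% in the db for failed notifications on {env}"
    for fraction in THRESHOLDS:
        level = MAX_NOTIFICATION_FAILS * fraction
        if failed_count >= level and last_value < level:
            return f"We crossed above {int(fraction * 100)}% in the db for failed notifications on {env}"
    return ""


# Crossing 25% (2,500) triggers an alert; staying inside the same band does not.
assert breach_message(2700, 2300) != ""
assert breach_message(2700, 2600) == ""
# At or above the 10,000 limit the alert fires regardless of the previous count.
assert breach_message(10000, 9999) != ""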
@@ -199,6 +199,11 @@ class Config(object):
                 "schedule": timedelta(minutes=66),
                 "options": {"queue": QueueNames.PERIODIC},
             },
+            "check-db-notification-fails": {
+                "task": "check-db-notification-fails",
+                "schedule": crontab(minute="18, 48"),
+                "options": {"queue": QueueNames.PERIODIC},
+            },
             "check-job-status": {
                 "task": "check-job-status",
                 "schedule": crontab(),
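For context on the schedule added above: crontab(minute="18, 48") fires at minute 18 and 48 of every hour on the periodic queue. Below is a minimal, self-contained Celery sketch of an equivalent beat entry; the broker URL and the literal queue name are placeholders, not values from this repository.

# Minimal standalone sketch (not the project's config.py) showing how the new
# beat entry is wired up. "periodic-tasks" stands in for QueueNames.PERIODIC.
from celery import Celery
from celery.schedules import crontab

app = Celery("sketch", broker="redis://localhost:6379/0")

app.conf.beat_schedule = {
    "check-db-notification-fails": {
        "task": "check-db-notification-fails",
        # Runs at :18 and :48 past every hour.
        "schedule": crontab(minute="18, 48"),
        "options": {"queue": "periodic-tasks"},
    },
}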
@@ -20,6 +20,7 @@ from app.models import (
     EMAIL_TYPE,
     KEY_TYPE_TEST,
     NOTIFICATION_CREATED,
+    NOTIFICATION_FAILED,
     NOTIFICATION_PENDING,
     NOTIFICATION_PENDING_VIRUS_CHECK,
     NOTIFICATION_PERMANENT_FAILURE,
@@ -202,6 +203,11 @@ def dao_get_notification_count_for_service(*, service_id):
     return notification_count
 
 
+def dao_get_failed_notification_count():
+    failed_count = Notification.query.filter_by(status=NOTIFICATION_FAILED).count()
+    return failed_count
+
+
 def get_notification_with_personalisation(service_id, notification_id, key_type):
     filter_dict = {"service_id": service_id, "id": notification_id}
     if key_type:
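dao_get_failed_notification_count() is a straight count of notifications in the failed status. As a rough standalone equivalent (not the project's code), the sketch below runs the same kind of count with plain SQLAlchemy against an in-memory SQLite database; the table name, column set and the literal "failed" status value are assumptions made for illustration.

# Rough standalone equivalent of the DAO helper, using a plain SQLAlchemy
# session instead of the app's Flask-SQLAlchemy Notification model.
from sqlalchemy import Column, Integer, String, create_engine, func, select
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class Notification(Base):
    __tablename__ = "notifications"  # assumed table name
    id = Column(Integer, primary_key=True)
    status = Column(String)


NOTIFICATION_FAILED = "failed"  # assumed value of the status constant

engine = create_engine("sqlite:///:memory:")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add_all(
        [Notification(status="delivered"), Notification(status=NOTIFICATION_FAILED)]
    )
    session.commit()
    failed_count = session.scalar(
        select(func.count())
        .select_from(Notification)
        .where(Notification.status == NOTIFICATION_FAILED)
    )
    print(failed_count)  # 1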