2021-03-10 13:55:06 +00:00
|
|
|
from datetime import datetime, timedelta
|
2019-01-14 17:22:41 +00:00
|
|
|
|
|
|
|
|
from flask import current_app
|
|
|
|
|
from sqlalchemy.exc import SQLAlchemyError
|
|
|
|
|
|
2023-04-25 07:50:56 -07:00
|
|
|
from app import notify_celery
|
2019-01-14 17:22:41 +00:00
|
|
|
from app.aws import s3
|
2023-05-23 08:31:30 -07:00
|
|
|
from app.aws.s3 import remove_csv_object
|
2022-10-03 17:16:59 -07:00
|
|
|
from app.celery.process_ses_receipts_tasks import check_and_queue_callback_task
|
2019-01-14 17:22:41 +00:00
|
|
|
from app.config import QueueNames
|
2021-03-10 13:55:06 +00:00
|
|
|
from app.cronitor import cronitor
|
2021-03-11 18:15:11 +00:00
|
|
|
from app.dao.fact_processing_time_dao import insert_update_processing_time
|
2019-02-26 17:57:35 +00:00
|
|
|
from app.dao.inbound_sms_dao import delete_inbound_sms_older_than_retention
|
2019-01-14 17:22:41 +00:00
|
|
|
from app.dao.jobs_dao import (
|
2021-03-10 13:55:06 +00:00
|
|
|
dao_archive_job,
|
2019-01-14 17:22:41 +00:00
|
|
|
dao_get_jobs_older_than_data_retention,
|
2023-05-23 08:31:30 -07:00
|
|
|
dao_get_unfinished_jobs,
|
2019-01-14 17:22:41 +00:00
|
|
|
)
|
|
|
|
|
from app.dao.notifications_dao import (
|
2021-03-11 18:53:43 +00:00
|
|
|
dao_get_notifications_processing_time_stats,
|
2019-01-14 17:22:41 +00:00
|
|
|
dao_timeout_notifications,
|
2022-01-24 15:54:37 +00:00
|
|
|
get_service_ids_with_notifications_before,
|
2021-12-06 09:30:48 +00:00
|
|
|
move_notifications_to_notification_history,
|
|
|
|
|
)
|
|
|
|
|
from app.dao.service_data_retention_dao import (
|
|
|
|
|
fetch_service_data_retention_for_all_services_by_notification_type,
|
2019-01-14 17:22:41 +00:00
|
|
|
)
|
2023-03-02 20:20:31 -05:00
|
|
|
from app.models import EMAIL_TYPE, SMS_TYPE, FactProcessingTime
|
2023-05-10 08:39:50 -07:00
|
|
|
from app.utils import get_midnight_in_utc
|
2019-01-14 17:22:41 +00:00
|
|
|
|
|
|
|
|
|
2019-01-16 14:11:03 +00:00
|
|
|
@notify_celery.task(name="remove_sms_email_jobs")
@cronitor("remove_sms_email_jobs")
def remove_sms_email_csv_files():
    """Celery task: clean up job CSV files for email and SMS jobs.

    Delegates to ``_remove_csv_files`` with both the email and SMS
    notification types.
    """
    _remove_csv_files(job_types=[EMAIL_TYPE, SMS_TYPE])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _remove_csv_files(job_types):
    """Remove from S3, then archive, each job older than its data retention.

    :param job_types: notification types handed to
        ``dao_get_jobs_older_than_data_retention`` to select the jobs.
    """
    for expired_job in dao_get_jobs_older_than_data_retention(notification_types=job_types):
        # Delete the uploaded file first, then mark the job as archived.
        s3.remove_job_from_s3(expired_job.service_id, expired_job.id)
        dao_archive_job(expired_job)
        current_app.logger.info("Job ID {} has been removed from s3.".format(expired_job.id))
|
|
|
|
|
|
|
|
|
|
|
2023-05-23 08:31:30 -07:00
|
|
|
@notify_celery.task(name="cleanup-unfinished-jobs")
def cleanup_unfinished_jobs():
    """Celery task: remove the CSV for, and archive, jobs stuck in processing.

    Every job returned by ``dao_get_unfinished_jobs`` is inspected. A job
    whose ``processing_started`` is more than five minutes in the past has
    its CSV object removed and is then archived. Jobs that have no
    ``processing_started`` value are skipped (and logged) instead of
    aborting the whole run.
    """
    now = datetime.utcnow()
    max_processing_time = timedelta(minutes=5)

    for job in dao_get_unfinished_jobs():
        if job.processing_started is None:
            # There is no start time to measure from. Adding a timedelta to
            # None would raise TypeError and stop the cleanup of every
            # remaining job, so leave this one alone.
            current_app.logger.info(
                "Job ID {} has not started processing, skipping cleanup.".format(job.id)
            )
            continue

        # The original note on this code says the query already restricts to
        # jobs with no processing_finished time, so a job that started more
        # than five minutes ago is considered to have taken too long.
        if now > job.processing_started + max_processing_time:
            remove_csv_object(job.original_file_name)
            dao_archive_job(job)
|
|
|
|
|
|
|
|
|
|
|
2020-04-07 16:41:33 +01:00
|
|
|
@notify_celery.task(name="delete-notifications-older-than-retention")
def delete_notifications_older_than_retention():
    """Celery task: queue the per-type notification deletion tasks.

    The email and SMS delete tasks are both put on the reporting queue.

    Rationale recorded with the change that introduced this routing: the
    delete tasks share a lot with the reporting tasks - they run nightly,
    take a long time, and produce error messages if they get run multiple
    times (due to the queue's visibility timeout). On the periodic worker,
    a task could sit waiting longer than the visibility timeout before
    being picked up, so it was assumed lost and replayed. The reporting
    worker is tuned for large tasks (for example a prefetch multiplier of
    one) and has more concurrent workers, so the delete tasks can run at
    the same time.
    """
    per_type_tasks = (
        delete_email_notifications_older_than_retention,
        delete_sms_notifications_older_than_retention,
    )
    for task in per_type_tasks:
        task.apply_async(queue=QueueNames.REPORTING)
|
2020-04-07 16:41:33 +01:00
|
|
|
|
|
|
|
|
|
2019-01-14 17:22:41 +00:00
|
|
|
@notify_celery.task(name="delete-sms-notifications")
@cronitor("delete-sms-notifications")
def delete_sms_notifications_older_than_retention():
    """Celery task: trigger the retention-based deletion subtasks for SMS."""
    _delete_notifications_older_than_retention_by_type(notification_type='sms')
|
2019-01-14 17:22:41 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
@notify_celery.task(name="delete-email-notifications")
@cronitor("delete-email-notifications")
def delete_email_notifications_older_than_retention():
    """Celery task: trigger the retention-based deletion subtasks for email."""
    _delete_notifications_older_than_retention_by_type(notification_type='email')
|
2019-01-14 17:22:41 +00:00
|
|
|
|
|
|
|
|
|
2021-12-06 09:30:48 +00:00
|
|
|
def _delete_notifications_older_than_retention_by_type(notification_type):
    """Queue one deletion subtask per service for ``notification_type``.

    Two groups of services are handled:

    * services with a flexible data retention entry for this notification
      type: the cutoff is today's date minus their ``days_of_retention``;
    * every other service that has notifications from before the default
      seven-day cutoff: the cutoff is today's date minus seven days.

    Each subtask is ``delete_notifications_for_service_and_type``, queued on
    the reporting queue.

    :param notification_type: the notification type to delete, as passed on
        to the DAO queries and to each subtask.
    """
    flexible_data_retention = fetch_service_data_retention_for_all_services_by_notification_type(notification_type)

    # Worked out once so that every subtask queued by this run measures from
    # the same day, even if the run happens to cross midnight.
    today = get_midnight_in_utc(datetime.utcnow()).date()
    seven_days_ago = today - timedelta(days=7)

    def queue_deletion(service_id, delete_before):
        # Single place that builds the subtask call for both groups.
        delete_notifications_for_service_and_type.apply_async(queue=QueueNames.REPORTING, kwargs={
            'service_id': service_id,
            'notification_type': notification_type,
            'datetime_to_delete_before': delete_before
        })

    # One pass over the retention rows: queue the subtask and remember the
    # service id so it is excluded from the default purge below.
    service_ids_with_data_retention = set()
    for retention in flexible_data_retention:
        service_ids_with_data_retention.add(retention.service_id)
        queue_deletion(retention.service_id, today - timedelta(days=retention.days_of_retention))

    # get a list of all service ids that we'll need to delete for. Typically that might only be 5% of services.
    # This query takes a couple of mins to run.
    service_ids_with_old_notifications = get_service_ids_with_notifications_before(
        notification_type,
        seven_days_ago
    )

    service_ids_to_purge = service_ids_with_old_notifications - service_ids_with_data_retention

    for service_id in service_ids_to_purge:
        queue_deletion(service_id, seven_days_ago)

    current_app.logger.info(
        f'delete-notifications-older-than-retention: triggered subtasks for notification_type {notification_type}: '
        f'{len(service_ids_with_data_retention)} services with flexible data retention, '
        f'{len(service_ids_to_purge)} services without flexible data retention'
    )
|
|
|
|
|
|
2021-12-06 09:30:48 +00:00
|
|
|
|
|
|
|
|
@notify_celery.task(name='delete-notifications-for-service-and-type')
def delete_notifications_for_service_and_type(service_id, notification_type, datetime_to_delete_before):
    """Celery task: move one service's old notifications to history.

    Calls ``move_notifications_to_notification_history`` and, when it
    reports a non-zero count, logs the count and how long the call took.

    :param service_id: the service whose notifications are processed.
    :param notification_type: the notification type to process.
    :param datetime_to_delete_before: cutoff passed through to the DAO.
    """
    start = datetime.utcnow()
    num_deleted = move_notifications_to_notification_history(
        notification_type,
        service_id,
        datetime_to_delete_before,
    )
    if not num_deleted:
        return

    elapsed = datetime.utcnow() - start
    # total_seconds() covers the whole interval; timedelta.seconds alone drops
    # the days component and would wrap for a run lasting a day or more.
    current_app.logger.info(
        f'delete-notifications-for-service-and-type: '
        f'service: {service_id}, '
        f'notification_type: {notification_type}, '
        f'count deleted: {num_deleted}, '
        f'duration: {int(elapsed.total_seconds())} seconds'
    )
|
2019-01-14 17:22:41 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
@notify_celery.task(name='timeout-sending-notifications')
@cronitor('timeout-sending-notifications')
def timeout_notifications():
    """Celery task: time out notifications older than the configured period.

    The cutoff is "now" minus ``SENDING_NOTIFICATIONS_TIMEOUT_PERIOD``
    seconds, taken from the app config. ``dao_timeout_notifications`` is
    called repeatedly with that cutoff; each returned notification is handed
    to ``check_and_queue_callback_task`` and the batch size is logged. The
    loop ends after the first empty batch, which is still logged.
    """
    timeout_period = current_app.config.get('SENDING_NOTIFICATIONS_TIMEOUT_PERIOD')
    cutoff_time = datetime.utcnow() - timedelta(seconds=timeout_period)

    while True:
        batch = dao_timeout_notifications(cutoff_time)

        for timed_out in batch:
            check_and_queue_callback_task(timed_out)

        current_app.logger.info(
            "Timeout period reached for {} notifications, status has been updated.".format(len(batch)))

        # An empty batch means there is nothing left to time out.
        if len(batch) == 0:
            break
|
|
|
|
|
|
2019-01-14 17:22:41 +00:00
|
|
|
|
|
|
|
|
@notify_celery.task(name="delete-inbound-sms")
@cronitor("delete-inbound-sms")
def delete_inbound_sms():
    """Celery task: delete inbound SMS that are older than their retention.

    Logs the start time, finish time and the value returned by
    ``delete_inbound_sms_older_than_retention``.

    :raises SQLAlchemyError: re-raised after being logged.
    """
    try:
        started_at = datetime.utcnow()
        deleted_count = delete_inbound_sms_older_than_retention()
        finished_at = datetime.utcnow()
        message = "Delete inbound sms job started {} finished {} deleted {} inbound sms notifications".format(
            started_at,
            finished_at,
            deleted_count
        )
        current_app.logger.info(message)
    except SQLAlchemyError:
        current_app.logger.exception("Failed to delete inbound sms notifications")
        raise
|
|
|
|
|
|
|
|
|
|
|
2021-03-11 18:15:11 +00:00
|
|
|
@notify_celery.task(name='save-daily-notification-processing-time')
@cronitor("save-daily-notification-processing-time")
def save_daily_notification_processing_time(local_date=None):
    """Celery task: store one day's notification processing-time figures.

    :param local_date: the day to process, as a string in the format
        "YYYY-MM-DD". When omitted, yesterday (based on the current UTC
        time) is used.
    """
    if local_date is not None:
        day = datetime.strptime(local_date, "%Y-%m-%d").date()
    else:
        # if a date is not provided, we run against yesterdays data
        day = (datetime.utcnow() - timedelta(days=1)).date()

    # The stats window runs from midnight of the day to the following midnight.
    window_start = get_midnight_in_utc(day)
    window_end = get_midnight_in_utc(day + timedelta(days=1))
    stats = dao_get_notifications_processing_time_stats(window_start, window_end)

    record = FactProcessingTime(
        local_date=day,
        messages_total=stats.messages_total,
        messages_within_10_secs=stats.messages_within_10_secs
    )
    insert_update_processing_time(record)
|