mirror of
https://github.com/GSA/notifications-api.git
synced 2026-02-01 07:35:34 -05:00
split delete task up into per service
We really don't gain anything by running each service delete in sequence - we get the services, and then just loop through them deleting per service. By deleting per service in separate tasks, we can take advantage of parallelism. The only thing we lose is some log lines, but I don't think we're that interested in them. Only set the query limit at the move_notifications dao function - the task doesn't really care about the technical implementation of how it deletes the notifications.
This commit is contained in:
@@ -5,6 +5,7 @@ from flask import current_app
|
||||
from notifications_utils.clients.zendesk.zendesk_client import (
|
||||
NotifySupportTicket,
|
||||
)
|
||||
from notifications_utils.timezones import convert_utc_to_bst
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
|
||||
@@ -21,7 +22,11 @@ from app.dao.jobs_dao import (
|
||||
from app.dao.notifications_dao import (
|
||||
dao_get_notifications_processing_time_stats,
|
||||
dao_timeout_notifications,
|
||||
delete_notifications_older_than_retention_by_type,
|
||||
get_service_ids_that_have_notifications_from_before_timestamp,
|
||||
move_notifications_to_notification_history,
|
||||
)
|
||||
from app.dao.service_data_retention_dao import (
|
||||
fetch_service_data_retention_for_all_services_by_notification_type,
|
||||
)
|
||||
from app.models import (
|
||||
EMAIL_TYPE,
|
||||
@@ -68,45 +73,74 @@ def delete_notifications_older_than_retention():
|
||||
@notify_celery.task(name="delete-sms-notifications")
|
||||
@cronitor("delete-sms-notifications")
|
||||
def delete_sms_notifications_older_than_retention():
|
||||
start = datetime.utcnow()
|
||||
deleted = delete_notifications_older_than_retention_by_type('sms')
|
||||
current_app.logger.info(
|
||||
"Delete {} job started {} finished {} deleted {} sms notifications".format(
|
||||
'sms',
|
||||
start,
|
||||
datetime.utcnow(),
|
||||
deleted
|
||||
)
|
||||
)
|
||||
_delete_notifications_older_than_retention_by_type('sms')
|
||||
|
||||
|
||||
@notify_celery.task(name="delete-email-notifications")
|
||||
@cronitor("delete-email-notifications")
|
||||
def delete_email_notifications_older_than_retention():
|
||||
start = datetime.utcnow()
|
||||
deleted = delete_notifications_older_than_retention_by_type('email')
|
||||
current_app.logger.info(
|
||||
"Delete {} job started {} finished {} deleted {} email notifications".format(
|
||||
'email',
|
||||
start,
|
||||
datetime.utcnow(),
|
||||
deleted
|
||||
)
|
||||
)
|
||||
_delete_notifications_older_than_retention_by_type('email')
|
||||
|
||||
|
||||
@notify_celery.task(name="delete-letter-notifications")
|
||||
@cronitor("delete-letter-notifications")
|
||||
def delete_letter_notifications_older_than_retention():
|
||||
start = datetime.utcnow()
|
||||
deleted = delete_notifications_older_than_retention_by_type('letter')
|
||||
_delete_notifications_older_than_retention_by_type('letter')
|
||||
|
||||
|
||||
def _delete_notifications_older_than_retention_by_type(notification_type):
|
||||
current_app.logger.info(
|
||||
"Delete {} job started {} finished {} deleted {} letter notifications".format(
|
||||
'letter',
|
||||
start,
|
||||
datetime.utcnow(),
|
||||
deleted
|
||||
'Deleting {} notifications for services with flexible data retention'.format(notification_type))
|
||||
|
||||
flexible_data_retention = fetch_service_data_retention_for_all_services_by_notification_type(notification_type)
|
||||
for f in flexible_data_retention:
|
||||
day_to_delete_backwards_from = get_london_midnight_in_utc(
|
||||
convert_utc_to_bst(datetime.utcnow()).date() - timedelta(days=f.days_of_retention)
|
||||
)
|
||||
|
||||
delete_notifications_for_service_and_type.apply_async(queue=QueueNames.REPORTING, kwargs={
|
||||
'service_id': f.service_id,
|
||||
'notification_type': notification_type,
|
||||
'datetime_to_delete_before': day_to_delete_backwards_from
|
||||
})
|
||||
|
||||
seven_days_ago = get_london_midnight_in_utc(convert_utc_to_bst(datetime.utcnow()).date() - timedelta(days=7))
|
||||
service_ids_with_data_retention = {x.service_id for x in flexible_data_retention}
|
||||
|
||||
# get a list of all service ids that we'll need to delete for. Typically that might only be 5% of services.
|
||||
# This query takes a couple of mins to run.
|
||||
service_ids_that_have_sent_notifications_recently = get_service_ids_that_have_notifications_from_before_timestamp(
|
||||
notification_type,
|
||||
seven_days_ago
|
||||
)
|
||||
|
||||
service_ids_to_purge = service_ids_that_have_sent_notifications_recently - service_ids_with_data_retention
|
||||
|
||||
current_app.logger.info('Deleting {} notifications for {} services without flexible data retention'.format(
|
||||
notification_type,
|
||||
len(service_ids_to_purge)
|
||||
))
|
||||
|
||||
for service_id in service_ids_to_purge:
|
||||
delete_notifications_for_service_and_type.apply_async(queue=QueueNames.REPORTING, kwargs={
|
||||
'service_id': service_id,
|
||||
'notification_type': notification_type,
|
||||
'datetime_to_delete_before': seven_days_ago
|
||||
})
|
||||
|
||||
|
||||
@notify_celery.task(name='delete-notifications-for-service-and-type')
def delete_notifications_for_service_and_type(service_id, notification_type, datetime_to_delete_before):
    """Delete one service's old notifications of a single type and log the outcome.

    The actual work is delegated to ``move_notifications_to_notification_history``;
    this task only times the call and logs the count it returns.

    :param service_id: id of the service whose notifications are processed.
    :param notification_type: notification type, passed through to the dao call.
    :param datetime_to_delete_before: cutoff passed through to the dao call.
        NOTE(review): presumably notifications created before this moment are
        the ones removed - confirm against the dao function.
    """
    start = datetime.utcnow()
    num_deleted = move_notifications_to_notification_history(
        notification_type,
        service_id,
        datetime_to_delete_before,
    )
    end = datetime.utcnow()

    # timedelta.seconds is only the seconds *component* (it ignores whole days),
    # so use total_seconds() to report the real elapsed time. Truncating to int
    # keeps the log line formatted exactly as before for runs under a day.
    elapsed_seconds = int((end - start).total_seconds())

    current_app.logger.info(
        f'Deleted {num_deleted} {notification_type} notifications for '
        f'service id: {service_id} in {elapsed_seconds} seconds'
    )
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user