From dc5b56ff789fe8d6354a55c9b2d93688a7530aff Mon Sep 17 00:00:00 2001 From: Leo Hemsted Date: Fri, 20 Mar 2020 19:07:08 +0000 Subject: [PATCH 1/2] Change sql to chunk by hour to remove old notifications insert/update, and then delete notifications in hourly batches. This means that if the task gets interrupted part-way through, we'll have at least something to show for it. Previously we would insert and update into the history table but might not delete from the notification table properly. Keeping the offsets and limits for confidence around reliability and queries timing out. Keeping the join to notification_history to ensure we don't delete anything prematurely while our DB is in a bit of a weird state with lots of these tasks failing over the last week. --- app/dao/notifications_dao.py | 65 ++++++++++++++++++++++++++---------- 1 file changed, 48 insertions(+), 17 deletions(-) diff --git a/app/dao/notifications_dao.py b/app/dao/notifications_dao.py index 645bf9090..da8d357bf 100644 --- a/app/dao/notifications_dao.py +++ b/app/dao/notifications_dao.py @@ -307,19 +307,14 @@ def delete_notifications_older_than_retention_by_type(notification_type, qry_lim ).all() deleted = 0 for f in flexible_data_retention: - days_of_retention = get_london_midnight_in_utc( - convert_utc_to_bst(datetime.utcnow()).date()) - timedelta(days=f.days_of_retention) - - if notification_type == LETTER_TYPE: - _delete_letters_from_s3( - notification_type, f.service_id, days_of_retention, qry_limit - ) - - insert_update_notification_history(notification_type, days_of_retention, f.service_id) - current_app.logger.info( "Deleting {} notifications for service id: {}".format(notification_type, f.service_id)) - deleted += _delete_notifications(notification_type, days_of_retention, f.service_id, qry_limit) + + day_to_delete_backwards_from = get_london_midnight_in_utc( + convert_utc_to_bst(datetime.utcnow()).date()) - timedelta(days=f.days_of_retention) + + deleted += 
_move_notifications_to_notification_history( + notification_type, f.service_id, day_to_delete_backwards_from, qry_limit) current_app.logger.info( 'Deleting {} notifications for services without flexible data retention'.format(notification_type)) @@ -329,18 +324,54 @@ def delete_notifications_older_than_retention_by_type(notification_type, qry_lim service_ids_to_purge = db.session.query(Service.id).filter(Service.id.notin_(services_with_data_retention)).all() for service_id in service_ids_to_purge: - if notification_type == LETTER_TYPE: - _delete_letters_from_s3( - notification_type, service_id, seven_days_ago, qry_limit - ) - insert_update_notification_history(notification_type, seven_days_ago, service_id) - deleted += _delete_notifications(notification_type, seven_days_ago, service_id, qry_limit) + deleted += _move_notifications_to_notification_history( + notification_type, service_id, seven_days_ago, qry_limit) current_app.logger.info('Finished deleting {} notifications'.format(notification_type)) return deleted +def _move_notifications_to_notification_history(notification_type, service_id, day_to_delete_backwards_from, qry_limit): + deleted = 0 + if notification_type == LETTER_TYPE: + _delete_letters_from_s3( + notification_type, service_id, day_to_delete_backwards_from, qry_limit + ) + + stop = -1 # exclusive, we want to include 0 + step = -1 + for hour_delta in range(23, stop, step): + # We find the timestamp we want to delete all notifications backwards from + # We then start 23 hours ago, and do an insert notification history before deleting all notifications older + # We then look 22 hours ago, do an insert notification history before deleting all notifications older + # We continue this until we reach the original timestamp we wanted to delete notifications backwards from + # This enables us to break this into smaller database queries + timestamp_to_delete_backwards_from = day_to_delete_backwards_from - timedelta(hours=hour_delta) + + if service_id == 
'539d63a1-701d-400d-ab11-f3ee2319d4d4': + current_app.logger.info( + "Beginning insert_update_notification_history for GOV.UK Email from {} backwards".format( + timestamp_to_delete_backwards_from + ) + ) + + insert_update_notification_history(notification_type, timestamp_to_delete_backwards_from, service_id, qry_limit) + + if service_id == '539d63a1-701d-400d-ab11-f3ee2319d4d4': + current_app.logger.info( + "Beginning _delete_notifications for GOV.UK Email {} backwards".format( + timestamp_to_delete_backwards_from + ) + ) + + deleted += _delete_notifications( + notification_type, timestamp_to_delete_backwards_from, service_id, qry_limit + ) + + return deleted + + def _delete_notifications(notification_type, date_to_delete_from, service_id, query_limit): subquery = db.session.query( Notification.id From 8ea09be1ff021403219555b88f97347889cbaa95 Mon Sep 17 00:00:00 2001 From: Leo Hemsted Date: Fri, 20 Mar 2020 19:11:24 +0000 Subject: [PATCH 2/2] order the insert/update by created_at you should always order by when doing a limit/offset, to guarantee that the second time you run that query, the order hasn't changed and you aren't just repeating the task with an overlap of notifications. Luckily, in this case we haven't lost any data, because: * we have an on conflict do update statement so when we returned duplicate rows it would just do an update * when we delete, we cross-reference with the notification history so if a row always got missed, we won't delete it. This resulted in, for example, govuk email still having a handful of notifications in the table from 9th despite the task running successfully every day until the 18th of march. order by created_at ascending so that we start with oldest notifications first, in case it's interrupted part way through. 
--- app/dao/notifications_dao.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/dao/notifications_dao.py b/app/dao/notifications_dao.py index da8d357bf..c91070eea 100644 --- a/app/dao/notifications_dao.py +++ b/app/dao/notifications_dao.py @@ -420,6 +420,8 @@ def insert_update_notification_history(notification_type, date_to_delete_from, s Notification.service_id == service_id, Notification.created_at < date_to_delete_from, Notification.key_type != KEY_TYPE_TEST + ).order_by( + Notification.created_at ) notifications_count = notification_query.count()