From e35ea57ba28e7931a05b32812a731469224617e7 Mon Sep 17 00:00:00 2001 From: David McDonald Date: Wed, 16 Dec 2020 10:50:11 +0000 Subject: [PATCH] Do not delete letters if not in final state A few weeks ago, we deleted some pdf letters that had reached their retention period. However, these letters were in the 'created' state so it's very arguable that we should not have deleted them because we were expecting to resend them and were unable to. Part of the reason for this is that we marked the letters back to `created` as the status but we did not nullify the `sent_at` timestamp, meaning the check on https://github.com/alphagov/notifications-api/blob/ebb43082d51b9f27b17190abb0537a710a544408/app/dao/notifications_dao.py#L346 did not catch it. Regardless of that check, which controls whether the files were removed from S3, they were also archived into the `notification_history` table as by default. This commit does changes our code such that letters that are not in their final state do not go through our retention process. This could mean they violate their retention policy but that is likely the lesser of two evils (the other being we delete them and are unable to resend them). Note, `sending` letters have been included in those not to be removed because there is a risk that we give the letter to DVLA and put it in `sending` but then they come back to us later telling us they've had problems and require us to resend. --- app/dao/notifications_dao.py | 53 ++++++++++++------- ...t_notification_dao_delete_notifications.py | 30 +++++++++-- 2 files changed, 61 insertions(+), 22 deletions(-) diff --git a/app/dao/notifications_dao.py b/app/dao/notifications_dao.py index a6198f242..1d47b496d 100644 --- a/app/dao/notifications_dao.py +++ b/app/dao/notifications_dao.py @@ -43,6 +43,7 @@ from app.models import ( NOTIFICATION_TEMPORARY_FAILURE, NOTIFICATION_PERMANENT_FAILURE, NOTIFICATION_SENT, + NOTIFICATION_STATUS_TYPES_COMPLETED, SMS_TYPE, EMAIL_TYPE, ServiceDataRetention, @@ -357,6 +358,20 @@ def insert_notification_history_delete_notifications( AND key_type in ('normal', 'team') limit :qry_limit """ + select_into_temp_table_for_letters = """ + CREATE TEMP TABLE NOTIFICATION_ARCHIVE AS + SELECT id, job_id, job_row_number, service_id, template_id, template_version, api_key_id, + key_type, notification_type, created_at, sent_at, sent_by, updated_at, reference, billable_units, + client_reference, international, phone_prefix, rate_multiplier, notification_status, + created_by_id, postage, document_download_count + FROM notifications + WHERE service_id = :service_id + AND notification_type = :notification_type + AND created_at < :timestamp_to_delete_backwards_from + AND notification_status NOT IN ('pending-virus-check', 'created', 'sending') + AND key_type in ('normal', 'team') + limit :qry_limit + """ # Insert into NotificationHistory if the row already exists do nothing. insert_query = """ insert into notification_history @@ -376,7 +391,9 @@ def insert_notification_history_delete_notifications( } db.session.execute(drop_table_if_exists) - db.session.execute(select_into_temp_table, input_params) + + select_to_use = select_into_temp_table_for_letters if notification_type == 'letter' else select_into_temp_table + db.session.execute(select_to_use, input_params) result = db.session.execute("select * from NOTIFICATION_ARCHIVE") @@ -425,25 +442,25 @@ def _delete_letters_from_s3( ).filter( Notification.notification_type == notification_type, Notification.created_at < date_to_delete_from, - Notification.service_id == service_id - ).limit(query_limit).all() - for letter in letters_to_delete_from_s3: - # although letters without a `sent_at` timestamp do have PDFs, they do not exist in the + Notification.service_id == service_id, + # although letters in non completed statuses do have PDFs in s3, they do not exist in the # production-letters-pdf bucket as they never made it that far so we do not try and delete # them from it - if letter.sent_at: - prefix = get_letter_pdf_filename(reference=letter.reference, - crown=letter.service.crown, - created_at=letter.created_at, - ignore_folder=letter.key_type == KEY_TYPE_TEST, - postage=letter.postage) - s3_objects = get_s3_bucket_objects(bucket_name=bucket_name, subfolder=prefix) - for s3_object in s3_objects: - try: - remove_s3_object(bucket_name, s3_object['Key']) - except ClientError: - current_app.logger.exception( - "Could not delete S3 object with filename: {}".format(s3_object['Key'])) + Notification.status.in_(NOTIFICATION_STATUS_TYPES_COMPLETED) + ).limit(query_limit).all() + for letter in letters_to_delete_from_s3: + prefix = get_letter_pdf_filename(reference=letter.reference, + crown=letter.service.crown, + created_at=letter.created_at, + ignore_folder=letter.key_type == KEY_TYPE_TEST, + postage=letter.postage) + s3_objects = get_s3_bucket_objects(bucket_name=bucket_name, subfolder=prefix) + for s3_object in s3_objects: + try: + remove_s3_object(bucket_name, s3_object['Key']) + except ClientError: + current_app.logger.exception( + "Could not delete S3 object with filename: {}".format(s3_object['Key'])) @transactional diff --git a/tests/app/dao/notification_dao/test_notification_dao_delete_notifications.py b/tests/app/dao/notification_dao/test_notification_dao_delete_notifications.py index 9ab5872e0..fe819d5fd 100644 --- a/tests/app/dao/notification_dao/test_notification_dao_delete_notifications.py +++ b/tests/app/dao/notification_dao/test_notification_dao_delete_notifications.py @@ -213,9 +213,9 @@ def test_delete_notifications_delete_notification_type_for_default_time_if_no_da @pytest.mark.parametrize( - 'notification_status', ['created', 'validation-failed', 'virus-scan-failed', 'pending-virus-check'] + 'notification_status', ['validation-failed', 'virus-scan-failed'] ) -def test_delete_notifications_deletes_letters_not_sent_from_table_but_not_s3( +def test_delete_notifications_deletes_letters_not_sent_and_in_final_state_from_table_but_not_s3( sample_service, mocker, notification_status ): mock_get_s3 = mocker.patch("app.dao.notifications_dao.get_s3_bucket_objects") @@ -236,8 +236,8 @@ def test_delete_notifications_deletes_letters_not_sent_from_table_but_not_s3( mock_get_s3.assert_not_called() -@pytest.mark.parametrize('notification_status', ['sending', 'delivered', 'returned-letter', 'technical-failure']) -def test_delete_notifications_deletes_letters_sent_from_table_and_s3( +@pytest.mark.parametrize('notification_status', ['delivered', 'returned-letter', 'technical-failure']) +def test_delete_notifications_deletes_letters_sent_and_in_final_state_from_table_and_s3( sample_service, mocker, notification_status ): mock_get_s3 = mocker.patch("app.dao.notifications_dao.get_s3_bucket_objects") @@ -266,6 +266,28 @@ def test_delete_notifications_deletes_letters_sent_from_table_and_s3( ) +@pytest.mark.parametrize('notification_status', ['pending-virus-check', 'created', 'sending']) +def test_delete_notifications_does_not_delete_letters_not_yet_in_final_state( + sample_service, mocker, notification_status +): + mock_get_s3 = mocker.patch("app.dao.notifications_dao.get_s3_bucket_objects") + letter_template = create_template(service=sample_service, template_type='letter') + create_notification( + template=letter_template, + status=notification_status, + reference='LETTER_REF', + created_at=datetime.utcnow() - timedelta(days=8), + ) + assert Notification.query.count() == 1 + assert NotificationHistory.query.count() == 0 + + delete_notifications_older_than_retention_by_type('letter') + + assert Notification.query.count() == 1 + assert NotificationHistory.query.count() == 0 + mock_get_s3.assert_not_called() + + @freeze_time('2020-03-25 00:01') def test_delete_notifications_calls_subquery_multiple_times(sample_template): create_notification(template=sample_template, created_at=datetime.now() - timedelta(days=7, minutes=3),