From 219023f4c666a032850e8146edeaccb87284ae64 Mon Sep 17 00:00:00 2001 From: David McDonald Date: Tue, 15 Dec 2020 09:48:08 +0000 Subject: [PATCH 1/4] Fix test that was passing unintentionally This test was added in https://github.com/alphagov/notifications-api/commit/ebb43082d51b9f27b17190abb0537a710a544408 However, there are a few problems with it 1. The test name doesn't seem to match what the code is doing. It looks like instead that it is NOT trying to delete from s3 when the letter has not been sent and therefore I've updated the test name as such. 2. `delete_notifications_older_than_retention_by_type` was being called with `email` as it's argument which doesn't match. This is a test for letters. It definitely wouldn't do any looking in s3 for emails. 3. `created_at` needed bumping back into the past, past the default 7 days retention so these letters would be considered old enough to delete 4. For the letter to not be sent, it needs to be in `created`, not in `sending` so I have updated the status. Note, there could be other statuses which class as 'not sent' but this is the most obvious one to test with --- .../test_notification_dao_delete_notifications.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/app/dao/notification_dao/test_notification_dao_delete_notifications.py b/tests/app/dao/notification_dao/test_notification_dao_delete_notifications.py index 091826430..8d4a2fb71 100644 --- a/tests/app/dao/notification_dao/test_notification_dao_delete_notifications.py +++ b/tests/app/dao/notification_dao/test_notification_dao_delete_notifications.py @@ -212,13 +212,17 @@ def test_delete_notifications_delete_notification_type_for_default_time_if_no_da assert Notification.query.filter_by(notification_type='email').count() == 1 -def test_delete_notifications_does_try_to_delete_from_s3_when_letter_has_not_been_sent(sample_service, mocker): +def test_delete_notifications_doesnt_try_to_delete_from_s3_when_letter_has_not_sent(sample_service, mocker): mock_get_s3 = mocker.patch("app.dao.notifications_dao.get_s3_bucket_objects") letter_template = create_template(service=sample_service, template_type='letter') - create_notification(template=letter_template, status='sending', - reference='LETTER_REF') - delete_notifications_older_than_retention_by_type('email', qry_limit=1) + create_notification( + template=letter_template, + status='created', + reference='LETTER_REF', + created_at=datetime.utcnow() - timedelta(days=14) + ) + delete_notifications_older_than_retention_by_type('letter', qry_limit=1) mock_get_s3.assert_not_called() From 1bf9b29905c8eafce5b67d1c5829e6ddd603c9d9 Mon Sep 17 00:00:00 2001 From: David McDonald Date: Wed, 16 Dec 2020 10:39:31 +0000 Subject: [PATCH 2/4] Document behaviour of s3 letter deleting The behaviour was a bit of opaque so I have added tests around it so it's clear what it is doing and why. No functionality has changed --- app/dao/notifications_dao.py | 4 +- ...t_notification_dao_delete_notifications.py | 48 +++++++++++++++++-- tests/app/db.py | 2 +- 3 files changed, 48 insertions(+), 6 deletions(-) diff --git a/app/dao/notifications_dao.py b/app/dao/notifications_dao.py index aa57779f7..ba50aea25 100644 --- a/app/dao/notifications_dao.py +++ b/app/dao/notifications_dao.py @@ -428,7 +428,9 @@ def _delete_letters_from_s3( ).limit(query_limit).all() for letter in letters_to_delete_from_s3: bucket_name = current_app.config['LETTERS_PDF_BUCKET_NAME'] - # I don't think we need this anymore, we should update the query to get letters sent 7 days ago + # although letters without a `sent_at` timestamp do have PDFs, they do not exist in the + # production-letters-pdf bucket as they never made it that far so we do not try and delete + # them from it if letter.sent_at: prefix = get_letter_pdf_filename(reference=letter.reference, crown=letter.service.crown, diff --git a/tests/app/dao/notification_dao/test_notification_dao_delete_notifications.py b/tests/app/dao/notification_dao/test_notification_dao_delete_notifications.py index 8d4a2fb71..9ab5872e0 100644 --- a/tests/app/dao/notification_dao/test_notification_dao_delete_notifications.py +++ b/tests/app/dao/notification_dao/test_notification_dao_delete_notifications.py @@ -212,20 +212,60 @@ def test_delete_notifications_delete_notification_type_for_default_time_if_no_da assert Notification.query.filter_by(notification_type='email').count() == 1 -def test_delete_notifications_doesnt_try_to_delete_from_s3_when_letter_has_not_sent(sample_service, mocker): +@pytest.mark.parametrize( + 'notification_status', ['created', 'validation-failed', 'virus-scan-failed', 'pending-virus-check'] +) +def test_delete_notifications_deletes_letters_not_sent_from_table_but_not_s3( + sample_service, mocker, notification_status +): mock_get_s3 = mocker.patch("app.dao.notifications_dao.get_s3_bucket_objects") letter_template = create_template(service=sample_service, template_type='letter') - create_notification( template=letter_template, - status='created', + status=notification_status, reference='LETTER_REF', created_at=datetime.utcnow() - timedelta(days=14) ) - delete_notifications_older_than_retention_by_type('letter', qry_limit=1) + assert Notification.query.count() == 1 + assert NotificationHistory.query.count() == 0 + + delete_notifications_older_than_retention_by_type('letter') + + assert Notification.query.count() == 0 + assert NotificationHistory.query.count() == 1 mock_get_s3.assert_not_called() +@pytest.mark.parametrize('notification_status', ['sending', 'delivered', 'returned-letter', 'technical-failure']) +def test_delete_notifications_deletes_letters_sent_from_table_and_s3( + sample_service, mocker, notification_status +): + mock_get_s3 = mocker.patch("app.dao.notifications_dao.get_s3_bucket_objects") + letter_template = create_template(service=sample_service, template_type='letter') + eight_days_ago = datetime.utcnow() - timedelta(days=8) + create_notification( + template=letter_template, + status=notification_status, + reference='LETTER_REF', + created_at=eight_days_ago, + sent_at=eight_days_ago + ) + assert Notification.query.count() == 1 + assert NotificationHistory.query.count() == 0 + + delete_notifications_older_than_retention_by_type('letter') + + assert Notification.query.count() == 0 + assert NotificationHistory.query.count() == 1 + mock_get_s3.assert_called_once_with( + bucket_name=current_app.config['LETTERS_PDF_BUCKET_NAME'], + subfolder="{}/NOTIFY.LETTER_REF.D.2.C.C.{}.PDF".format( + str(eight_days_ago.date()), + eight_days_ago.strftime('%Y%m%d%H%M%S') + ) + ) + + @freeze_time('2020-03-25 00:01') def test_delete_notifications_calls_subquery_multiple_times(sample_template): create_notification(template=sample_template, created_at=datetime.now() - timedelta(days=7, minutes=3), diff --git a/tests/app/db.py b/tests/app/db.py index 8208fee49..336c0f50b 100644 --- a/tests/app/db.py +++ b/tests/app/db.py @@ -256,7 +256,7 @@ def create_notification( if to_field is None: to_field = '+447700900855' if template.template_type == SMS_TYPE else 'test@example.com' - if status != 'created': + if status not in ('created', 'validation-failed', 'virus-scan-failed', 'pending-virus-check'): sent_at = sent_at or datetime.utcnow() updated_at = updated_at or datetime.utcnow() From 47e146f010f5dc7e83f3525e547b1775868e485d Mon Sep 17 00:00:00 2001 From: David McDonald Date: Wed, 16 Dec 2020 10:40:30 +0000 Subject: [PATCH 3/4] Move variable out of loop that didn't need to be --- app/dao/notifications_dao.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/dao/notifications_dao.py b/app/dao/notifications_dao.py index ba50aea25..a6198f242 100644 --- a/app/dao/notifications_dao.py +++ b/app/dao/notifications_dao.py @@ -419,6 +419,7 @@ def _move_notifications_to_notification_history(notification_type, service_id, d def _delete_letters_from_s3( notification_type, service_id, date_to_delete_from, query_limit ): + bucket_name = current_app.config['LETTERS_PDF_BUCKET_NAME'] letters_to_delete_from_s3 = db.session.query( Notification ).filter( @@ -427,7 +428,6 @@ def _delete_letters_from_s3( Notification.service_id == service_id ).limit(query_limit).all() for letter in letters_to_delete_from_s3: - bucket_name = current_app.config['LETTERS_PDF_BUCKET_NAME'] # although letters without a `sent_at` timestamp do have PDFs, they do not exist in the # production-letters-pdf bucket as they never made it that far so we do not try and delete # them from it From e35ea57ba28e7931a05b32812a731469224617e7 Mon Sep 17 00:00:00 2001 From: David McDonald Date: Wed, 16 Dec 2020 10:50:11 +0000 Subject: [PATCH 4/4] Do not delete letters if not in final state A few weeks ago, we deleted some pdf letters that had reached their retention period. However, these letters were in the 'created' state so it's very arguable that we should not have deleted them because we were expecting to resend them and were unable to. Part of the reason for this is that we marked the letters back to `created` as the status but we did not nullify the `sent_at` timestamp, meaning the check on https://github.com/alphagov/notifications-api/blob/ebb43082d51b9f27b17190abb0537a710a544408/app/dao/notifications_dao.py#L346 did not catch it. Regardless of that check, which controls whether the files were removed from S3, they were also archived into the `notification_history` table as by default. This commit does changes our code such that letters that are not in their final state do not go through our retention process. This could mean they violate their retention policy but that is likely the lesser of two evils (the other being we delete them and are unable to resend them). Note, `sending` letters have been included in those not to be removed because there is a risk that we give the letter to DVLA and put it in `sending` but then they come back to us later telling us they've had problems and require us to resend. --- app/dao/notifications_dao.py | 53 ++++++++++++------- ...t_notification_dao_delete_notifications.py | 30 +++++++++-- 2 files changed, 61 insertions(+), 22 deletions(-) diff --git a/app/dao/notifications_dao.py b/app/dao/notifications_dao.py index a6198f242..1d47b496d 100644 --- a/app/dao/notifications_dao.py +++ b/app/dao/notifications_dao.py @@ -43,6 +43,7 @@ from app.models import ( NOTIFICATION_TEMPORARY_FAILURE, NOTIFICATION_PERMANENT_FAILURE, NOTIFICATION_SENT, + NOTIFICATION_STATUS_TYPES_COMPLETED, SMS_TYPE, EMAIL_TYPE, ServiceDataRetention, @@ -357,6 +358,20 @@ def insert_notification_history_delete_notifications( AND key_type in ('normal', 'team') limit :qry_limit """ + select_into_temp_table_for_letters = """ + CREATE TEMP TABLE NOTIFICATION_ARCHIVE AS + SELECT id, job_id, job_row_number, service_id, template_id, template_version, api_key_id, + key_type, notification_type, created_at, sent_at, sent_by, updated_at, reference, billable_units, + client_reference, international, phone_prefix, rate_multiplier, notification_status, + created_by_id, postage, document_download_count + FROM notifications + WHERE service_id = :service_id + AND notification_type = :notification_type + AND created_at < :timestamp_to_delete_backwards_from + AND notification_status NOT IN ('pending-virus-check', 'created', 'sending') + AND key_type in ('normal', 'team') + limit :qry_limit + """ # Insert into NotificationHistory if the row already exists do nothing. insert_query = """ insert into notification_history @@ -376,7 +391,9 @@ def insert_notification_history_delete_notifications( } db.session.execute(drop_table_if_exists) - db.session.execute(select_into_temp_table, input_params) + + select_to_use = select_into_temp_table_for_letters if notification_type == 'letter' else select_into_temp_table + db.session.execute(select_to_use, input_params) result = db.session.execute("select * from NOTIFICATION_ARCHIVE") @@ -425,25 +442,25 @@ def _delete_letters_from_s3( ).filter( Notification.notification_type == notification_type, Notification.created_at < date_to_delete_from, - Notification.service_id == service_id - ).limit(query_limit).all() - for letter in letters_to_delete_from_s3: - # although letters without a `sent_at` timestamp do have PDFs, they do not exist in the + Notification.service_id == service_id, + # although letters in non completed statuses do have PDFs in s3, they do not exist in the # production-letters-pdf bucket as they never made it that far so we do not try and delete # them from it - if letter.sent_at: - prefix = get_letter_pdf_filename(reference=letter.reference, - crown=letter.service.crown, - created_at=letter.created_at, - ignore_folder=letter.key_type == KEY_TYPE_TEST, - postage=letter.postage) - s3_objects = get_s3_bucket_objects(bucket_name=bucket_name, subfolder=prefix) - for s3_object in s3_objects: - try: - remove_s3_object(bucket_name, s3_object['Key']) - except ClientError: - current_app.logger.exception( - "Could not delete S3 object with filename: {}".format(s3_object['Key'])) + Notification.status.in_(NOTIFICATION_STATUS_TYPES_COMPLETED) + ).limit(query_limit).all() + for letter in letters_to_delete_from_s3: + prefix = get_letter_pdf_filename(reference=letter.reference, + crown=letter.service.crown, + created_at=letter.created_at, + ignore_folder=letter.key_type == KEY_TYPE_TEST, + postage=letter.postage) + s3_objects = get_s3_bucket_objects(bucket_name=bucket_name, subfolder=prefix) + for s3_object in s3_objects: + try: + remove_s3_object(bucket_name, s3_object['Key']) + except ClientError: + current_app.logger.exception( + "Could not delete S3 object with filename: {}".format(s3_object['Key'])) @transactional diff --git a/tests/app/dao/notification_dao/test_notification_dao_delete_notifications.py b/tests/app/dao/notification_dao/test_notification_dao_delete_notifications.py index 9ab5872e0..fe819d5fd 100644 --- a/tests/app/dao/notification_dao/test_notification_dao_delete_notifications.py +++ b/tests/app/dao/notification_dao/test_notification_dao_delete_notifications.py @@ -213,9 +213,9 @@ def test_delete_notifications_delete_notification_type_for_default_time_if_no_da @pytest.mark.parametrize( - 'notification_status', ['created', 'validation-failed', 'virus-scan-failed', 'pending-virus-check'] + 'notification_status', ['validation-failed', 'virus-scan-failed'] ) -def test_delete_notifications_deletes_letters_not_sent_from_table_but_not_s3( +def test_delete_notifications_deletes_letters_not_sent_and_in_final_state_from_table_but_not_s3( sample_service, mocker, notification_status ): mock_get_s3 = mocker.patch("app.dao.notifications_dao.get_s3_bucket_objects") @@ -236,8 +236,8 @@ def test_delete_notifications_deletes_letters_not_sent_from_table_but_not_s3( mock_get_s3.assert_not_called() -@pytest.mark.parametrize('notification_status', ['sending', 'delivered', 'returned-letter', 'technical-failure']) -def test_delete_notifications_deletes_letters_sent_from_table_and_s3( +@pytest.mark.parametrize('notification_status', ['delivered', 'returned-letter', 'technical-failure']) +def test_delete_notifications_deletes_letters_sent_and_in_final_state_from_table_and_s3( sample_service, mocker, notification_status ): mock_get_s3 = mocker.patch("app.dao.notifications_dao.get_s3_bucket_objects") @@ -266,6 +266,28 @@ def test_delete_notifications_deletes_letters_sent_from_table_and_s3( ) +@pytest.mark.parametrize('notification_status', ['pending-virus-check', 'created', 'sending']) +def test_delete_notifications_does_not_delete_letters_not_yet_in_final_state( + sample_service, mocker, notification_status +): + mock_get_s3 = mocker.patch("app.dao.notifications_dao.get_s3_bucket_objects") + letter_template = create_template(service=sample_service, template_type='letter') + create_notification( + template=letter_template, + status=notification_status, + reference='LETTER_REF', + created_at=datetime.utcnow() - timedelta(days=8), + ) + assert Notification.query.count() == 1 + assert NotificationHistory.query.count() == 0 + + delete_notifications_older_than_retention_by_type('letter') + + assert Notification.query.count() == 1 + assert NotificationHistory.query.count() == 0 + mock_get_s3.assert_not_called() + + @freeze_time('2020-03-25 00:01') def test_delete_notifications_calls_subquery_multiple_times(sample_template): create_notification(template=sample_template, created_at=datetime.now() - timedelta(days=7, minutes=3),