Merge pull request #3063 from alphagov/letter-retention-fixes-third-approach

Do not let us delete letters that have not reached a final state
This commit is contained in:
David McDonald
2020-12-21 12:51:29 +00:00
committed by GitHub
3 changed files with 107 additions and 22 deletions

View File

@@ -43,6 +43,7 @@ from app.models import (
NOTIFICATION_TEMPORARY_FAILURE, NOTIFICATION_TEMPORARY_FAILURE,
NOTIFICATION_PERMANENT_FAILURE, NOTIFICATION_PERMANENT_FAILURE,
NOTIFICATION_SENT, NOTIFICATION_SENT,
NOTIFICATION_STATUS_TYPES_COMPLETED,
SMS_TYPE, SMS_TYPE,
EMAIL_TYPE, EMAIL_TYPE,
ServiceDataRetention, ServiceDataRetention,
@@ -357,6 +358,20 @@ def insert_notification_history_delete_notifications(
AND key_type in ('normal', 'team') AND key_type in ('normal', 'team')
limit :qry_limit limit :qry_limit
""" """
select_into_temp_table_for_letters = """
CREATE TEMP TABLE NOTIFICATION_ARCHIVE AS
SELECT id, job_id, job_row_number, service_id, template_id, template_version, api_key_id,
key_type, notification_type, created_at, sent_at, sent_by, updated_at, reference, billable_units,
client_reference, international, phone_prefix, rate_multiplier, notification_status,
created_by_id, postage, document_download_count
FROM notifications
WHERE service_id = :service_id
AND notification_type = :notification_type
AND created_at < :timestamp_to_delete_backwards_from
AND notification_status NOT IN ('pending-virus-check', 'created', 'sending')
AND key_type in ('normal', 'team')
limit :qry_limit
"""
# Insert into NotificationHistory if the row already exists do nothing. # Insert into NotificationHistory if the row already exists do nothing.
insert_query = """ insert_query = """
insert into notification_history insert into notification_history
@@ -376,7 +391,9 @@ def insert_notification_history_delete_notifications(
} }
db.session.execute(drop_table_if_exists) db.session.execute(drop_table_if_exists)
db.session.execute(select_into_temp_table, input_params)
select_to_use = select_into_temp_table_for_letters if notification_type == 'letter' else select_into_temp_table
db.session.execute(select_to_use, input_params)
result = db.session.execute("select * from NOTIFICATION_ARCHIVE") result = db.session.execute("select * from NOTIFICATION_ARCHIVE")
@@ -419,29 +436,31 @@ def _move_notifications_to_notification_history(notification_type, service_id, d
def _delete_letters_from_s3( def _delete_letters_from_s3(
notification_type, service_id, date_to_delete_from, query_limit notification_type, service_id, date_to_delete_from, query_limit
): ):
bucket_name = current_app.config['LETTERS_PDF_BUCKET_NAME']
letters_to_delete_from_s3 = db.session.query( letters_to_delete_from_s3 = db.session.query(
Notification Notification
).filter( ).filter(
Notification.notification_type == notification_type, Notification.notification_type == notification_type,
Notification.created_at < date_to_delete_from, Notification.created_at < date_to_delete_from,
Notification.service_id == service_id Notification.service_id == service_id,
# although letters in non completed statuses do have PDFs in s3, they do not exist in the
# production-letters-pdf bucket as they never made it that far so we do not try and delete
# them from it
Notification.status.in_(NOTIFICATION_STATUS_TYPES_COMPLETED)
).limit(query_limit).all() ).limit(query_limit).all()
for letter in letters_to_delete_from_s3: for letter in letters_to_delete_from_s3:
bucket_name = current_app.config['LETTERS_PDF_BUCKET_NAME'] prefix = get_letter_pdf_filename(reference=letter.reference,
# I don't think we need this anymore, we should update the query to get letters sent 7 days ago crown=letter.service.crown,
if letter.sent_at: created_at=letter.created_at,
prefix = get_letter_pdf_filename(reference=letter.reference, ignore_folder=letter.key_type == KEY_TYPE_TEST,
crown=letter.service.crown, postage=letter.postage)
created_at=letter.created_at, s3_objects = get_s3_bucket_objects(bucket_name=bucket_name, subfolder=prefix)
ignore_folder=letter.key_type == KEY_TYPE_TEST, for s3_object in s3_objects:
postage=letter.postage) try:
s3_objects = get_s3_bucket_objects(bucket_name=bucket_name, subfolder=prefix) remove_s3_object(bucket_name, s3_object['Key'])
for s3_object in s3_objects: except ClientError:
try: current_app.logger.exception(
remove_s3_object(bucket_name, s3_object['Key']) "Could not delete S3 object with filename: {}".format(s3_object['Key']))
except ClientError:
current_app.logger.exception(
"Could not delete S3 object with filename: {}".format(s3_object['Key']))
@transactional @transactional

View File

@@ -212,13 +212,79 @@ def test_delete_notifications_delete_notification_type_for_default_time_if_no_da
assert Notification.query.filter_by(notification_type='email').count() == 1 assert Notification.query.filter_by(notification_type='email').count() == 1
def test_delete_notifications_does_try_to_delete_from_s3_when_letter_has_not_been_sent(sample_service, mocker): @pytest.mark.parametrize(
'notification_status', ['validation-failed', 'virus-scan-failed']
)
def test_delete_notifications_deletes_letters_not_sent_and_in_final_state_from_table_but_not_s3(
sample_service, mocker, notification_status
):
mock_get_s3 = mocker.patch("app.dao.notifications_dao.get_s3_bucket_objects") mock_get_s3 = mocker.patch("app.dao.notifications_dao.get_s3_bucket_objects")
letter_template = create_template(service=sample_service, template_type='letter') letter_template = create_template(service=sample_service, template_type='letter')
create_notification(
template=letter_template,
status=notification_status,
reference='LETTER_REF',
created_at=datetime.utcnow() - timedelta(days=14)
)
assert Notification.query.count() == 1
assert NotificationHistory.query.count() == 0
create_notification(template=letter_template, status='sending', delete_notifications_older_than_retention_by_type('letter')
reference='LETTER_REF')
delete_notifications_older_than_retention_by_type('email', qry_limit=1) assert Notification.query.count() == 0
assert NotificationHistory.query.count() == 1
mock_get_s3.assert_not_called()
@pytest.mark.parametrize('notification_status', ['delivered', 'returned-letter', 'technical-failure'])
def test_delete_notifications_deletes_letters_sent_and_in_final_state_from_table_and_s3(
sample_service, mocker, notification_status
):
mock_get_s3 = mocker.patch("app.dao.notifications_dao.get_s3_bucket_objects")
letter_template = create_template(service=sample_service, template_type='letter')
eight_days_ago = datetime.utcnow() - timedelta(days=8)
create_notification(
template=letter_template,
status=notification_status,
reference='LETTER_REF',
created_at=eight_days_ago,
sent_at=eight_days_ago
)
assert Notification.query.count() == 1
assert NotificationHistory.query.count() == 0
delete_notifications_older_than_retention_by_type('letter')
assert Notification.query.count() == 0
assert NotificationHistory.query.count() == 1
mock_get_s3.assert_called_once_with(
bucket_name=current_app.config['LETTERS_PDF_BUCKET_NAME'],
subfolder="{}/NOTIFY.LETTER_REF.D.2.C.C.{}.PDF".format(
str(eight_days_ago.date()),
eight_days_ago.strftime('%Y%m%d%H%M%S')
)
)
@pytest.mark.parametrize('notification_status', ['pending-virus-check', 'created', 'sending'])
def test_delete_notifications_does_not_delete_letters_not_yet_in_final_state(
sample_service, mocker, notification_status
):
mock_get_s3 = mocker.patch("app.dao.notifications_dao.get_s3_bucket_objects")
letter_template = create_template(service=sample_service, template_type='letter')
create_notification(
template=letter_template,
status=notification_status,
reference='LETTER_REF',
created_at=datetime.utcnow() - timedelta(days=8),
)
assert Notification.query.count() == 1
assert NotificationHistory.query.count() == 0
delete_notifications_older_than_retention_by_type('letter')
assert Notification.query.count() == 1
assert NotificationHistory.query.count() == 0
mock_get_s3.assert_not_called() mock_get_s3.assert_not_called()

View File

@@ -256,7 +256,7 @@ def create_notification(
if to_field is None: if to_field is None:
to_field = '+447700900855' if template.template_type == SMS_TYPE else 'test@example.com' to_field = '+447700900855' if template.template_type == SMS_TYPE else 'test@example.com'
if status != 'created': if status not in ('created', 'validation-failed', 'virus-scan-failed', 'pending-virus-check'):
sent_at = sent_at or datetime.utcnow() sent_at = sent_at or datetime.utcnow()
updated_at = updated_at or datetime.utcnow() updated_at = updated_at or datetime.utcnow()