diff --git a/app/aws/s3.py b/app/aws/s3.py index a440745da..299d7de76 100644 --- a/app/aws/s3.py +++ b/app/aws/s3.py @@ -66,6 +66,17 @@ def get_s3_bucket_objects(bucket_name, subfolder='', older_than=7, limit_days=2) return all_objects_in_bucket +def get_s3_object_by_prefix(bucket_name, prefix): + boto_client = client('s3', current_app.config['AWS_REGION']) + paginator = boto_client.get_paginator('list_objects_v2') + page_iterator = paginator.paginate( + Bucket=bucket_name, + Prefix=prefix + ) + + return page_iterator + + def filter_s3_bucket_objects_within_date_range(bucket_objects, older_than=7, limit_days=2): """ S3 returns the Object['LastModified'] as an 'offset-aware' timestamp so the diff --git a/app/dao/notifications_dao.py b/app/dao/notifications_dao.py index dce08f132..7ecd86352 100644 --- a/app/dao/notifications_dao.py +++ b/app/dao/notifications_dao.py @@ -22,6 +22,8 @@ from sqlalchemy.sql import functions from notifications_utils.international_billing_rates import INTERNATIONAL_BILLING_RATES from app import db, create_uuid +from app.aws.s3 import get_s3_object_by_prefix +from app.letters.utils import LETTERS_PDF_FILE_LOCATION_STRUCTURE from app.utils import midnight_n_days_ago, escape_special_characters from app.errors import InvalidRequest from app.models import ( @@ -42,7 +44,8 @@ from app.models import ( NOTIFICATION_PERMANENT_FAILURE, NOTIFICATION_SENT, SMS_TYPE, - EMAIL_TYPE + EMAIL_TYPE, + ServiceDataRetention ) from app.dao.dao_utils import transactional @@ -234,18 +237,18 @@ def get_notifications(filter_dict=None): @statsd(namespace="dao") def get_notifications_for_service( - service_id, - filter_dict=None, - page=1, - page_size=None, - limit_days=None, - key_type=None, - personalisation=False, - include_jobs=False, - include_from_test_key=False, - older_than=None, - client_reference=None, - include_one_off=True + service_id, + filter_dict=None, + page=1, + page_size=None, + limit_days=None, + key_type=None, + personalisation=False, + include_jobs=False, + include_from_test_key=False, + older_than=None, + client_reference=None, + include_one_off=True ): if page_size is None: page_size = current_app.config['PAGE_SIZE'] @@ -310,14 +313,50 @@ def _filter_query(query, filter_dict=None): @statsd(namespace="dao") @transactional def delete_notifications_created_more_than_a_week_ago_by_type(notification_type): + flexible_data_retention = ServiceDataRetention.query.filter( + ServiceDataRetention.notification_type == notification_type + ).all() + deleted = 0 + for f in flexible_data_retention: + days_of_retention = convert_utc_to_bst(datetime.utcnow()).date() - timedelta(days=f.days_of_retention) + query = db.session.query(Notification).filter( + func.date(Notification.created_at) < days_of_retention, + Notification.notification_type == f.notification_type, Notification.service_id == f.service_id) + if notification_type == LETTER_TYPE: + _delete_letters_from_s3(query) + deleted += query.delete(synchronize_session='fetch') + seven_days_ago = convert_utc_to_bst(datetime.utcnow()).date() - timedelta(days=7) - deleted = db.session.query(Notification).filter( - func.date(Notification.created_at) < seven_days_ago, - Notification.notification_type == notification_type, - ).delete(synchronize_session='fetch') + services_with_data_retention = [x.service_id for x in flexible_data_retention] + query = db.session.query(Notification).filter(func.date(Notification.created_at) < seven_days_ago, + Notification.notification_type == notification_type, + Notification.service_id.notin_( + services_with_data_retention)) + if notification_type == LETTER_TYPE: + _delete_letters_from_s3(query=query) + deleted = query.delete(synchronize_session='fetch') return deleted +def _delete_letters_from_s3(query): + letters_to_delete_from_s3 = query.all() + for letter in letters_to_delete_from_s3: + bucket_name = current_app.config['LETTERS_PDF_BUCKET_NAME'] + sent_at = str(letter.sent_at.date()) + prefix = LETTERS_PDF_FILE_LOCATION_STRUCTURE.format( + folder=sent_at, + reference=letter.reference, + duplex="D", + letter_class="2", + colour="C", + crown="C" if letter.service.crown else "N", + date='' + ).upper()[:-5] + s3_objects = get_s3_object_by_prefix(bucket_name=bucket_name, prefix=prefix) + for s3_object in s3_objects: + s3_object.delete() + + @statsd(namespace="dao") @transactional def dao_delete_notifications_and_history_by_id(notification_id): @@ -330,7 +369,6 @@ def dao_delete_notifications_and_history_by_id(notification_id): def _timeout_notifications(current_statuses, new_status, timeout_start, updated_at): - notifications = Notification.query.filter( Notification.created_at < timeout_start, Notification.status.in_(current_statuses), @@ -393,12 +431,12 @@ def get_total_sent_notifications_in_date_range(start_date, end_date, notificatio def is_delivery_slow_for_provider( - sent_at, - provider, - threshold, - delivery_time, - service_id, - template_id + sent_at, + provider, + threshold, + delivery_time, + service_id, + template_id ): count = db.session.query(Notification).filter( Notification.service_id == service_id, @@ -433,7 +471,6 @@ def dao_update_notifications_by_reference(references, update_dict): @statsd(namespace="dao") def dao_get_notifications_by_to_field(service_id, search_term, notification_type=None, statuses=None): - if notification_type is None: notification_type = guess_notification_type(search_term) diff --git a/app/dao/service_data_retention_dao.py b/app/dao/service_data_retention_dao.py index 1e27c52a1..2e2859302 100644 --- a/app/dao/service_data_retention_dao.py +++ b/app/dao/service_data_retention_dao.py @@ -20,6 +20,14 @@ def fetch_service_data_retention(service_id): return data_retention_list +def fetch_service_data_retention_by_notification_type(service_id, notification_type): + data_retention_list = ServiceDataRetention.query.filter_by( + service_id=service_id, + notification_type=notification_type + ).first() + return data_retention_list + + @transactional def insert_service_data_retention(service_id, notification_type, days_of_retention): new_data_retention = ServiceDataRetention(service_id=service_id, diff --git a/app/letters/utils.py b/app/letters/utils.py index a293663c1..8b9248790 100644 --- a/app/letters/utils.py +++ b/app/letters/utils.py @@ -8,7 +8,6 @@ from notifications_utils.s3 import s3upload from app.models import KEY_TYPE_TEST from app.utils import convert_utc_to_bst -from app.variables import Retention class ScanErrorType(Enum): @@ -83,8 +82,7 @@ def upload_letter_pdf(notification, pdf_data, precompiled=False): filedata=pdf_data, region=current_app.config['AWS_REGION'], bucket_name=bucket_name, - file_location=upload_file_name, - tags={Retention.KEY: Retention.ONE_WEEK} + file_location=upload_file_name ) current_app.logger.info("Uploaded letters PDF {} to {} for notification id {}".format( diff --git a/tests/app/celery/test_letters_pdf_tasks.py b/tests/app/celery/test_letters_pdf_tasks.py index eeeea3891..ea1c73787 100644 --- a/tests/app/celery/test_letters_pdf_tasks.py +++ b/tests/app/celery/test_letters_pdf_tasks.py @@ -11,7 +11,6 @@ from requests import RequestException from sqlalchemy.orm.exc import NoResultFound from app.errors import VirusScanError -from app.variables import Retention from app.celery.letters_pdf_tasks import ( create_letters_pdf, get_letters_pdf, @@ -112,8 +111,7 @@ def test_create_letters_pdf_calls_s3upload(mocker, sample_letter_notification): bucket_name=current_app.config['LETTERS_PDF_BUCKET_NAME'], file_location=filename, filedata=b'\x00\x01', - region=current_app.config['AWS_REGION'], - tags={Retention.KEY: Retention.ONE_WEEK} + region=current_app.config['AWS_REGION'] ) diff --git a/tests/app/dao/notification_dao/test_notification_dao.py b/tests/app/dao/notification_dao/test_notification_dao.py index 3e36c697c..3a0b4e324 100644 --- a/tests/app/dao/notification_dao/test_notification_dao.py +++ b/tests/app/dao/notification_dao/test_notification_dao.py @@ -54,11 +54,9 @@ from app.models import ( from tests.app.conftest import ( sample_notification, sample_template as create_sample_template, - sample_email_template, sample_service, sample_job, sample_notification_history as create_notification_history, - sample_letter_template ) from tests.app.db import ( create_job, @@ -607,104 +605,6 @@ def test_update_notification_sets_status(sample_notification): assert notification_from_db.status == 'failed' -@pytest.mark.parametrize('month, delete_run_time', - [(4, '2016-04-10 23:40'), (1, '2016-01-11 00:40')]) -@pytest.mark.parametrize( - 'notification_type, expected_sms_count, expected_email_count, expected_letter_count', - [('sms', 7, 10, 10), - ('email', 10, 7, 10), - ('letter', 10, 10, 7)] -) -def test_should_delete_notifications_by_type_after_seven_days( - sample_service, - month, - delete_run_time, - notification_type, - expected_sms_count, - expected_email_count, - expected_letter_count -): - assert len(Notification.query.all()) == 0 - - email_template = create_template(service=sample_service, template_type='email') - sms_template = create_template(service=sample_service) - letter_template = create_template(service=sample_service, template_type='letter') - - # create one notification a day between 1st and 10th from 11:00 to 19:00 of each type - for i in range(1, 11): - past_date = '2016-0{0}-{1:02d} {1:02d}:00:00.000000'.format(month, i) - with freeze_time(past_date): - create_notification(template=email_template, created_at=datetime.utcnow(), status="permanent-failure") - create_notification(template=sms_template, created_at=datetime.utcnow(), status="delivered") - create_notification(template=letter_template, created_at=datetime.utcnow(), status="temporary-failure") - - all_notifications = Notification.query.all() - assert len(all_notifications) == 30 - - # Records from before 3rd should be deleted - with freeze_time(delete_run_time): - delete_notifications_created_more_than_a_week_ago_by_type(notification_type) - remaining_sms_notifications = Notification.query.filter_by(notification_type='sms').all() - remaining_letter_notifications = Notification.query.filter_by(notification_type='letter').all() - remaining_email_notifications = Notification.query.filter_by(notification_type='email').all() - - assert len(remaining_sms_notifications) == expected_sms_count - assert len(remaining_email_notifications) == expected_email_count - assert len(remaining_letter_notifications) == expected_letter_count - - if notification_type == 'sms': - notifications_to_check = remaining_sms_notifications - if notification_type == 'email': - notifications_to_check = remaining_email_notifications - if notification_type == 'letter': - notifications_to_check = remaining_letter_notifications - - for notification in notifications_to_check: - assert notification.created_at.date() >= date(2016, month, 3) - - -@pytest.mark.parametrize('notification_type', ['sms', 'email', 'letter']) -@freeze_time("2016-01-10 12:00:00.000000") -def test_should_not_delete_notification_history(notify_db, notify_db_session, sample_service, notification_type): - with freeze_time('2016-01-01 12:00'): - email_template = sample_email_template(notify_db, notify_db_session, service=sample_service) - sms_template = create_sample_template(notify_db, notify_db_session, service=sample_service) - letter_template = sample_letter_template(sample_service) - - sample_notification( - notify_db, - notify_db_session, - created_at=datetime.utcnow(), - status="failed", - service=sample_service, - template=email_template - ) - sample_notification( - notify_db, - notify_db_session, - created_at=datetime.utcnow(), - status="failed", - service=sample_service, - template=sms_template - ) - sample_notification( - notify_db, - notify_db_session, - created_at=datetime.utcnow(), - status="failed", - service=sample_service, - template=letter_template - ) - - assert Notification.query.count() == 3 - assert NotificationHistory.query.count() == 3 - - delete_notifications_created_more_than_a_week_ago_by_type(notification_type) - - assert Notification.query.count() == 2 - assert NotificationHistory.query.count() == 3 - - @freeze_time("2016-01-10") def test_should_limit_notifications_return_by_day_limit_plus_one(sample_template): assert len(Notification.query.all()) == 0 diff --git a/tests/app/dao/notification_dao/test_notification_dao_delete_notifications.py b/tests/app/dao/notification_dao/test_notification_dao_delete_notifications.py new file mode 100644 index 000000000..7d56ea35c --- /dev/null +++ b/tests/app/dao/notification_dao/test_notification_dao_delete_notifications.py @@ -0,0 +1,183 @@ +from datetime import ( + datetime, + date, + timedelta +) +import pytest +from flask import current_app +from freezegun import freeze_time +from app.dao.notifications_dao import delete_notifications_created_more_than_a_week_ago_by_type +from app.models import Notification, NotificationHistory +from tests.app.db import ( + create_template, + create_notification, + create_service_data_retention, + create_service +) + + +@pytest.mark.parametrize('month, delete_run_time', + [(4, '2016-04-10 23:40'), (1, '2016-01-11 00:40')]) +@pytest.mark.parametrize( + 'notification_type, expected_sms_count, expected_email_count, expected_letter_count', + [('sms', 7, 10, 10), + ('email', 10, 7, 10), + ('letter', 10, 10, 7)] +) +def test_should_delete_notifications_by_type_after_seven_days( + sample_service, + mocker, + month, + delete_run_time, + notification_type, + expected_sms_count, + expected_email_count, + expected_letter_count +): + mocker.patch("app.dao.notifications_dao.get_s3_object_by_prefix") + assert len(Notification.query.all()) == 0 + email_template, letter_template, sms_template = _create_templates(sample_service) + # create one notification a day between 1st and 10th from 11:00 to 19:00 of each type + for i in range(1, 11): + past_date = '2016-0{0}-{1:02d} {1:02d}:00:00.000000'.format(month, i) + with freeze_time(past_date): + create_notification(template=email_template, created_at=datetime.utcnow(), status="permanent-failure") + create_notification(template=sms_template, created_at=datetime.utcnow(), status="delivered") + create_notification(template=letter_template, created_at=datetime.utcnow(), status="temporary-failure") + all_notifications = Notification.query.all() + assert len(all_notifications) == 30 + # Records from before 3rd should be deleted + with freeze_time(delete_run_time): + delete_notifications_created_more_than_a_week_ago_by_type(notification_type) + remaining_sms_notifications = Notification.query.filter_by(notification_type='sms').all() + remaining_letter_notifications = Notification.query.filter_by(notification_type='letter').all() + remaining_email_notifications = Notification.query.filter_by(notification_type='email').all() + assert len(remaining_sms_notifications) == expected_sms_count + assert len(remaining_email_notifications) == expected_email_count + assert len(remaining_letter_notifications) == expected_letter_count + + if notification_type == 'sms': + notifications_to_check = remaining_sms_notifications + if notification_type == 'email': + notifications_to_check = remaining_email_notifications + if notification_type == 'letter': + notifications_to_check = remaining_letter_notifications + for notification in notifications_to_check: + assert notification.created_at.date() >= date(2016, month, 3) + + +@pytest.mark.parametrize('notification_type', ['sms', 'email', 'letter']) +@freeze_time("2016-01-10 12:00:00.000000") +def test_should_not_delete_notification_history(sample_service, notification_type, mocker): + mocker.patch("app.dao.notifications_dao.get_s3_object_by_prefix") + with freeze_time('2016-01-01 12:00'): + email_template, letter_template, sms_template = _create_templates(sample_service) + create_notification(template=email_template, status='permanent-failure') + create_notification(template=sms_template, status='permanent-failure') + create_notification(template=letter_template, status='permanent-failure') + assert Notification.query.count() == 3 + assert NotificationHistory.query.count() == 3 + delete_notifications_created_more_than_a_week_ago_by_type(notification_type) + assert Notification.query.count() == 2 + assert NotificationHistory.query.count() == 3 + + +@pytest.mark.parametrize('notification_type', ['sms', 'email', 'letter']) +def test_delete_notifications_for_days_of_retention(sample_service, notification_type, mocker): + mock_get_s3 = mocker.patch("app.dao.notifications_dao.get_s3_object_by_prefix") + service_with_default_data_retention = create_service(service_name='default data retention') + email_template, letter_template, sms_template = _create_templates(sample_service) + default_email_template, default_letter_template, default_sms_template = _create_templates( + service_with_default_data_retention) + create_notification(template=email_template, status='delivered') + create_notification(template=sms_template, status='permanent-failure') + create_notification(template=letter_template, status='temporary-failure', + reference='LETTER_REF', sent_at=datetime.utcnow()) + create_notification(template=email_template, status='delivered', + created_at=datetime.utcnow() - timedelta(days=4)) + create_notification(template=sms_template, status='permanent-failure', + created_at=datetime.utcnow() - timedelta(days=4)) + create_notification(template=letter_template, status='temporary-failure', + reference='LETTER_REF', sent_at=datetime.utcnow(), + created_at=datetime.utcnow() - timedelta(days=4)) + create_notification(template=default_email_template, status='delivered', + created_at=datetime.utcnow() - timedelta(days=8)) + create_notification(template=default_sms_template, status='permanent-failure', + created_at=datetime.utcnow() - timedelta(days=8)) + create_notification(template=default_letter_template, status='temporary-failure', + reference='LETTER_REF', sent_at=datetime.utcnow(), + created_at=datetime.utcnow() - timedelta(days=8)) + create_service_data_retention(service_id=sample_service.id, notification_type=notification_type) + assert len(Notification.query.all()) == 9 + delete_notifications_created_more_than_a_week_ago_by_type(notification_type) + assert len(Notification.query.all()) == 7 + assert len(Notification.query.filter_by(notification_type=notification_type).all()) == 1 + if notification_type == 'letter': + mock_get_s3.assert_called_with(bucket_name=current_app.config['LETTERS_PDF_BUCKET_NAME'], + prefix="{}NOTIFY.LETTER_REF.D.2.C.C".format(str(datetime.utcnow().date())) + ) + assert mock_get_s3.call_count == 2 + else: + mock_get_s3.assert_not_called() + + +@pytest.mark.parametrize('notification_type', ['sms', 'email', 'letter']) +def test_delete_notifications_keep_data_for_days_of_retention_is_longer(sample_service, notification_type, mocker): + mock_get_s3 = mocker.patch("app.dao.notifications_dao.get_s3_object_by_prefix") + create_service_data_retention(service_id=sample_service.id, notification_type=notification_type, + days_of_retention=15) + email_template, letter_template, sms_template = _create_templates(sample_service) + service_with_default_data_retention = create_service(service_name='default data retention') + default_email_template, default_letter_template, default_sms_template = _create_templates( + service_with_default_data_retention) + create_notification(template=email_template, status='delivered') + create_notification(template=sms_template, status='permanent-failure') + create_notification(template=letter_template, status='temporary-failure') + create_notification(template=email_template, status='delivered', + created_at=datetime.utcnow() - timedelta(days=14)) + create_notification(template=sms_template, status='permanent-failure', + created_at=datetime.utcnow() - timedelta(days=14)) + create_notification(template=letter_template, status='temporary-failure', + created_at=datetime.utcnow() - timedelta(days=14)) + create_notification(template=default_email_template, status='delivered', + created_at=datetime.utcnow() - timedelta(days=8)) + create_notification(template=default_sms_template, status='permanent-failure', + created_at=datetime.utcnow() - timedelta(days=8)) + create_notification(template=default_letter_template, status='temporary-failure', + created_at=datetime.utcnow() - timedelta(days=8)) + assert len(Notification.query.all()) == 9 + delete_notifications_created_more_than_a_week_ago_by_type(notification_type) + assert len(Notification.query.filter_by().all()) == 8 + assert len(Notification.query.filter_by(notification_type=notification_type).all()) == 2 + if notification_type == 'letter': + assert mock_get_s3.called + else: + mock_get_s3.assert_not_called() + + +def test_delete_notifications_delete_notification_type_for_default_time_if_no_days_of_retention_for_type( + sample_service, mocker +): + create_service_data_retention(service_id=sample_service.id, notification_type='sms', + days_of_retention=15) + email_template, letter_template, sms_template = _create_templates(sample_service) + create_notification(template=email_template, status='delivered') + create_notification(template=sms_template, status='permanent-failure') + create_notification(template=letter_template, status='temporary-failure') + create_notification(template=email_template, status='delivered', + created_at=datetime.utcnow() - timedelta(days=14)) + create_notification(template=sms_template, status='permanent-failure', + created_at=datetime.utcnow() - timedelta(days=14)) + create_notification(template=letter_template, status='temporary-failure', + created_at=datetime.utcnow() - timedelta(days=14)) + assert len(Notification.query.all()) == 6 + delete_notifications_created_more_than_a_week_ago_by_type('email') + assert len(Notification.query.filter_by().all()) == 5 + assert len(Notification.query.filter_by(notification_type='email').all()) == 1 + + +def _create_templates(sample_service): + email_template = create_template(service=sample_service, template_type='email') + sms_template = create_template(service=sample_service) + letter_template = create_template(service=sample_service, template_type='letter') + return email_template, letter_template, sms_template diff --git a/tests/app/dao/test_service_data_retention_dao.py b/tests/app/dao/test_service_data_retention_dao.py index cbf3661d2..2c5551bb3 100644 --- a/tests/app/dao/test_service_data_retention_dao.py +++ b/tests/app/dao/test_service_data_retention_dao.py @@ -8,10 +8,11 @@ from app.dao.service_data_retention_dao import ( fetch_service_data_retention, insert_service_data_retention, update_service_data_retention, - fetch_service_data_retention_by_id + fetch_service_data_retention_by_id, + fetch_service_data_retention_by_notification_type ) from app.models import ServiceDataRetention -from tests.app.db import create_service +from tests.app.db import create_service, create_service_data_retention def test_fetch_service_data_retention(sample_service): @@ -131,3 +132,20 @@ def test_update_service_data_retention_does_not_update_row_if_data_retention_is_ service_id=uuid.uuid4(), days_of_retention=5) assert updated_count == 0 + + +@pytest.mark.parametrize('notification_type, alternate', + [('sms', 'email'), + ('email', 'sms'), + ('letter', 'email'), + ('letter', 'sms')] + ) +def test_fetch_service_data_retention_by_notification_type(sample_service, notification_type, alternate): + data_retention = create_service_data_retention(service_id=sample_service.id, notification_type=notification_type) + create_service_data_retention(service_id=sample_service.id, notification_type=alternate) + result = fetch_service_data_retention_by_notification_type(sample_service.id, notification_type) + assert result == data_retention + + +def test_fetch_service_data_retention_by_notification_type_returns_none_when_no_rows(sample_service): + assert not fetch_service_data_retention_by_notification_type(sample_service.id, 'email') diff --git a/tests/app/letters/test_letter_utils.py b/tests/app/letters/test_letter_utils.py index 2430c57cf..b4c557fb5 100644 --- a/tests/app/letters/test_letter_utils.py +++ b/tests/app/letters/test_letter_utils.py @@ -15,7 +15,6 @@ from app.letters.utils import ( ScanErrorType, move_failed_pdf, get_folder_name ) from app.models import KEY_TYPE_NORMAL, KEY_TYPE_TEST, PRECOMPILED_TEMPLATE_NAME -from app.variables import Retention FROZEN_DATE_TIME = "2018-03-14 17:00:00" @@ -135,8 +134,7 @@ def test_upload_letter_pdf_to_correct_bucket( bucket_name=current_app.config[bucket_config_name], file_location=filename, filedata=b'\x00\x01', - region=current_app.config['AWS_REGION'], - tags={Retention.KEY: Retention.ONE_WEEK} + region=current_app.config['AWS_REGION'] )