From be6f37069b92c5fc98b7382cf91beb02f335b4f7 Mon Sep 17 00:00:00 2001 From: Pea Tyczynska Date: Mon, 19 Nov 2018 17:09:27 +0000 Subject: [PATCH] Change job selection dao to take flexible retention into account Also test deleting jobs with flexible data retention Also update tests for default data retention following logic change: dao_get_jobs_older_than_data_retention now counts today at the start of the day, not at a time when function runs and updated tests reflect that --- app/celery/scheduled_tasks.py | 6 +- app/dao/jobs_dao.py | 39 ++++++++--- tests/app/celery/test_scheduled_tasks.py | 84 ++++++++++++++++++++---- tests/app/dao/test_jobs_dao.py | 8 +-- 4 files changed, 108 insertions(+), 29 deletions(-) diff --git a/app/celery/scheduled_tasks.py b/app/celery/scheduled_tasks.py index 9ff9cb956..e6870f38d 100644 --- a/app/celery/scheduled_tasks.py +++ b/app/celery/scheduled_tasks.py @@ -23,7 +23,7 @@ from app.dao.invited_org_user_dao import delete_org_invitations_created_more_tha from app.dao.invited_user_dao import delete_invitations_created_more_than_two_days_ago from app.dao.jobs_dao import ( dao_set_scheduled_jobs_to_pending, - dao_get_jobs_older_than_limited_by + dao_get_jobs_older_than_data_retention ) from app.dao.jobs_dao import dao_update_job from app.dao.notifications_dao import ( @@ -64,7 +64,7 @@ from app.v2.errors import JobIncompleteError @notify_celery.task(name="remove_csv_files") @statsd(namespace="tasks") def remove_csv_files(job_types): - jobs = dao_get_jobs_older_than_limited_by(job_types=job_types) + jobs = dao_get_jobs_older_than_data_retention(notification_types=job_types) for job in jobs: s3.remove_job_from_s3(job.service_id, job.id) current_app.logger.info("Job ID {} has been removed from s3.".format(job.id)) @@ -299,7 +299,7 @@ def delete_inbound_sms_older_than_seven_days(): @notify_celery.task(name="remove_transformed_dvla_files") @statsd(namespace="tasks") def remove_transformed_dvla_files(): - jobs = dao_get_jobs_older_than_limited_by(job_types=[LETTER_TYPE]) + jobs = dao_get_jobs_older_than_data_retention(notification_types=[LETTER_TYPE]) for job in jobs: s3.remove_transformed_dvla_file(job.id) current_app.logger.info("Transformed dvla file for job {} has been removed from s3.".format(job.id)) diff --git a/app/dao/jobs_dao.py b/app/dao/jobs_dao.py index 6e2297302..a4947e958 100644 --- a/app/dao/jobs_dao.py +++ b/app/dao/jobs_dao.py @@ -18,6 +18,7 @@ from app.models import ( LETTER_TYPE, NotificationHistory, Template, + ServiceDataRetention ) from app.variables import LETTER_TEST_API_FILENAME @@ -115,15 +116,37 @@ def dao_update_job(job): db.session.commit() -def dao_get_jobs_older_than_limited_by(job_types, older_than=7, limit_days=2): - end_date = datetime.utcnow() - timedelta(days=older_than) - start_date = end_date - timedelta(days=limit_days) +def dao_get_jobs_older_than_data_retention(notification_types): + flexible_data_retention = ServiceDataRetention.query.filter( + ServiceDataRetention.notification_type.in_(notification_types) + ).all() + jobs = [] + today = datetime.utcnow().date() + for f in flexible_data_retention: + end_date = today - timedelta(days=f.days_of_retention) + start_date = end_date - timedelta(days=2) - return Job.query.join(Template).filter( - Job.created_at < end_date, - Job.created_at >= start_date, - Template.template_type.in_(job_types) - ).order_by(desc(Job.created_at)).all() + jobs.extend(Job.query.join(Template).filter( + Job.created_at < end_date, + Job.created_at >= start_date, + Template.template_type == f.notification_type, + Job.service_id == f.service_id + ).order_by(desc(Job.created_at)).all()) + + end_date = today - timedelta(days=7) + start_date = end_date - timedelta(days=2) + for notification_type in notification_types: + services_with_data_retention = [ + x.service_id for x in flexible_data_retention if x.notification_type == notification_type + ] + jobs.extend(Job.query.join(Template).filter( + Job.created_at < end_date, + Job.created_at >= start_date, + Template.template_type == notification_type, + Job.service_id.notin_(services_with_data_retention) + ).order_by(desc(Job.created_at)).all()) + + return jobs def dao_get_all_letter_jobs(): diff --git a/tests/app/celery/test_scheduled_tasks.py b/tests/app/celery/test_scheduled_tasks.py index 5ce737764..af58f0e97 100644 --- a/tests/app/celery/test_scheduled_tasks.py +++ b/tests/app/celery/test_scheduled_tasks.py @@ -52,11 +52,21 @@ from app.models import ( JOB_STATUS_IN_PROGRESS, JOB_STATUS_ERROR, LETTER_TYPE, - SMS_TYPE + SMS_TYPE, + EMAIL_TYPE ) from app.utils import get_london_midnight_in_utc from app.v2.errors import JobIncompleteError from tests.app.aws.test_s3 import single_s3_object_stub +from tests.app.db import ( + create_notification, + create_service, + create_template, + create_job, + create_service_callback_api, + create_service_data_retention +) + from tests.app.conftest import ( sample_job as create_sample_job, sample_notification_history as create_notification_history, @@ -64,9 +74,6 @@ from tests.app.conftest import ( create_custom_template, datetime_in_past ) -from tests.app.db import ( - create_notification, create_service, create_template, create_job, create_service_callback_api -) from tests.conftest import set_config_values @@ -273,10 +280,11 @@ def test_should_update_all_scheduled_jobs_and_put_on_queue(notify_db, notify_db_ def test_will_remove_csv_files_for_jobs_older_than_seven_days( notify_db, notify_db_session, mocker, sample_template ): - mocker.patch('app.celery.scheduled_tasks.s3.remove_job_from_s3') """ Jobs older than seven days are deleted, but only two day's worth (two-day window) """ + mocker.patch('app.celery.scheduled_tasks.s3.remove_job_from_s3') + seven_days_ago = datetime.utcnow() - timedelta(days=7) just_under_seven_days = seven_days_ago + timedelta(seconds=1) eight_days_ago = seven_days_ago - timedelta(days=1) @@ -284,7 +292,7 @@ def test_will_remove_csv_files_for_jobs_older_than_seven_days( just_under_nine_days = nine_days_ago + timedelta(seconds=1) nine_days_one_second_ago = nine_days_ago - timedelta(seconds=1) - create_sample_job(notify_db, notify_db_session, created_at=nine_days_one_second_ago) + job3_to_delete = create_sample_job(notify_db, notify_db_session, created_at=nine_days_one_second_ago) job1_to_delete = create_sample_job(notify_db, notify_db_session, created_at=eight_days_ago) job2_to_delete = create_sample_job(notify_db, notify_db_session, created_at=just_under_nine_days) create_sample_job(notify_db, notify_db_session, created_at=seven_days_ago) @@ -294,10 +302,57 @@ def test_will_remove_csv_files_for_jobs_older_than_seven_days( assert s3.remove_job_from_s3.call_args_list == [ call(job1_to_delete.service_id, job1_to_delete.id), - call(job2_to_delete.service_id, job2_to_delete.id) + call(job2_to_delete.service_id, job2_to_delete.id), + call(job3_to_delete.service_id, job3_to_delete.id) ] +@freeze_time('2016-10-18T10:00:00') +def test_will_remove_csv_files_for_jobs_older_than_retention_period( + notify_db, notify_db_session, mocker +): + """ + Jobs older than retention period are deleted, but only two day's worth (two-day window) + """ + mocker.patch('app.celery.scheduled_tasks.s3.remove_job_from_s3') + service_1 = create_service(service_name='service 1') + service_2 = create_service(service_name='service 2') + create_service_data_retention(service_id=service_1.id, notification_type=SMS_TYPE, days_of_retention=3) + create_service_data_retention(service_id=service_2.id, notification_type=EMAIL_TYPE, days_of_retention=30) + sms_template_service_1 = create_template(service=service_1) + email_template_service_1 = create_template(service=service_1, template_type='email') + + sms_template_service_2 = create_template(service=service_2) + email_template_service_2 = create_template(service=service_2, template_type='email') + + four_days_ago = datetime.utcnow() - timedelta(days=4) + eight_days_ago = datetime.utcnow() - timedelta(days=8) + thirty_one_days_ago = datetime.utcnow() - timedelta(days=31) + + _create_job = partial( + create_sample_job, + notify_db, + notify_db_session, + ) + + job1_to_delete = _create_job(service=service_1, template=sms_template_service_1, created_at=four_days_ago) + job2_to_delete = _create_job(service=service_1, template=email_template_service_1, created_at=eight_days_ago) + _create_job(service=service_1, template=email_template_service_1, created_at=four_days_ago) + + _create_job(service=service_2, template=email_template_service_2, created_at=eight_days_ago) + job3_to_delete = _create_job(service=service_2, template=email_template_service_2, created_at=thirty_one_days_ago) + job4_to_delete = _create_job(service=service_2, template=sms_template_service_2, created_at=eight_days_ago) + + remove_csv_files(job_types=[SMS_TYPE, EMAIL_TYPE]) + + s3.remove_job_from_s3.assert_has_calls([ + call(job1_to_delete.service_id, job1_to_delete.id), + call(job2_to_delete.service_id, job2_to_delete.id), + call(job3_to_delete.service_id, job3_to_delete.id), + call(job4_to_delete.service_id, job4_to_delete.id) + ], any_order=True) + + def test_send_daily_performance_stats_calls_does_not_send_if_inactive(client, mocker): send_mock = mocker.patch( 'app.celery.scheduled_tasks.total_sent_notifications.send_total_notifications_sent_for_day_stats') # noqa @@ -545,17 +600,18 @@ def test_remove_dvla_transformed_files_removes_expected_files(mocker, sample_ser just_over_seven_days = seven_days_ago - timedelta(seconds=1) eight_days_ago = seven_days_ago - timedelta(days=1) nine_days_ago = eight_days_ago - timedelta(days=1) + ten_days_ago = nine_days_ago - timedelta(days=1) just_under_nine_days = nine_days_ago + timedelta(seconds=1) just_over_nine_days = nine_days_ago - timedelta(seconds=1) + just_over_ten_days = ten_days_ago - timedelta(seconds=1) - job(created_at=seven_days_ago) job(created_at=just_under_seven_days) - job_to_delete_1 = job(created_at=just_over_seven_days) - job_to_delete_2 = job(created_at=eight_days_ago) - job_to_delete_3 = job(created_at=nine_days_ago) - job_to_delete_4 = job(created_at=just_under_nine_days) - job(created_at=just_over_nine_days) - + job(created_at=just_over_seven_days) + job_to_delete_1 = job(created_at=eight_days_ago) + job_to_delete_2 = job(created_at=nine_days_ago) + job_to_delete_3 = job(created_at=just_under_nine_days) + job_to_delete_4 = job(created_at=just_over_nine_days) + job(created_at=just_over_ten_days) remove_transformed_dvla_files() s3.remove_transformed_dvla_file.assert_has_calls([ diff --git a/tests/app/dao/test_jobs_dao.py b/tests/app/dao/test_jobs_dao.py index a6bea2d48..bd8752ead 100644 --- a/tests/app/dao/test_jobs_dao.py +++ b/tests/app/dao/test_jobs_dao.py @@ -13,7 +13,7 @@ from app.dao.jobs_dao import ( dao_set_scheduled_jobs_to_pending, dao_get_future_scheduled_job_by_id_and_service_id, dao_get_notification_outcomes_for_job, - dao_get_jobs_older_than_limited_by + dao_get_jobs_older_than_data_retention, ) from app.models import ( Job, @@ -296,7 +296,7 @@ def test_should_get_jobs_seven_days_old(notify_db, notify_db_session, sample_tem job(created_at=nine_days_ago) job(created_at=nine_days_one_second_ago) - jobs = dao_get_jobs_older_than_limited_by(job_types=[sample_template.template_type]) + jobs = dao_get_jobs_older_than_data_retention(notification_types=[sample_template.template_type]) assert len(jobs) == 1 assert jobs[0].id == job_to_delete.id @@ -359,8 +359,8 @@ def test_should_get_jobs_seven_days_old_filters_type(notify_db, notify_db_sessio job(template=sms_template) job(template=email_template) - jobs = dao_get_jobs_older_than_limited_by( - job_types=[EMAIL_TYPE, SMS_TYPE] + jobs = dao_get_jobs_older_than_data_retention( + notification_types=[EMAIL_TYPE, SMS_TYPE] ) assert len(jobs) == 2