From 852e207478e02281bb281d86eb16c85cf55b6859 Mon Sep 17 00:00:00 2001 From: Martyn Inglis Date: Wed, 7 Sep 2016 15:36:59 +0100 Subject: [PATCH] Adds new scheduled task to delete CSV files. Deletes files for jobs older than 7 days, matching delete process for the actual notifications Runs at 1 minute past the hour at midnight, 1 am and 2 am. --- app/celery/scheduled_tasks.py | 13 +++++++++++-- config.py | 5 +++++ tests/app/celery/test_scheduled_tasks.py | 21 ++++++++++++++++++++- 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/app/celery/scheduled_tasks.py b/app/celery/scheduled_tasks.py index b0cb57660..c960c3b20 100644 --- a/app/celery/scheduled_tasks.py +++ b/app/celery/scheduled_tasks.py @@ -3,10 +3,10 @@ from datetime import datetime, timedelta from flask import current_app from sqlalchemy.exc import SQLAlchemyError +from app.aws import s3 from app import notify_celery -from app.clients import STATISTICS_FAILURE from app.dao.invited_user_dao import delete_invitations_created_more_than_two_days_ago -from app.dao.jobs_dao import dao_get_scheduled_jobs, dao_update_job +from app.dao.jobs_dao import dao_get_scheduled_jobs, dao_update_job, dao_get_jobs_older_than from app.dao.notifications_dao import delete_notifications_created_more_than_a_week_ago, get_notifications, \ update_notification_status_by_id from app.dao.users_dao import delete_codes_older_created_more_than_a_day_ago @@ -15,6 +15,15 @@ from app.models import JOB_STATUS_PENDING from app.celery.tasks import process_job +@notify_celery.task(name="remove_csv_files") +@statsd(namespace="tasks") +def remove_csv_files(): + jobs = dao_get_jobs_older_than(7) + for job in jobs: + s3.remove_job_from_s3(job.service_id, job.id) + current_app.logger.info("Job ID {} has been removed from s3.".format(job.id)) + + @notify_celery.task(name="run-scheduled-jobs") @statsd(namespace="tasks") def run_scheduled_jobs(): diff --git a/config.py b/config.py index 44a0b88d1..1df96acf4 100644 --- a/config.py +++ b/config.py 
@@ -77,6 +77,11 @@ class Config(object): 'task': 'timeout-sending-notifications', 'schedule': crontab(minute=0, hour='0,1,2'), 'options': {'queue': 'periodic'} + }, + 'remove_csv_files': { + 'task': 'remove_csv_files', + 'schedule': crontab(minute=1, hour='0,1,2'), + 'options': {'queue': 'periodic'} } } CELERY_QUEUES = [ diff --git a/tests/app/celery/test_scheduled_tasks.py b/tests/app/celery/test_scheduled_tasks.py index 893988490..aa5345ff2 100644 --- a/tests/app/celery/test_scheduled_tasks.py +++ b/tests/app/celery/test_scheduled_tasks.py @@ -1,9 +1,11 @@ from datetime import datetime, timedelta from flask import current_app - +from freezegun import freeze_time +from app.celery.scheduled_tasks import s3 from app.celery import scheduled_tasks from app.celery.scheduled_tasks import (delete_verify_codes, + remove_csv_files, delete_successful_notifications, delete_failed_notifications, delete_invitations, @@ -21,6 +23,7 @@ def test_should_have_decorated_tasks_functions(): assert timeout_notifications.__wrapped__.__name__ == 'timeout_notifications' assert delete_invitations.__wrapped__.__name__ == 'delete_invitations' assert run_scheduled_jobs.__wrapped__.__name__ == 'run_scheduled_jobs' + assert remove_csv_files.__wrapped__.__name__ == 'remove_csv_files' def test_should_call_delete_notifications_more_than_week_in_task(notify_api, mocker): @@ -120,3 +123,19 @@ def test_should_update_all_scheduled_jobs_and_put_on_queue(notify_db, notify_db_ call([str(job_2.id)], queue='process-job'), call([str(job_1.id)], queue='process-job') ]) + + +def test_will_remove_csv_files_for_jobs_older_than_seven_days(notify_db, notify_db_session, mocker): + mocker.patch('app.celery.scheduled_tasks.s3.remove_job_from_s3') + + one_millisecond_before_midnight = datetime(2016, 10, 9, 23, 59, 59, 999) + midnight = datetime(2016, 10, 10, 0, 0, 0, 0) + one_millisecond_past_midnight = datetime(2016, 10, 10, 0, 0, 0, 1) + + job_1 = sample_job(notify_db, notify_db_session, 
created_at=one_millisecond_before_midnight) + sample_job(notify_db, notify_db_session, created_at=midnight) + sample_job(notify_db, notify_db_session, created_at=one_millisecond_past_midnight) + + with freeze_time('2016-10-17T00:00:00'): + remove_csv_files() + s3.remove_job_from_s3.assert_called_once_with(job_1.service_id, job_1.id)