@notify_celery.task(name="delete_dvla_response_files")
@statsd(namespace="tasks")
def delete_dvla_response_files_older_than_seven_days():
    """Delete old DVLA response files from the DVLA response S3 bucket.

    Lists the objects under the ``root/dispatch`` prefix of the bucket named
    by the ``DVLA_RESPONSE_BUCKET_NAME`` config value, narrows that listing
    with ``s3.filter_s3_bucket_objects_within_date_range`` and removes every
    object the filter returns. On success it logs the start time, the finish
    time and the number of objects removed.

    NOTE(review): the exact age window is decided by the filter helper's
    defaults, which are not visible here -- confirm it matches "older than
    seven days".

    Raises:
        Exception: any failure is logged with its traceback and then
            re-raised unchanged, so the task is still reported as failed.
    """
    try:
        start = datetime.utcnow()
        bucket_name = current_app.config['DVLA_RESPONSE_BUCKET_NAME']

        bucket_objects = s3.get_s3_bucket_objects(bucket_name, 'root/dispatch')
        older_than_seven_days = s3.filter_s3_bucket_objects_within_date_range(bucket_objects)

        for s3_object in older_than_seven_days:
            s3.remove_s3_object(bucket_name, s3_object['Key'])

        current_app.logger.info(
            "Delete dvla response files started {} finished {} deleted {} files".format(
                start,
                datetime.utcnow(),
                len(older_than_seven_days)
            )
        )
    except Exception:
        # Nothing in this task touches the database, so the previous
        # ``except SQLAlchemyError`` could never match the errors that
        # actually occur here (S3 / config failures) and they went unlogged.
        # This is the task's top-level boundary: log, then re-raise as-is.
        current_app.logger.exception("Failed to delete dvla response files")
        raise
'notify.tools' CSV_UPLOAD_BUCKET_NAME = 'development-notifications-csv-upload' + DVLA_RESPONSE_BUCKET_NAME = 'notify.tools-ftp' NOTIFY_ENVIRONMENT = 'development' NOTIFICATION_QUEUE_PREFIX = 'development' DEBUG = True @@ -284,6 +291,7 @@ class Test(Config): NOTIFY_ENVIRONMENT = 'test' DEBUG = True CSV_UPLOAD_BUCKET_NAME = 'test-notifications-csv-upload' + DVLA_RESPONSE_BUCKET_NAME = 'test.notify.com-ftp' STATSD_ENABLED = True STATSD_HOST = "localhost" STATSD_PORT = 1000 @@ -316,6 +324,7 @@ class Preview(Config): NOTIFY_EMAIL_DOMAIN = 'notify.works' NOTIFY_ENVIRONMENT = 'preview' CSV_UPLOAD_BUCKET_NAME = 'preview-notifications-csv-upload' + DVLA_RESPONSE_BUCKET_NAME = 'notify.works-ftp' FROM_NUMBER = 'preview' API_RATE_LIMIT_ENABLED = True @@ -324,6 +333,7 @@ class Staging(Config): NOTIFY_EMAIL_DOMAIN = 'staging-notify.works' NOTIFY_ENVIRONMENT = 'staging' CSV_UPLOAD_BUCKET_NAME = 'staging-notify-csv-upload' + DVLA_RESPONSE_BUCKET_NAME = 'staging-notify.works-ftp' STATSD_ENABLED = True FROM_NUMBER = 'stage' API_RATE_LIMIT_ENABLED = True @@ -333,6 +343,7 @@ class Live(Config): NOTIFY_EMAIL_DOMAIN = 'notifications.service.gov.uk' NOTIFY_ENVIRONMENT = 'live' CSV_UPLOAD_BUCKET_NAME = 'live-notifications-csv-upload' + DVLA_RESPONSE_BUCKET_NAME = 'notifications.service.gov.uk-ftp' STATSD_ENABLED = True FROM_NUMBER = 'GOVUK' FUNCTIONAL_TEST_PROVIDER_SERVICE_ID = '6c1d81bb-dae2-4ee9-80b0-89a4aae9f649' @@ -350,6 +361,7 @@ class Sandbox(CloudFoundryConfig): NOTIFY_EMAIL_DOMAIN = 'notify.works' NOTIFY_ENVIRONMENT = 'sandbox' CSV_UPLOAD_BUCKET_NAME = 'cf-sandbox-notifications-csv-upload' + DVLA_RESPONSE_BUCKET_NAME = 'notify.works-ftp' FROM_NUMBER = 'sandbox' REDIS_ENABLED = False diff --git a/tests/app/celery/test_scheduled_tasks.py b/tests/app/celery/test_scheduled_tasks.py index fcdd8ea93..fb1a3981e 100644 --- a/tests/app/celery/test_scheduled_tasks.py +++ b/tests/app/celery/test_scheduled_tasks.py @@ -9,6 +9,7 @@ from freezegun import freeze_time from app.celery import 
def test_delete_dvla_response_files_older_than_seven_days_removes_old_files(notify_api, mocker):
    """Both stubbed objects are eight days old, so each one must be removed."""
    old_objects = [
        single_s3_object_stub('bar/foo1.txt', datetime_in_past(days=8)),
        single_s3_object_stub('bar/foo2.txt', datetime_in_past(days=8)),
    ]
    mocker.patch(
        'app.celery.scheduled_tasks.s3.get_s3_bucket_objects', return_value=old_objects
    )
    remove_s3_mock = mocker.patch('app.celery.scheduled_tasks.s3.remove_s3_object')

    delete_dvla_response_files_older_than_seven_days()

    # One removal per stubbed object, addressed to the configured bucket.
    expected_calls = [
        call(current_app.config['DVLA_RESPONSE_BUCKET_NAME'], s3_object["Key"])
        for s3_object in old_objects
    ]
    remove_s3_mock.assert_has_calls(expected_calls)


def test_delete_dvla_response_files_older_than_seven_days_does_not_remove_files(notify_api, mocker):
    """Objects aged six and nine days must both be left in the bucket.

    NOTE(review): the nine-day-old object being kept implies the filter only
    selects a bounded age window -- confirm against
    s3.filter_s3_bucket_objects_within_date_range.
    """
    out_of_range_objects = [
        single_s3_object_stub('bar/foo1.txt', datetime_in_past(days=6)),
        single_s3_object_stub('bar/foo2.txt', datetime_in_past(days=9)),
    ]
    mocker.patch(
        'app.celery.scheduled_tasks.s3.get_s3_bucket_objects', return_value=out_of_range_objects
    )
    remove_s3_mock = mocker.patch('app.celery.scheduled_tasks.s3.remove_s3_object')

    delete_dvla_response_files_older_than_seven_days()

    remove_s3_mock.assert_not_called()