Updates to the delete CSV file job to reduce the number of eligible jobs in any run

- previously this was unbounded, so it fetched all jobs older than 7 days: in excess of 75,000 🔥
- this meant that the job (a) took a long time, (b) used a lot of memory, and (c) repeated the same work every day

These changes give the job a 2-day eligibility window, minimising the number of eligible jobs in a run whilst still retaining some leeway in the event that it fails one night.

In principle the job runs early in the morning on a given day. The previous 7 days are left alone, and the 2 days' worth of files before that are deleted (a sketch of the window calculation follows the example below):

So, if the job runs on the 31st:
- the 30th, 29th, 28th, 27th, 26th, 25th and 24th are ignored (still within the 7-day retention period)
- jobs from the 23rd and 22nd have their files deleted
- the 21st and earlier are ignored, on the assumption that earlier runs have already dealt with them
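
A minimal sketch of how that 2-day window could be computed, assuming the task works off each job's created_at and uses midnight boundaries; the names below are illustrative, not the actual remove_csv_files implementation in app.celery.scheduled_tasks:

```python
from datetime import datetime, time, timedelta


def eligible_window(run_time):
    # Illustrative helper (not the real code): jobs created on the 8th and
    # 9th days before the run date are eligible for CSV deletion. Newer jobs
    # are still inside the 7-day retention period; older ones should already
    # have been handled by a previous run.
    midnight_today = datetime.combine(run_time.date(), time.min)
    window_start = midnight_today - timedelta(days=9)  # inclusive
    window_end = midnight_today - timedelta(days=7)    # exclusive
    return window_start, window_end


# Worked example from above: a run on the 31st yields a window covering the
# 22nd and 23rd, leaving the 24th-30th (and the 21st and earlier) untouched.
```

With the test's frozen time of 2016-10-18T10:00:00 this gives [2016-10-09 00:00:00, 2016-10-11 00:00:00), which is consistent with the eligible and ineligible jobs set up in the updated test below.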
Author: Martyn Inglis
Date:   2017-04-05 16:23:41 +01:00
Parent: 61a8097ca5
Commit: 832005efef
4 changed files with 40 additions and 24 deletions


@@ -203,17 +203,19 @@ def test_should_update_all_scheduled_jobs_and_put_on_queue(notify_db, notify_db_
 
 def test_will_remove_csv_files_for_jobs_older_than_seven_days(notify_db, notify_db_session, mocker):
     mocker.patch('app.celery.scheduled_tasks.s3.remove_job_from_s3')
-    one_millisecond_before_midnight = datetime(2016, 10, 9, 23, 59, 59, 999)
-    midnight = datetime(2016, 10, 10, 0, 0, 0, 0)
-    one_millisecond_past_midnight = datetime(2016, 10, 10, 0, 0, 0, 1)
+    eligible_job_1 = datetime(2016, 10, 10, 23, 59, 59, 000)
+    eligible_job_2 = datetime(2016, 10, 9, 00, 00, 00, 000)
+    in_eligible_job_too_new = datetime(2016, 10, 11, 00, 00, 00, 000)
+    in_eligible_job_too_old = datetime(2016, 10, 8, 23, 59, 59, 999)
 
-    job_1 = create_sample_job(notify_db, notify_db_session, created_at=one_millisecond_before_midnight)
-    create_sample_job(notify_db, notify_db_session, created_at=midnight)
-    create_sample_job(notify_db, notify_db_session, created_at=one_millisecond_past_midnight)
+    job_1 = create_sample_job(notify_db, notify_db_session, created_at=eligible_job_1)
+    job_2 = create_sample_job(notify_db, notify_db_session, created_at=eligible_job_2)
+    create_sample_job(notify_db, notify_db_session, created_at=in_eligible_job_too_new)
+    create_sample_job(notify_db, notify_db_session, created_at=in_eligible_job_too_old)
 
-    with freeze_time('2016-10-17T00:00:00'):
+    with freeze_time('2016-10-18T10:00:00'):
         remove_csv_files()
-    s3.remove_job_from_s3.assert_called_once_with(job_1.service_id, job_1.id)
+    s3.remove_job_from_s3.assert_has_calls([call(job_1.service_id, job_1.id), call(job_2.service_id, job_2.id)])
 
 
 def test_send_daily_performance_stats_calls_does_not_send_if_inactive(
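
One detail of the updated test worth calling out: the assertion moves from assert_called_once_with (exactly one call with those arguments) to assert_has_calls, which passes when the mock received the listed calls in order (by default), so it can check that both eligible jobs were removed. A standalone unittest.mock illustration, using made-up service and job identifiers:

```python
from unittest.mock import MagicMock, call

remove = MagicMock()
remove('service-1', 'job-1')
remove('service-1', 'job-2')

# Passes: both expected calls were made, in this order.
remove.assert_has_calls([call('service-1', 'job-1'), call('service-1', 'job-2')])

# This would raise AssertionError, because the mock was called twice:
# remove.assert_called_once_with('service-1', 'job-1')
```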