mirror of
https://github.com/GSA/notifications-api.git
synced 2026-01-31 23:26:23 -05:00
Updates to the delete CSV file job to reduce the number of eligible jobs in any run
- previously this was unbounded, so it got all jobs older than 7 days. In excess of 75,000 🔥
- this meant that the job (a) took a long time, (b) used a lot of memory and (c) did the same thing every day
These changes mean that the job has a 2 day eligible window for jobs, minimising the number of eligible jobs in a run, whilst still retaining some leeway in the event of it failing one night.
In principle the job runs early in the morning on a given day. The previous 7 days are left alone, and then the 2 days' worth of files before that are deleted:
so:
runs on
31st
30,29,28,27,26,25,24 are ignored
23,22 jobs here have files deleted
21 and earlier are ignored.
This commit is contained in:
@@ -10,7 +10,7 @@ from app.aws import s3
|
||||
from app import notify_celery
|
||||
from app import performance_platform_client
|
||||
from app.dao.invited_user_dao import delete_invitations_created_more_than_two_days_ago
|
||||
from app.dao.jobs_dao import dao_set_scheduled_jobs_to_pending, dao_get_jobs_older_than
|
||||
from app.dao.jobs_dao import dao_set_scheduled_jobs_to_pending, dao_get_jobs_older_than_limited_by
|
||||
from app.dao.notifications_dao import (
|
||||
delete_notifications_created_more_than_a_week_ago,
|
||||
dao_timeout_notifications,
|
||||
@@ -28,7 +28,7 @@ from app.celery.tasks import process_job
|
||||
@notify_celery.task(name="remove_csv_files")
|
||||
@statsd(namespace="tasks")
|
||||
def remove_csv_files():
|
||||
jobs = dao_get_jobs_older_than(7)
|
||||
jobs = dao_get_jobs_older_than_limited_by()
|
||||
for job in jobs:
|
||||
s3.remove_job_from_s3(job.service_id, job.id)
|
||||
current_app.logger.info("Job ID {} has been removed from s3.".format(job.id))
|
||||
|
||||
@@ -116,7 +116,8 @@ def dao_update_job(job):
|
||||
db.session.commit()
|
||||
|
||||
|
||||
def dao_get_jobs_older_than(limit_days):
|
||||
def dao_get_jobs_older_than_limited_by(older_than=7, limit_days=2):
|
||||
return Job.query.filter(
|
||||
cast(Job.created_at, sql_date) < days_ago(limit_days)
|
||||
cast(Job.created_at, sql_date) < days_ago(older_than),
|
||||
cast(Job.created_at, sql_date) >= days_ago(older_than + limit_days)
|
||||
).order_by(desc(Job.created_at)).all()
|
||||
|
||||
Reference in New Issue
Block a user