prevent race conditions in run_scheduled_jobs queuing jobs multiple times

we were running into issues where multiple celery beat instances queued up the
run_scheduled_jobs task at the same time, and a race when selecting scheduled
jobs caused both tasks to trigger processing of the same job.

Use with_for_update, which calls through to the postgres SELECT ... FOR UPDATE.
That locks the selected rows against other SELECT ... FOR UPDATE queries (i.e.
other workers running the same code) until the rows are set to pending and the
transaction commits - so the second worker will not find any rows to process.
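
The with_for_update call itself lives in the dao layer (one of the other files
changed in this commit, not shown in the hunk below). A minimal sketch of what
it looks like in SQLAlchemy - the Job.scheduled_for filter and the
JOB_STATUS_SCHEDULED constant are assumptions for illustration, not the
committed code:

    from datetime import datetime

    from app.models import Job, JOB_STATUS_SCHEDULED  # assumed names


    def dao_get_scheduled_jobs():
        # .with_for_update() makes SQLAlchemy emit SELECT ... FOR UPDATE,
        # so postgres row-locks every job the query matches. A second
        # worker running the same query blocks on those locks until this
        # transaction commits - by which point the rows are pending and
        # no longer match the filter.
        return Job.query.filter(
            Job.job_status == JOB_STATUS_SCHEDULED,
            Job.scheduled_for < datetime.utcnow()
        ).with_for_update().all()
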
Martyn Inglis
2016-10-07 10:47:48 +01:00
committed by Leo Hemsted
parent fff74bb4f7
commit 897ad6a957
3 changed files with 19 additions and 2 deletions


@@ -27,17 +27,24 @@ def remove_csv_files():
 @notify_celery.task(name="run-scheduled-jobs")
 @statsd(namespace="tasks")
 def run_scheduled_jobs():
+    from app import db
     try:
         jobs = dao_get_scheduled_jobs()
         for job in jobs:
             job.job_status = JOB_STATUS_PENDING
-            dao_update_job(job)
-            process_job.apply_async([str(job.id)], queue="process-job")
+            from time import sleep
+            sleep(1)
+            print('SCHEDULING' + str(job.id))
+            # dao_update_job(job)
+            # process_job.apply_async([str(job.id)], queue="process-job")
             current_app.logger.info(
                 "Job ID {} added to process job queue".format(job.id)
             )
+        db.session.add_all(jobs)
+        db.session.commit()
     except SQLAlchemyError as e:
         current_app.logger.exception("Failed to run scheduled jobs", e)
         db.session.rollback()
         raise
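
The sleep, print and commented-out dispatch in the loop above read as
work-in-progress debug scaffolding. Stripped of that, the shape the task is
converging on is roughly the following - a sketch of the intended pattern,
not the code as committed:

    @notify_celery.task(name="run-scheduled-jobs")
    @statsd(namespace="tasks")
    def run_scheduled_jobs():
        from app import db  # imported in-function, as in the commit

        try:
            # rows come back locked by the dao's SELECT ... FOR UPDATE;
            # a concurrent beat running the same select blocks here
            jobs = dao_get_scheduled_jobs()
            for job in jobs:
                job.job_status = JOB_STATUS_PENDING
                process_job.apply_async([str(job.id)], queue="process-job")
                current_app.logger.info(
                    "Job ID {} added to process job queue".format(job.id)
                )
            # a single commit for the whole batch: this is what releases
            # the row locks, and by then every selected job is pending
            db.session.add_all(jobs)
            db.session.commit()
        except SQLAlchemyError:
            current_app.logger.exception("Failed to run scheduled jobs")
            db.session.rollback()
            raise

The key design point is the single commit at the end: the row locks taken by
dao_get_scheduled_jobs are held until db.session.commit(), so a second worker
cannot observe the jobs between selection and the status flip to pending.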