prevent race conditions in run_scheduled_jobs queuing jobs multiple times

we were running into issues where multiple beats queue up the
run_scheduled_jobs task at the same time, and concurrency issues with selecting
scheduled jobs causes both tasks to trigger processing of the job.

Use with_for_update, which calls through to the postgres SELECT  ... FOR UPDATE
which locks other SELECT FOR UPDATES (ie other threads running same code) until
the rows are set to pending and the transaction completes - so the second
thread will not find any rows
This commit is contained in:
Martyn Inglis
2016-10-07 10:47:48 +01:00
committed by Leo Hemsted
parent fff74bb4f7
commit 897ad6a957
3 changed files with 19 additions and 2 deletions

View File

@@ -27,17 +27,24 @@ def remove_csv_files():
@notify_celery.task(name="run-scheduled-jobs") @notify_celery.task(name="run-scheduled-jobs")
@statsd(namespace="tasks") @statsd(namespace="tasks")
def run_scheduled_jobs(): def run_scheduled_jobs():
from app import db
try: try:
jobs = dao_get_scheduled_jobs() jobs = dao_get_scheduled_jobs()
for job in jobs: for job in jobs:
job.job_status = JOB_STATUS_PENDING job.job_status = JOB_STATUS_PENDING
dao_update_job(job) from time import sleep
process_job.apply_async([str(job.id)], queue="process-job") sleep(1)
print('SCHEDULING' + str(job.id))
# dao_update_job(job)
# process_job.apply_async([str(job.id)], queue="process-job")
current_app.logger.info( current_app.logger.info(
"Job ID {} added to process job queue".format(job.id) "Job ID {} added to process job queue".format(job.id)
) )
db.session.add_all(jobs)
db.session.commit()
except SQLAlchemyError as e: except SQLAlchemyError as e:
current_app.logger.exception("Failed to run scheduled jobs", e) current_app.logger.exception("Failed to run scheduled jobs", e)
db.session.rollback()
raise raise

View File

@@ -52,6 +52,7 @@ def dao_get_scheduled_jobs():
Job.scheduled_for < datetime.utcnow() Job.scheduled_for < datetime.utcnow()
) \ ) \
.order_by(asc(Job.scheduled_for)) \ .order_by(asc(Job.scheduled_for)) \
.with_for_update() \
.all() .all()

View File

@@ -0,0 +1,9 @@
# Lambda function
This code is used to setup a lambda function to call into our API to deliver messages.
Basic expected flow is that an request to GOV.UK Notify to send a text/email will persist in our DB then invoke an async lambda function.
This function will call /deliver/notification/{id} which will make the API call to our delivery partners, updating the DB with the response.
## EXPERIMENTAL AT THIS STAGE