From aace1bdd8aa85cc5a3ad62dc8372efa4a466b068 Mon Sep 17 00:00:00 2001 From: Chris Hill-Scott Date: Sat, 26 Sep 2020 11:06:44 +0100 Subject: [PATCH] Allow 20 minutes before checking for missing rows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since we’ve doubled the number of rows in a job, jobs can take twice as long to insert all the notifications. We don’t check for missing rows until we’re pretty confident that the original tasks have finished processing. This means we need to double the time we wait to still be as sure. --- app/dao/jobs_dao.py | 6 ++-- tests/app/celery/test_scheduled_tasks.py | 36 ++++++++++++++++++++++-- tests/app/dao/test_jobs_dao.py | 4 +-- 3 files changed, 38 insertions(+), 8 deletions(-) diff --git a/app/dao/jobs_dao.py b/app/dao/jobs_dao.py index ce09fef7b..cee3d16b1 100644 --- a/app/dao/jobs_dao.py +++ b/app/dao/jobs_dao.py @@ -209,9 +209,9 @@ def can_letter_job_be_cancelled(job): def find_jobs_with_missing_rows(): - # Jobs can be a maximum of 50,000 rows. It typically takes 5 minutes to create all those notifications. - # Using 10 minutes as a condition seems reasonable. - ten_minutes_ago = datetime.utcnow() - timedelta(minutes=10) + # Jobs can be a maximum of 100,000 rows. It typically takes 10 minutes to create all those notifications. + # Using 20 minutes as a condition seems reasonable. + ten_minutes_ago = datetime.utcnow() - timedelta(minutes=20) yesterday = datetime.utcnow() - timedelta(days=1) jobs_with_rows_missing = db.session.query( func.count(Notification.id).label('actual_count'), diff --git a/tests/app/celery/test_scheduled_tasks.py b/tests/app/celery/test_scheduled_tasks.py index 655d6e699..c29ea1637 100644 --- a/tests/app/celery/test_scheduled_tasks.py +++ b/tests/app/celery/test_scheduled_tasks.py @@ -398,6 +398,36 @@ def test_check_templated_letter_state_during_utc(mocker, sample_letter_template) ) +@pytest.mark.parametrize('offset', ( + timedelta(days=1), + pytest.param(timedelta(hours=23, minutes=59), marks=pytest.mark.xfail), + pytest.param(timedelta(minutes=20), marks=pytest.mark.xfail), + timedelta(minutes=19), +)) +def test_check_for_missing_rows_in_completed_jobs_ignores_old_and_new_jobs( + mocker, + sample_email_template, + offset, +): + mocker.patch('app.celery.tasks.s3.get_job_and_metadata_from_s3', + return_value=(load_example_csv('multiple_email'), {"sender_id": None})) + mocker.patch('app.encryption.encrypt', return_value="something_encrypted") + process_row = mocker.patch('app.celery.scheduled_tasks.process_row') + + job = create_job( + template=sample_email_template, + notification_count=5, + job_status=JOB_STATUS_FINISHED, + processing_finished=datetime.utcnow() - offset, + ) + for i in range(0, 4): + create_notification(job=job, job_row_number=i) + + check_for_missing_rows_in_completed_jobs() + + assert process_row.called is False + + def test_check_for_missing_rows_in_completed_jobs(mocker, sample_email_template): mocker.patch('app.celery.tasks.s3.get_job_and_metadata_from_s3', return_value=(load_example_csv('multiple_email'), {"sender_id": None})) @@ -407,7 +437,7 @@ def test_check_for_missing_rows_in_completed_jobs(mocker, sample_email_template) job = create_job(template=sample_email_template, notification_count=5, job_status=JOB_STATUS_FINISHED, - processing_finished=datetime.utcnow() - timedelta(minutes=11)) + processing_finished=datetime.utcnow() - timedelta(minutes=20)) for i in range(0, 4): create_notification(job=job, job_row_number=i) @@ -428,7 +458,7 @@ def test_check_for_missing_rows_in_completed_jobs_calls_save_email(mocker, sampl job = create_job(template=sample_email_template, notification_count=5, job_status=JOB_STATUS_FINISHED, - processing_finished=datetime.utcnow() - timedelta(minutes=11)) + processing_finished=datetime.utcnow() - timedelta(minutes=20)) for i in range(0, 4): create_notification(job=job, job_row_number=i) @@ -452,7 +482,7 @@ def test_check_for_missing_rows_in_completed_jobs_uses_sender_id(mocker, sample_ job = create_job(template=sample_email_template, notification_count=5, job_status=JOB_STATUS_FINISHED, - processing_finished=datetime.utcnow() - timedelta(minutes=11)) + processing_finished=datetime.utcnow() - timedelta(minutes=20)) for i in range(0, 4): create_notification(job=job, job_row_number=i) diff --git a/tests/app/dao/test_jobs_dao.py b/tests/app/dao/test_jobs_dao.py index fccb9b96c..181194010 100644 --- a/tests/app/dao/test_jobs_dao.py +++ b/tests/app/dao/test_jobs_dao.py @@ -446,14 +446,14 @@ def test_find_jobs_with_missing_rows(sample_email_template): healthy_job = create_job(template=sample_email_template, notification_count=3, job_status=JOB_STATUS_FINISHED, - processing_finished=datetime.utcnow() - timedelta(minutes=11) + processing_finished=datetime.utcnow() - timedelta(minutes=20) ) for i in range(0, 3): create_notification(job=healthy_job, job_row_number=i) job_with_missing_rows = create_job(template=sample_email_template, notification_count=5, job_status=JOB_STATUS_FINISHED, - processing_finished=datetime.utcnow() - timedelta(minutes=11) + processing_finished=datetime.utcnow() - timedelta(minutes=20) ) for i in range(0, 4): create_notification(job=job_with_missing_rows, job_row_number=i)