Adding a scheduled task to process missing rows from jobs

Sometimes a job finishes but has missed a row in the middle. It is a mystery why this happens; it could be that the task to save the notifications is being dropped.
So until we solve the underlying problem, let's find the missing rows and process them.

A new scheduled task has been added to find any "finished" jobs that do not have enough notifications created. If any notifications are missing, the task processes those rows for the job (see the sketch below).
The new task will be added to the beat schedule in the next commit.
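
For context, here is a minimal sketch of what the task could look like, based only on the names exercised by the tests in this commit (check_for_missing_rows_in_completed_jobs, find_jobs_with_missing_rows, find_missing_row_for_job, process_row, job.service, job.notification_count). The get_recipient_csv helper, the missing_row attribute, the task-name string and the notify_celery registration are assumptions, not part of this diff:

    # app/celery/scheduled_tasks.py -- illustrative sketch only
    from app import notify_celery
    from app.celery.tasks import get_recipient_csv, process_row  # get_recipient_csv is a hypothetical helper name
    from app.dao.jobs_dao import find_jobs_with_missing_rows, find_missing_row_for_job


    @notify_celery.task(name='check-for-missing-rows-in-completed-jobs')  # task name is an assumption
    def check_for_missing_rows_in_completed_jobs():
        for result in find_jobs_with_missing_rows():
            # the query returns the created-notification count and the Job (see the dao tests below)
            job = result[1]
            missing_rows = find_missing_row_for_job(job.id, job.notification_count)
            missing_row_numbers = {row.missing_row for row in missing_rows}  # attribute name is an assumption
            # re-read the original CSV from S3 and hand only the missing rows back to process_row
            recipient_csv = get_recipient_csv(job, job.template)
            for row in recipient_csv.rows:
                if row.index in missing_row_numbers:
                    process_row(row, job.template, job, job.service)

The processing_finished cut-off exercised in the tests (11 minutes vs 9 minutes) suggests the lookup deliberately skips jobs that finished within the last 10 minutes, so rows that are still being saved are not picked up twice.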

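The beat-schedule entry promised for the next commit would presumably look something like this; the interval, queue name and exact task-name string are assumptions:

    # app/config.py (follow-up commit) -- illustrative sketch only; this entry
    # would be merged into the existing CELERYBEAT_SCHEDULE dict
    from celery.schedules import crontab

    CELERYBEAT_SCHEDULE_ADDITION = {
        'check-for-missing-rows-in-completed-jobs': {
            'task': 'check-for-missing-rows-in-completed-jobs',
            'schedule': crontab(minute='*/10'),      # interval is an assumption
            'options': {'queue': 'periodic-tasks'},  # queue name is an assumption
        },
    }
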
A unique key constraint has been added to Notifications, on job_id and job_row_number, to ensure that the same row is not added twice. Any index or constraint can affect performance, but this unique constraint should not affect it enough for us to notice.
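
A sketch of the kind of migration that adds the constraint (the columns come from the new test, test_unique_key_on_job_id_and_job_row_number; the constraint name and revision identifiers are placeholders):

    # migrations/versions/xxxx_notification_unique_row.py -- illustrative sketch only
    from alembic import op

    revision = 'xxxx'       # placeholder
    down_revision = 'yyyy'  # placeholder


    def upgrade():
        op.create_unique_constraint(
            'uq_notifications_job_id_job_row_number',  # constraint name is an assumption
            'notifications',
            ['job_id', 'job_row_number'],
        )


    def downgrade():
        op.drop_constraint('uq_notifications_job_id_job_row_number', 'notifications', type_='unique')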
Rebecca Law
2019-11-05 16:47:00 +00:00
parent 975af113e4
commit db5a50c5a7
7 changed files with 160 additions and 56 deletions

View File

@@ -3,6 +3,7 @@ from unittest.mock import call
import pytest
from freezegun import freeze_time
from mock import mock
from app import db
from app.celery import scheduled_tasks
@@ -16,6 +17,7 @@ from app.celery.scheduled_tasks import (
replay_created_notifications,
check_precompiled_letter_state,
check_templated_letter_state,
check_for_missing_rows_in_completed_jobs
)
from app.config import QueueNames, TaskNames
from app.dao.jobs_dao import dao_get_job_by_id
@@ -32,6 +34,7 @@ from app.models import (
NOTIFICATION_PENDING_VIRUS_CHECK,
)
from app.v2.errors import JobIncompleteError
from tests.app import load_example_csv
from tests.app.db import (
create_notification,
@@ -403,3 +406,47 @@ def test_check_templated_letter_state_during_utc(mocker, sample_letter_template)
subject="[test] Letters still in 'created' status",
ticket_type='incident'
)
def test_check_for_missing_rows_in_completed_jobs(mocker, sample_email_template):
mocker.patch('app.celery.tasks.s3.get_job_from_s3', return_value=load_example_csv('multiple_email'))
mocker.patch('app.encryption.encrypt', return_value="something_encrypted")
process_row = mocker.patch('app.celery.scheduled_tasks.process_row')
job = create_job(template=sample_email_template,
notification_count=5,
job_status=JOB_STATUS_FINISHED,
processing_finished=datetime.utcnow() - timedelta(minutes=11))
for i in range(0, 4):
create_notification(job=job, job_row_number=i)
check_for_missing_rows_in_completed_jobs()
process_row.assert_called_once_with(
mock.ANY, mock.ANY, job, job.service
)
def test_check_for_missing_rows_in_completed_jobs_calls_save_email(mocker, sample_email_template):
mocker.patch('app.celery.tasks.s3.get_job_from_s3', return_value=load_example_csv('multiple_email'))
save_email_task = mocker.patch('app.celery.tasks.save_email.apply_async')
mocker.patch('app.encryption.encrypt', return_value="something_encrypted")
mocker.patch('app.celery.tasks.create_uuid', return_value='uuid')
job = create_job(template=sample_email_template,
notification_count=5,
job_status=JOB_STATUS_FINISHED,
processing_finished=datetime.utcnow() - timedelta(minutes=11))
for i in range(0, 4):
create_notification(job=job, job_row_number=i)
check_for_missing_rows_in_completed_jobs()
save_email_task.assert_called_once_with(
(
str(job.service_id),
"uuid",
"something_encrypted",
),
{},
queue="database-tasks"
)

View File

@@ -4,6 +4,7 @@ from functools import partial
import pytest
from freezegun import freeze_time
from sqlalchemy.exc import IntegrityError
from app.dao.jobs_dao import (
can_letter_job_be_cancelled,
@@ -415,21 +416,45 @@ def test_can_letter_job_be_cancelled_returns_false_and_error_message_if_notifica
def test_find_jobs_with_missing_rows(sample_email_template):
job = create_job(template=sample_email_template, notification_count=5, job_status=JOB_STATUS_FINISHED)
job = create_job(template=sample_email_template,
notification_count=5,
job_status=JOB_STATUS_FINISHED,
processing_finished=datetime.utcnow() - timedelta(minutes=11)
)
for i in range(0, 4):
create_notification(job=job, job_row_number=i)
results = find_jobs_with_missing_rows()
assert len(results) == 1
assert results[0] == (4, 4, job.id, job.service_id)
assert results[0][0] == 4
assert results[0][1] == job
def test_find_jobs_with_missing_rows_returns_nothing_for_a_job_completed_less_than_10_minutes_ago(
sample_email_template
):
job = create_job(template=sample_email_template,
notification_count=5,
job_status=JOB_STATUS_FINISHED,
processing_finished=datetime.utcnow() - timedelta(minutes=9)
)
for i in range(0, 4):
create_notification(job=job, job_row_number=i)
results = find_jobs_with_missing_rows()
assert len(results) == 0
@pytest.mark.parametrize('status', ['pending', 'in progress', 'cancelled', 'scheduled'])
def test_find_jobs_with_missing_rows_doesnt_return_jobs_that_are_not_finished(
sample_email_template, status
):
job = create_job(template=sample_email_template, notification_count=5, job_status=status)
job = create_job(template=sample_email_template,
notification_count=5,
job_status=status,
processing_finished=datetime.utcnow() - timedelta(minutes=11))
for i in range(0, 4):
create_notification(job=job, job_row_number=i)
@@ -439,7 +464,10 @@ def test_find_jobs_with_missing_rows_doesnt_return_jobs_that_are_not_finished(
def test_find_missing_row_for_job(sample_email_template):
job = create_job(template=sample_email_template, notification_count=5, job_status=JOB_STATUS_FINISHED)
job = create_job(template=sample_email_template,
notification_count=5,
job_status=JOB_STATUS_FINISHED,
processing_finished=datetime.utcnow() - timedelta(minutes=11))
create_notification(job=job, job_row_number=0)
create_notification(job=job, job_row_number=1)
create_notification(job=job, job_row_number=3)
@@ -451,7 +479,8 @@ def test_find_missing_row_for_job(sample_email_template):
def test_find_missing_row_for_job_more_than_one_missing_row(sample_email_template):
job = create_job(template=sample_email_template, notification_count=5, job_status=JOB_STATUS_FINISHED)
job = create_job(template=sample_email_template, notification_count=5, job_status=JOB_STATUS_FINISHED,
processing_finished=datetime.utcnow() - timedelta(minutes=11))
create_notification(job=job, job_row_number=0)
create_notification(job=job, job_row_number=1)
create_notification(job=job, job_row_number=4)
@@ -463,10 +492,18 @@ def test_find_missing_row_for_job_more_than_one_missing_row(sample_email_templat
def test_find_missing_row_for_job_return_none_when_row_isnt_missing(sample_email_template):
job = create_job(template=sample_email_template, notification_count=5, job_status=JOB_STATUS_FINISHED)
job = create_job(template=sample_email_template, notification_count=5, job_status=JOB_STATUS_FINISHED,
processing_finished=datetime.utcnow() - timedelta(minutes=11))
for i in range(0, 5):
create_notification(job=job, job_row_number=i)
results = find_missing_row_for_job(job.id, 5)
print(results)
assert len(results) == 0
def test_unique_key_on_job_id_and_job_row_number(sample_email_template):
job = create_job(template=sample_email_template)
create_notification(job=job, job_row_number=0)
with pytest.raises(expected_exception=IntegrityError):
create_notification(job=job, job_row_number=0)

View File

@@ -377,6 +377,7 @@ def create_job(
job_status='pending',
scheduled_for=None,
processing_started=None,
processing_finished=None,
original_file_name='some.csv',
archived=False
):
@@ -393,6 +394,7 @@ def create_job(
'job_status': job_status,
'scheduled_for': scheduled_for,
'processing_started': processing_started,
'processing_finished': processing_finished,
'archived': archived
}
job = Job(**data)