From 9ad33045dd3b663c1c5c299433dda8ebdd5c3396 Mon Sep 17 00:00:00 2001
From: Chris Hill-Scott
Date: Sat, 26 Sep 2020 10:57:21 +0100
Subject: [PATCH 1/2] Improve efficiency of process missing rows task
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For every missing row this was:

- downloading the CSV file from S3
- looping through every row in it until it found the one matching the index of the missing row

`RecipientCSV` implements `__getitem__`[1] (which maybe it didn’t before) so we can create it once, then index the relevant row directly.

***

1. https://github.com/alphagov/notifications-utils/blob/5ae0572d4188483fa140bccc128ecf80f3473000/notifications_utils/recipients.py#L78-L79
---
 app/celery/scheduled_tasks.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/app/celery/scheduled_tasks.py b/app/celery/scheduled_tasks.py
index 4fcac0bbe..c6d8aea5d 100644
--- a/app/celery/scheduled_tasks.py
+++ b/app/celery/scheduled_tasks.py
@@ -237,14 +237,13 @@ def check_for_missing_rows_in_completed_jobs():
     jobs_and_job_size = find_jobs_with_missing_rows()
     for x in jobs_and_job_size:
         job = x[1]
+        recipient_csv, template, sender_id = get_recipient_csv_and_template_and_sender_id(job)
         missing_rows = find_missing_row_for_job(job.id, job.notification_count)
         for row_to_process in missing_rows:
-            recipient_csv, template, sender_id = get_recipient_csv_and_template_and_sender_id(job)
-            for row in recipient_csv.get_rows():
-                if row.index == row_to_process.missing_row:
-                    current_app.logger.info(
-                        "Processing missing row: {} for job: {}".format(row_to_process.missing_row, job.id))
-                    process_row(row, template, job, job.service, sender_id=sender_id)
+            row = recipient_csv[row_to_process.missing_row]
+            current_app.logger.info(
+                "Processing missing row: {} for job: {}".format(row_to_process.missing_row, job.id))
+            process_row(row, template, job, job.service, sender_id=sender_id)
 
 
 @notify_celery.task(name='check-for-services-with-high-failure-rates-or-sending-to-tv-numbers')

From 1d50bfaafc97682a11992dbcef8510914c231244 Mon Sep 17 00:00:00 2001
From: Chris Hill-Scott
Date: Sat, 26 Sep 2020 12:09:54 +0100
Subject: [PATCH 2/2] Remove unused column from query

---
 app/celery/scheduled_tasks.py  | 5 ++---
 app/dao/jobs_dao.py            | 1 -
 tests/app/dao/test_jobs_dao.py | 3 +--
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/app/celery/scheduled_tasks.py b/app/celery/scheduled_tasks.py
index c6d8aea5d..f09630bfa 100644
--- a/app/celery/scheduled_tasks.py
+++ b/app/celery/scheduled_tasks.py
@@ -234,9 +234,8 @@ def check_templated_letter_state():
 
 @notify_celery.task(name='check-for-missing-rows-in-completed-jobs')
 def check_for_missing_rows_in_completed_jobs():
-    jobs_and_job_size = find_jobs_with_missing_rows()
-    for x in jobs_and_job_size:
-        job = x[1]
+    jobs = find_jobs_with_missing_rows()
+    for job in jobs:
         recipient_csv, template, sender_id = get_recipient_csv_and_template_and_sender_id(job)
         missing_rows = find_missing_row_for_job(job.id, job.notification_count)
         for row_to_process in missing_rows:
diff --git a/app/dao/jobs_dao.py b/app/dao/jobs_dao.py
index ce09fef7b..b2b512013 100644
--- a/app/dao/jobs_dao.py
+++ b/app/dao/jobs_dao.py
@@ -214,7 +214,6 @@ def find_jobs_with_missing_rows():
     ten_minutes_ago = datetime.utcnow() - timedelta(minutes=10)
     yesterday = datetime.utcnow() - timedelta(days=1)
     jobs_with_rows_missing = db.session.query(
-        func.count(Notification.id).label('actual_count'),
         Job
     ).filter(
         Job.job_status == JOB_STATUS_FINISHED,
diff --git a/tests/app/dao/test_jobs_dao.py b/tests/app/dao/test_jobs_dao.py
index fccb9b96c..d154af5f1 100644
--- a/tests/app/dao/test_jobs_dao.py
+++ b/tests/app/dao/test_jobs_dao.py
@@ -461,8 +461,7 @@ def test_find_jobs_with_missing_rows(sample_email_template):
     results = find_jobs_with_missing_rows()
 
     assert len(results) == 1
-    assert results[0].actual_count == 4
-    assert results[0][1] == job_with_missing_rows
+    assert results[0] == job_with_missing_rows
 
 
 def test_find_jobs_with_missing_rows_returns_nothing_for_a_job_completed_less_than_10_minutes_ago(