From ddbf55648646ac3e45ea669bf63fa2f471daeb32 Mon Sep 17 00:00:00 2001
From: Ben Thorner
Date: Tue, 11 Jan 2022 16:33:56 +0000
Subject: [PATCH] Rewrite task to aggregate status by service

This is a step towards parallelising the task by service and day.
---
 app/celery/reporting_tasks.py           | 41 ++++++++++++++++---
 app/dao/fact_notification_status_dao.py | 40 ++++++++++--------------
 2 files changed, 42 insertions(+), 39 deletions(-)

diff --git a/app/celery/reporting_tasks.py b/app/celery/reporting_tasks.py
index fae3f148e..8ac841c9e 100644
--- a/app/celery/reporting_tasks.py
+++ b/app/celery/reporting_tasks.py
@@ -3,7 +3,7 @@ from datetime import datetime, timedelta
 from flask import current_app
 from notifications_utils.timezones import convert_utc_to_bst
 
-from app import notify_celery
+from app import db, notify_celery
 from app.config import QueueNames
 from app.cronitor import cronitor
 from app.dao.fact_billing_dao import (
@@ -11,10 +11,10 @@ from app.dao.fact_billing_dao import (
     update_fact_billing,
 )
 from app.dao.fact_notification_status_dao import (
-    fetch_notification_status_for_day,
+    fetch_status_data_for_service_and_day,
     update_fact_notification_status,
 )
-from app.models import EMAIL_TYPE, LETTER_TYPE, SMS_TYPE
+from app.models import EMAIL_TYPE, LETTER_TYPE, SMS_TYPE, Service
 
 
 @notify_celery.task(name="create-nightly-billing")
@@ -124,17 +124,28 @@ def create_nightly_notification_status_for_day(process_day, notification_type):
         f'create-nightly-notification-status-for-day task for {process_day} type {notification_type}: started'
     )
 
-    start = datetime.utcnow()
-    transit_data = fetch_notification_status_for_day(process_day=process_day, notification_type=notification_type)
-    end = datetime.utcnow()
-    current_app.logger.info(
-        f'create-nightly-notification-status-for-day task for {process_day} type {notification_type}: '
-        f'data fetched in {(end - start).seconds} seconds'
-    )
+    for (service_id,) in db.session.query(Service.id):
+        start = datetime.utcnow()
+        transit_data = fetch_status_data_for_service_and_day(
+            process_day=process_day,
+            notification_type=notification_type,
+            service_id=service_id,
+        )
 
-    update_fact_notification_status(transit_data, process_day, notification_type)
+        end = datetime.utcnow()
+        current_app.logger.info(
+            f'create-nightly-notification-status-for-day task for {process_day} type {notification_type}: '
+            f'data fetched in {(end - start).seconds} seconds'
+        )
 
-    current_app.logger.info(
-        f'create-nightly-notification-status-for-day task for {process_day} type {notification_type}: '
-        f'task complete - {len(transit_data)} rows updated'
-    )
+        update_fact_notification_status(
+            transit_data=transit_data,
+            process_day=process_day,
+            notification_type=notification_type,
+            service_id=service_id
+        )
+
+        current_app.logger.info(
+            f'create-nightly-notification-status-for-day task for {process_day} type {notification_type}: '
+            f'task complete - {len(transit_data)} rows updated'
+        )
diff --git a/app/dao/fact_notification_status_dao.py b/app/dao/fact_notification_status_dao.py
index ad26ddd2d..3e315da5a 100644
--- a/app/dao/fact_notification_status_dao.py
+++ b/app/dao/fact_notification_status_dao.py
@@ -1,6 +1,5 @@
 from datetime import datetime, time, timedelta
 
-from flask import current_app
 from notifications_utils.timezones import convert_bst_to_utc
 from sqlalchemy import Date, case, func
 from sqlalchemy.dialects.postgresql import insert
@@ -36,29 +35,21 @@ from app.utils import (
 )
 
 
-def fetch_notification_status_for_day(process_day, notification_type):
+def fetch_status_data_for_service_and_day(process_day, service_id, notification_type):
     start_date = convert_bst_to_utc(datetime.combine(process_day, time.min))
     end_date = convert_bst_to_utc(datetime.combine(process_day + timedelta(days=1), time.min))
 
-    current_app.logger.info("Fetch ft_notification_status for {} to {}".format(start_date, end_date))
+    # query notifications or notification_history for the day, depending on their data retention
+    service = Service.query.get(service_id)
+    table = get_notification_table_to_use(service, notification_type, process_day, has_delete_task_run=False)
 
-    all_data_for_process_day = []
-    services = Service.query.all()
-    # for each service query notifications or notification_history for the day, depending on their data retention
-    for service in services:
-        table = get_notification_table_to_use(service, notification_type, process_day, has_delete_task_run=False)
-
-        data_for_service_and_type = query_for_fact_status_data(
-            table=table,
-            start_date=start_date,
-            end_date=end_date,
-            notification_type=notification_type,
-            service_id=service.id
-        )
-
-        all_data_for_process_day += data_for_service_and_type
-
-    return all_data_for_process_day
+    return query_for_fact_status_data(
+        table=table,
+        start_date=start_date,
+        end_date=end_date,
+        notification_type=notification_type,
+        service_id=service.id
+    )
 
 
 def query_for_fact_status_data(table, start_date, end_date, notification_type, service_id):
@@ -86,18 +77,19 @@ def query_for_fact_status_data(table, start_date, end_date, notification_type, s
 
 
 @autocommit
-def update_fact_notification_status(data, process_day, notification_type):
+def update_fact_notification_status(transit_data, process_day, notification_type, service_id):
     table = FactNotificationStatus.__table__
     FactNotificationStatus.query.filter(
         FactNotificationStatus.bst_date == process_day,
-        FactNotificationStatus.notification_type == notification_type
+        FactNotificationStatus.notification_type == notification_type,
+        FactNotificationStatus.service_id == service_id,
     ).delete()
 
-    for row in data:
+    for row in transit_data:
         stmt = insert(table).values(
             bst_date=process_day,
             template_id=row.template_id,
-            service_id=row.service_id,
+            service_id=service_id,
             job_id=row.job_id,
             notification_type=notification_type,
             key_type=row.key_type,
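
Note, not part of the patch above: the commit message frames this change as a step towards parallelising the nightly status rebuild by service and day. A minimal sketch of how the new per-service loop could later fan out to one Celery subtask per service follows; the subtask name, the dispatch helper and the use of QueueNames.REPORTING are assumptions for illustration, not code that exists in the repository.

    # Sketch only: queue one subtask per service instead of looping in-process.
    # Task name, helper name and queue choice are illustrative assumptions.
    from datetime import datetime

    from app import db, notify_celery
    from app.config import QueueNames
    from app.dao.fact_notification_status_dao import (
        fetch_status_data_for_service_and_day,
        update_fact_notification_status,
    )
    from app.models import Service


    @notify_celery.task(name="create-nightly-notification-status-for-service-and-day")
    def create_nightly_notification_status_for_service_and_day(process_day, service_id, notification_type):
        # one unit of work: rebuild ft_notification_status for a single service and day
        process_day = datetime.strptime(process_day, "%Y-%m-%d").date()

        transit_data = fetch_status_data_for_service_and_day(
            process_day=process_day,
            notification_type=notification_type,
            service_id=service_id,
        )
        update_fact_notification_status(
            transit_data=transit_data,
            process_day=process_day,
            notification_type=notification_type,
            service_id=service_id,
        )


    def dispatch_status_subtasks(process_day, notification_type):
        # replaces the in-process loop: one queued subtask per service, so the work
        # can run in parallel and a failure is isolated to a single service
        for (service_id,) in db.session.query(Service.id):
            create_nightly_notification_status_for_service_and_day.apply_async(
                kwargs={
                    'process_day': process_day.isoformat(),  # task arguments must be serialisable
                    'service_id': str(service_id),
                    'notification_type': notification_type,
                },
                queue=QueueNames.REPORTING,
            )

Because update_fact_notification_status is decorated with @autocommit, each call already deletes and re-inserts the rows for a single service and day in its own transaction, which is what makes splitting the work per service safe.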