From ed379a3724c80da7ba9a788d7fa5bca7e0a7a6b6 Mon Sep 17 00:00:00 2001 From: Ben Thorner Date: Tue, 10 May 2022 11:14:59 +0100 Subject: [PATCH] Fix out-of-date rows in ft_notification_status This can happen in the following scenario (primarily for letters): 1. A service has a mixture of "delivered" and "sending" letters, which the status task aggregates into two rows: sending | 123 delivered | 456 2. After the 7 day retention has passed, only the "delivered" letters will be archived [^1]. 3. The status task now looks at the history table [^2], which means it only sees the "delivered" letters. 4. The "sending" letters are eventually "delivered" and archived (before the 10 day aggregation cutoff). 5. But the status aggregation task doesn't run. This commit fixes (5). [^1]: https://github.com/alphagov/notifications-api/pull/3063 [^2]: https://github.com/alphagov/notifications-api/blob/f87ebb094d9c2037ce6a4c4a38bab87daa868be3/app/dao/fact_notification_status_dao.py#L51 --- app/dao/notifications_dao.py | 32 +++++++++++++------ .../notification_dao/test_notification_dao.py | 11 +++++++ 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/app/dao/notifications_dao.py b/app/dao/notifications_dao.py index e9fc5bd30..b3eafb386 100644 --- a/app/dao/notifications_dao.py +++ b/app/dao/notifications_dao.py @@ -13,7 +13,7 @@ from notifications_utils.recipients import ( validate_and_format_email_address, ) from notifications_utils.timezones import convert_bst_to_utc, convert_utc_to_bst -from sqlalchemy import and_, asc, desc, func, or_ +from sqlalchemy import and_, asc, desc, func, or_, union from sqlalchemy.orm import joinedload from sqlalchemy.orm.exc import NoResultFound from sqlalchemy.sql import functions @@ -807,14 +807,26 @@ def get_service_ids_with_notifications_on_date(notification_type, date): start_date = get_london_midnight_in_utc(date) end_date = get_london_midnight_in_utc(date + timedelta(days=1)) + notification_table_query = db.session.query( + Notification.service_id.label('service_id') + ).filter( + Notification.notification_type == notification_type, + # using >= + < is much more efficient than date(created_at) + Notification.created_at >= start_date, + Notification.created_at < end_date, + ) + + # Looking at this table is more efficient for historical notifications, + # provided the task to populate it has run before they were archived. + ft_status_table_query = db.session.query( + FactNotificationStatus.service_id.label('service_id') + ).filter( + FactNotificationStatus.notification_type == notification_type, + FactNotificationStatus.bst_date == date, + ) + return { - row.service_id - for row in db.session.query( - Notification.service_id - ).filter( - Notification.notification_type == notification_type, - # using >= + < is much more efficient than date(created_at) - Notification.created_at >= start_date, - Notification.created_at < end_date, - ).distinct() + row.service_id for row in db.session.query(union( + notification_table_query, ft_status_table_query + ).subquery()).distinct() } diff --git a/tests/app/dao/notification_dao/test_notification_dao.py b/tests/app/dao/notification_dao/test_notification_dao.py index 03b75e345..76c1f6ed4 100644 --- a/tests/app/dao/notification_dao/test_notification_dao.py +++ b/tests/app/dao/notification_dao/test_notification_dao.py @@ -48,6 +48,7 @@ from app.models import ( NotificationHistory, ) from tests.app.db import ( + create_ft_notification_status, create_job, create_notification, create_notification_history, @@ -1742,3 +1743,13 @@ def test_get_service_ids_with_notifications_on_date_respects_gmt_bst( create_notification(template=sample_template, created_at=created_at_utc) service_ids = get_service_ids_with_notifications_on_date(SMS_TYPE, date_to_check) assert len(service_ids) == expected_count + + +def test_get_service_ids_with_notifications_on_date_checks_ft_status( + sample_template, +): + create_notification(template=sample_template, created_at='2022-01-01T09:30') + create_ft_notification_status(template=sample_template, bst_date='2022-01-02') + + assert len(get_service_ids_with_notifications_on_date(SMS_TYPE, date(2022, 1, 1))) == 1 + assert len(get_service_ids_with_notifications_on_date(SMS_TYPE, date(2022, 1, 2))) == 1