From 5d838415d3d4c7fbb12fc93d55414c8973abe783 Mon Sep 17 00:00:00 2001 From: Leo Hemsted Date: Wed, 9 Jan 2019 11:26:08 +0000 Subject: [PATCH 1/2] fix filter to look at right table a query for notifications was filtering on FtNotificationStatus - we aren't joining to that table in the query, so sqlalchemy added a cross join between ft_notification_status (3.7k rows) and Notifications (3.9m rows), resulting in a 1.3 trillion row materialised table. This query took 17 hours and pending. Also, remove orders from querys other than the outer one, since we're grouping anyway. --- app/dao/fact_notification_status_dao.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/app/dao/fact_notification_status_dao.py b/app/dao/fact_notification_status_dao.py index c273ca066..bf6ec3c8e 100644 --- a/app/dao/fact_notification_status_dao.py +++ b/app/dao/fact_notification_status_dao.py @@ -153,8 +153,6 @@ def fetch_notification_status_totals_for_all_services(start_date, end_date): FactNotificationStatus.notification_type, FactNotificationStatus.notification_status, FactNotificationStatus.key_type, - ).order_by( - FactNotificationStatus.notification_type ) today = get_london_midnight_in_utc(datetime.utcnow()) if start_date <= today.date() <= end_date: @@ -184,7 +182,9 @@ def fetch_notification_status_totals_for_all_services(start_date, end_date): all_stats_table.c.notification_type ) else: - query = stats + query = stats.order_by( + FactNotificationStatus.notification_type + ) return query.all() @@ -245,7 +245,7 @@ def fetch_stats_for_all_services_by_date_range(start_date, end_date, include_fro Notification.service_id ) if not include_from_test_key: - subquery = subquery.filter(FactNotificationStatus.key_type != KEY_TYPE_TEST) + subquery = subquery.filter(Notification.key_type != KEY_TYPE_TEST) subquery = subquery.subquery() stats_for_today = db.session.query( @@ -261,7 +261,7 @@ def fetch_stats_for_all_services_by_date_range(start_date, end_date, include_fro ).outerjoin( subquery, subquery.c.service_id == Service.id - ).order_by(Service.id) + ) all_stats_table = stats.union_all(stats_for_today).subquery() query = db.session.query( From 3e21f5748144b09c6611234cb85cbdd6d2904ca8 Mon Sep 17 00:00:00 2001 From: Leo Hemsted Date: Wed, 9 Jan 2019 11:43:40 +0000 Subject: [PATCH 2/2] fix platform admin stats row-order bug now that we're reading from two tables (ft_notification_status and notifications) for stats, we'll get a couple of rows for each notification type. If a service doesn't have any rows in one of those tables, the query will return a row with nulls for the notification types and counts. Some services will have history but no stats from today, others will have data from today but no history. This commit acknowledges that any row might have nulls, not just the first row. --- app/service/rest.py | 5 +---- app/service/statistics.py | 5 ++++- tests/app/service/test_statistics.py | 4 ++++ 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/app/service/rest.py b/app/service/rest.py index f62a99967..e355ebc5b 100644 --- a/app/service/rest.py +++ b/app/service/rest.py @@ -478,10 +478,7 @@ def get_detailed_services(start_date, end_date, only_active=False, include_from_ results = [] for service_id, rows in itertools.groupby(stats, lambda x: x.service_id): rows = list(rows) - if rows[0].count is None: - s = statistics.create_zeroed_stats_dicts() - else: - s = statistics.format_statistics(rows) + s = statistics.format_statistics(rows) results.append({ 'id': str(rows[0].service_id), 'name': rows[0].name, diff --git a/app/service/statistics.py b/app/service/statistics.py index e1ed57aeb..d942ad427 100644 --- a/app/service/statistics.py +++ b/app/service/statistics.py @@ -13,7 +13,10 @@ def format_statistics(statistics): # so we can return emails/sms * created, sent, and failed counts = create_zeroed_stats_dicts() for row in statistics: - _update_statuses_from_row(counts[row.notification_type], row) + # any row could be null, if the service either has no notifications in the notifications table, + # or no historical data in the ft_notification_status table. + if row.notification_type: + _update_statuses_from_row(counts[row.notification_type], row) return counts diff --git a/tests/app/service/test_statistics.py b/tests/app/service/test_statistics.py index 6ba357d3b..d553b90f2 100644 --- a/tests/app/service/test_statistics.py +++ b/tests/app/service/test_statistics.py @@ -45,6 +45,10 @@ NewStatsRow = collections.namedtuple('row', ('notification_type', 'status', 'key StatsRow('sms', 'delivered', 1), StatsRow('sms', 'sent', 1), ], [0, 0, 0], [3, 2, 0], [0, 0, 0]), + 'handles_none_rows': ([ + StatsRow('sms', 'sending', 1), + StatsRow(None, None, None) + ], [0, 0, 0], [1, 0, 0], [0, 0, 0]) }) def test_format_statistics(stats, email_counts, sms_counts, letter_counts):