From 5d838415d3d4c7fbb12fc93d55414c8973abe783 Mon Sep 17 00:00:00 2001
From: Leo Hemsted <leo.hemsted@digital.cabinet-office.gov.uk>
Date: Wed, 9 Jan 2019 11:26:08 +0000
Subject: [PATCH 1/2] fix filter to look at right table

a query for notifications was filtering on FtNotificationStatus - we
aren't joining to that table in the query, so sqlalchemy added a cross
join between ft_notification_status (3.7k rows) and Notifications (3.9m
rows), resulting in a 1.3 trillion row materialised table. This query
took 17 hours and pending.

Also, remove orders from querys other than the outer one, since we're
grouping anyway.
---
 app/dao/fact_notification_status_dao.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/app/dao/fact_notification_status_dao.py b/app/dao/fact_notification_status_dao.py
index c273ca066..bf6ec3c8e 100644
--- a/app/dao/fact_notification_status_dao.py
+++ b/app/dao/fact_notification_status_dao.py
@@ -153,8 +153,6 @@ def fetch_notification_status_totals_for_all_services(start_date, end_date):
         FactNotificationStatus.notification_type,
         FactNotificationStatus.notification_status,
         FactNotificationStatus.key_type,
-    ).order_by(
-        FactNotificationStatus.notification_type
     )
     today = get_london_midnight_in_utc(datetime.utcnow())
     if start_date <= today.date() <= end_date:
@@ -184,7 +182,9 @@ def fetch_notification_status_totals_for_all_services(start_date, end_date):
             all_stats_table.c.notification_type
         )
     else:
-        query = stats
+        query = stats.order_by(
+            FactNotificationStatus.notification_type
+        )
     return query.all()
 
 
@@ -245,7 +245,7 @@ def fetch_stats_for_all_services_by_date_range(start_date, end_date, include_fro
             Notification.service_id
         )
         if not include_from_test_key:
-            subquery = subquery.filter(FactNotificationStatus.key_type != KEY_TYPE_TEST)
+            subquery = subquery.filter(Notification.key_type != KEY_TYPE_TEST)
         subquery = subquery.subquery()
 
         stats_for_today = db.session.query(
@@ -261,7 +261,7 @@ def fetch_stats_for_all_services_by_date_range(start_date, end_date, include_fro
         ).outerjoin(
             subquery,
             subquery.c.service_id == Service.id
-        ).order_by(Service.id)
+        )
 
         all_stats_table = stats.union_all(stats_for_today).subquery()
         query = db.session.query(

From 3e21f5748144b09c6611234cb85cbdd6d2904ca8 Mon Sep 17 00:00:00 2001
From: Leo Hemsted <leo.hemsted@digital.cabinet-office.gov.uk>
Date: Wed, 9 Jan 2019 11:43:40 +0000
Subject: [PATCH 2/2] fix platform admin stats row-order bug

now that we're reading from two tables (ft_notification_status and
notifications) for stats, we'll get a couple of rows for each
notification type. If a service doesn't have any rows in one of those
tables, the query will return a row with nulls for the notification
types and counts. Some services will have history but no stats from
today, others will have data from today but no history.

This commit acknowledges that any row might have nulls, not just the
first row.
---
 app/service/rest.py                  | 5 +----
 app/service/statistics.py            | 5 ++++-
 tests/app/service/test_statistics.py | 4 ++++
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/app/service/rest.py b/app/service/rest.py
index f62a99967..e355ebc5b 100644
--- a/app/service/rest.py
+++ b/app/service/rest.py
@@ -478,10 +478,7 @@ def get_detailed_services(start_date, end_date, only_active=False, include_from_
     results = []
     for service_id, rows in itertools.groupby(stats, lambda x: x.service_id):
         rows = list(rows)
-        if rows[0].count is None:
-            s = statistics.create_zeroed_stats_dicts()
-        else:
-            s = statistics.format_statistics(rows)
+        s = statistics.format_statistics(rows)
         results.append({
             'id': str(rows[0].service_id),
             'name': rows[0].name,
diff --git a/app/service/statistics.py b/app/service/statistics.py
index e1ed57aeb..d942ad427 100644
--- a/app/service/statistics.py
+++ b/app/service/statistics.py
@@ -13,7 +13,10 @@ def format_statistics(statistics):
     # so we can return emails/sms * created, sent, and failed
     counts = create_zeroed_stats_dicts()
     for row in statistics:
-        _update_statuses_from_row(counts[row.notification_type], row)
+        # any row could be null, if the service either has no notifications in the notifications table,
+        # or no historical data in the ft_notification_status table.
+        if row.notification_type:
+            _update_statuses_from_row(counts[row.notification_type], row)
 
     return counts
 
diff --git a/tests/app/service/test_statistics.py b/tests/app/service/test_statistics.py
index 6ba357d3b..d553b90f2 100644
--- a/tests/app/service/test_statistics.py
+++ b/tests/app/service/test_statistics.py
@@ -45,6 +45,10 @@ NewStatsRow = collections.namedtuple('row', ('notification_type', 'status', 'key
         StatsRow('sms', 'delivered', 1),
         StatsRow('sms', 'sent', 1),
     ], [0, 0, 0], [3, 2, 0], [0, 0, 0]),
+    'handles_none_rows': ([
+        StatsRow('sms', 'sending', 1),
+        StatsRow(None, None, None)
+    ], [0, 0, 0], [1, 0, 0], [0, 0, 0])
 })
 def test_format_statistics(stats, email_counts, sms_counts, letter_counts):