From 782514a0f11f94df3313205943f14955d5f2f703 Mon Sep 17 00:00:00 2001 From: Rebecca Law Date: Mon, 24 May 2021 14:36:07 +0100 Subject: [PATCH 1/4] Update the dao_fetch_todays_stats_for_service query. We have an index on Notifications(service_id, created_at), by updating the query to use between created_at rather than date(created_at) this query will use the index. Changing the query plan to use an index scan rather than a sequence scan, see query plans below. This query is still rather slow but is improved by this update. https://www.pivotaltracker.com/story/show/178263480 explain analyze SELECT notification_type, notification_status, count(id) FROM notifications WHERE service_id = 'e791dbd4-09ea-413a-b773-ead8728ddb09' AND date(created_at) = '2021-05-23' AND key_type != 'test' GROUP BY notification_type, notification_status; QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ Finalize GroupAggregate (cost=6326816.31..6326926.48 rows=24 width=22) (actual time=91666.805..91712.976 rows=10 loops=1) Group Key: notification_type, notification_status -> Gather Merge (cost=6326816.31..6326925.88 rows=48 width=22) (actual time=91666.712..91712.962 rows=30 loops=1) Workers Planned: 2 Workers Launched: 2 -> Partial GroupAggregate (cost=6325816.28..6325920.31 rows=24 width=22) (actual time=91662.907..91707.027 rows=10 loops=3) Group Key: notification_type, notification_status -> Sort (cost=6325816.28..6325842.23 rows=10379 width=30) (actual time=91635.890..91676.225 rows=270884 loops=3) Sort Key: notification_type, notification_status Sort Method: external merge Disk: 10584kB Worker 0: Sort Method: external merge Disk: 10648kB Worker 1: Sort Method: external merge Disk: 10696kB -> Parallel Seq Scan on notifications (cost=0.00..6325123.93 rows=10379 width=30) (actual time=0.036..91513.985 rows=270884 loops=3) Filter: (((key_type)::text 
<> 'test'::text) AND (service_id = 'e791dbd4-09ea-413a-b773-ead8728ddb09'::uuid) AND (date(created_at) = '2021-05-23'::date)) Rows Removed by Filter: 16191366 Planning Time: 0.760 ms Execution Time: 91714.500 ms (17 rows) explain analyze SELECT notification_type, notification_status, count(id) FROM notifications WHERE service_id = 'e791dbd4-09ea-413a-b773-ead8728ddb09' AND created_at >= '2021-05-22 23:00' and created_at < '2021-05-23 23:00' AND key_type != 'test' GROUP BY notification_type, notification_status; QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Finalize GroupAggregate (cost=2114273.37..2114279.57 rows=24 width=22) (actual time=21032.076..21035.725 rows=10 loops=1) Group Key: notification_type, notification_status -> Gather Merge (cost=2114273.37..2114278.97 rows=48 width=22) (actual time=21032.056..21035.703 rows=30 loops=1) Workers Planned: 2 Workers Launched: 2 -> Sort (cost=2113273.35..2113273.41 rows=24 width=22) (actual time=21029.261..21029.265 rows=10 loops=3) Sort Key: notification_type, notification_status Sort Method: quicksort Memory: 25kB Worker 0: Sort Method: quicksort Memory: 25kB Worker 1: Sort Method: quicksort Memory: 25kB -> Partial HashAggregate (cost=2113272.56..2113272.80 rows=24 width=22) (actual time=21029.228..21029.230 rows=10 loops=3) Group Key: notification_type, notification_status -> Parallel Bitmap Heap Scan on notifications (cost=114455.71..2111695.14 rows=210322 width=30) (actual time=4983.790..20960.581 rows=271217 loops=3) Recheck Cond: ((service_id = 'e791dbd4-09ea-413a-b773-ead8728ddb09'::uuid) AND (created_at >= '2021-05-22 23:00:00'::timestamp without time zone) AND (created_at < '2021-05-23 23:00:00'::timestamp without time zone)) Rows Removed by Index Recheck: 1456269 Filter: ((key_type)::text <> 
'test'::text) Heap Blocks: exact=12330 lossy=123418 -> Bitmap Index Scan on ix_notifications_service_created_at (cost=0.00..114329.51 rows=543116 width=0) (actual time=4973.139..4973.140 rows=813671 loops=1) Index Cond: ((service_id = 'e791dbd4-09ea-413a-b773-ead8728ddb09'::uuid) AND (created_at >= '2021-05-22 23:00:00'::timestamp without time zone) AND (created_at < '2021-05-23 23:00:00'::timestamp without time zone)) Planning Time: 0.191 ms Execution Time: 21035.770 ms (21 rows) --- app/dao/services_dao.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/app/dao/services_dao.py b/app/dao/services_dao.py index 836f5628b..d28267dae 100644 --- a/app/dao/services_dao.py +++ b/app/dao/services_dao.py @@ -429,8 +429,12 @@ def dao_fetch_stats_for_service(service_id, limit_days): def dao_fetch_todays_stats_for_service(service_id): + today = date.today() + start_date = get_london_midnight_in_utc(today) + end_date = get_london_midnight_in_utc(today + timedelta(days=1)) return _stats_for_service_query(service_id).filter( - func.date(Notification.created_at) == date.today() + Notification.created_at >= start_date, + Notification.created_at < end_date ).all() From dfacba5053753c7b8776419cf2af1ed8526a0654 Mon Sep 17 00:00:00 2001 From: Rebecca Law Date: Wed, 26 May 2021 11:50:05 +0100 Subject: [PATCH 2/4] Add a test for a bst date --- tests/app/dao/test_services_dao.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/app/dao/test_services_dao.py b/tests/app/dao/test_services_dao.py index 9671e7896..f15db3d81 100644 --- a/tests/app/dao/test_services_dao.py +++ b/tests/app/dao/test_services_dao.py @@ -874,6 +874,24 @@ def test_fetch_stats_for_today_only_includes_today(notify_db_session): assert stats['created'] == 1 +def test_fetch_stats_for_today_only_includes_today_in_bst(notify_db_session): + template = create_template(service=create_service()) + # two created email, one failed email, and one created sms with 
freeze_time('2001-03-27T23:59:00'): + # just before midnight yesterday in UTC create_notification(template=template, to_field='1', status='delivered') + with freeze_time('2001-03-27T23:01:00'): + # just after midnight yesterday in UTC + create_notification(template=template, to_field='2', status='failed') + with freeze_time('2001-03-28T12:00:00'): + # right_now, we have entered BST at this point but had not for the previous two notifications + create_notification(template=template, to_field='3', status='created') + stats = dao_fetch_todays_stats_for_service(template.service_id) + + stats = {row.status: row.count for row in stats} + assert 'delivered' not in stats + assert stats['failed'] == 1 + assert stats['created'] == 1 + + @pytest.mark.parametrize('created_at, limit_days, rows_returned', [ ('Sunday 8th July 2018 12:00', 7, 0), ('Sunday 8th July 2018 22:59', 7, 0), From ed5e3b3d9c90b88249833e67a5d2d1ba68c797ae Mon Sep 17 00:00:00 2001 From: Rebecca Law Date: Wed, 26 May 2021 13:47:53 +0100 Subject: [PATCH 3/4] Removed the end date in the filter. It's always going to be in the future anyway. After some analysis the query does perform better without it. I'll make a note to update other queries where we get today's notification data to remove the end date filter in a separate PR. 
--- app/dao/services_dao.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/app/dao/services_dao.py b/app/dao/services_dao.py index d28267dae..85449f2fb 100644 --- a/app/dao/services_dao.py +++ b/app/dao/services_dao.py @@ -431,10 +431,8 @@ def dao_fetch_stats_for_service(service_id, limit_days): def dao_fetch_todays_stats_for_service(service_id): today = date.today() start_date = get_london_midnight_in_utc(today) - end_date = get_london_midnight_in_utc(today + timedelta(days=1)) return _stats_for_service_query(service_id).filter( - Notification.created_at >= start_date, - Notification.created_at < end_date + Notification.created_at >= start_date ).all() From d4b5373ee5774af10b30c12fdc9ea6c4fb4a9385 Mon Sep 17 00:00:00 2001 From: Rebecca Law Date: Tue, 1 Jun 2021 13:03:43 +0100 Subject: [PATCH 4/4] Update tests for BST to be on the day BST starts. Add a test for just after midnight of the date the stats are collected. --- tests/app/dao/test_services_dao.py | 83 +++++++++++++++++++++++++++--- 1 file changed, 76 insertions(+), 7 deletions(-) diff --git a/tests/app/dao/test_services_dao.py b/tests/app/dao/test_services_dao.py index f15db3d81..fff928206 100644 --- a/tests/app/dao/test_services_dao.py +++ b/tests/app/dao/test_services_dao.py @@ -874,15 +874,17 @@ def test_fetch_stats_for_today_only_includes_today(notify_db_session): assert stats['created'] == 1 -def test_fetch_stats_for_today_only_includes_today_in_bst(notify_db_session): +def test_fetch_stats_for_today_only_includes_today_when_clocks_spring_forward(notify_db_session): template = create_template(service=create_service()) - # two created email, one failed email, and one created sms with freeze_time('2001-03-27T23:59:00'): - # just before midnight yesterday in UTC create_notification(template=template, to_field='1', status='delivered') - with freeze_time('2001-03-27T23:01:00'): - # just after midnight yesterday in UTC + with freeze_time('2021-03-27T23:59:59'): + # just before midnight 
yesterday in UTC -- not included + create_notification(template=template, to_field='1', status='permanent-failure') + with freeze_time('2021-03-28T00:01:00'): + # just after midnight yesterday in UTC -- included create_notification(template=template, to_field='2', status='failed') - with freeze_time('2001-03-28T12:00:00'): - # right_now, we have entered BST at this point but had not for the previous two notifications + with freeze_time('2021-03-28T12:00:00'): + # we have entered BST at this point but had not for the previous two notifications --included + # collect stats for this timestamp create_notification(template=template, to_field='3', status='created') stats = dao_fetch_todays_stats_for_service(template.service_id) @@ -890,6 +892,73 @@ def test_fetch_stats_for_today_only_includes_today_in_bst(notify_db_session): assert 'delivered' not in stats assert stats['failed'] == 1 assert stats['created'] == 1 + assert not stats.get('permanent-failure') + assert not stats.get('temporary-failure') + + +def test_fetch_stats_for_today_only_includes_today_during_bst(notify_db_session): + template = create_template(service=create_service()) + with freeze_time('2021-03-28T22:59:59'): + # just before midnight BST -- not included + create_notification(template=template, to_field='1', status='permanent-failure') + with freeze_time('2021-03-28T23:00:01'): + # just after midnight BST -- included + create_notification(template=template, to_field='2', status='failed') + with freeze_time('2021-03-29T12:00:00'): + # well after midnight BST -- included + # collect stats for this timestamp + create_notification(template=template, to_field='3', status='created') + stats = dao_fetch_todays_stats_for_service(template.service_id) + + stats = {row.status: row.count for row in stats} + assert 'delivered' not in stats + assert stats['failed'] == 1 + assert stats['created'] == 1 + assert not stats.get('permanent-failure') + + +def 
test_fetch_stats_for_today_only_includes_today_when_clocks_fall_back(notify_db_session): + template = create_template(service=create_service()) + with freeze_time('2021-10-30T22:59:59'): + # just before midnight BST -- not included + create_notification(template=template, to_field='1', status='permanent-failure') + with freeze_time('2021-10-31T23:00:01'): + # just after midnight BST -- included + create_notification(template=template, to_field='2', status='failed') + # clocks go back to UTC on 31 October at 2am + with freeze_time('2021-10-31T12:00:00'): + # well after midnight -- included + # collect stats for this timestamp + create_notification(template=template, to_field='3', status='created') + stats = dao_fetch_todays_stats_for_service(template.service_id) + + stats = {row.status: row.count for row in stats} + assert 'delivered' not in stats + assert stats['failed'] == 1 + assert stats['created'] == 1 + assert not stats.get('permanent-failure') + + +def test_fetch_stats_for_today_only_includes_during_utc(notify_db_session): + template = create_template(service=create_service()) + with freeze_time('2021-10-30T12:59:59'): + # just before midnight UTC -- not included + create_notification(template=template, to_field='1', status='permanent-failure') + with freeze_time('2021-10-31T00:00:01'): + # just after midnight UTC -- included + create_notification(template=template, to_field='2', status='failed') + # clocks go back to UTC on 31 October at 2am + with freeze_time('2021-10-31T12:00:00'): + # well after midnight -- included + # collect stats for this timestamp + create_notification(template=template, to_field='3', status='created') + stats = dao_fetch_todays_stats_for_service(template.service_id) + + stats = {row.status: row.count for row in stats} + assert 'delivered' not in stats + assert stats['failed'] == 1 + assert stats['created'] == 1 + assert not stats.get('permanent-failure') @pytest.mark.parametrize('created_at, limit_days, rows_returned', [