From d1561903ed1d2b9a60cb243eacfe835b4aecb8d9 Mon Sep 17 00:00:00 2001 From: Rebecca Law Date: Wed, 25 Oct 2017 10:40:02 +0100 Subject: [PATCH 1/3] [WIP] Removing marshmallow from the get_services endpoint should give us better performance. --- app/dao/services_dao.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/app/dao/services_dao.py b/app/dao/services_dao.py index e6e677adc..05ef0a3b8 100644 --- a/app/dao/services_dao.py +++ b/app/dao/services_dao.py @@ -371,7 +371,7 @@ def dao_fetch_monthly_historical_stats_for_service(service_id, year): @statsd(namespace='dao') def dao_fetch_todays_stats_for_all_services(include_from_test_key=True): - query = db.session.query( + subquery = db.session.query( Notification.notification_type, Notification.status, Notification.service_id, @@ -382,12 +382,25 @@ def dao_fetch_todays_stats_for_all_services(include_from_test_key=True): Notification.notification_type, Notification.status, Notification.service_id - ).order_by( - Notification.service_id ) if not include_from_test_key: - query = query.filter(Notification.key_type != KEY_TYPE_TEST) + subquery = subquery.filter(Notification.key_type != KEY_TYPE_TEST) + + subquery = subquery.subquery() + + query = db.session.query( + Service.id.label('service_id'), + Service.name, + Service.restricted, + Service.research_mode, + subquery.c.notification_type, + subquery.c.status, + subquery.c.count + ).join( + subquery, + subquery.c.service_id == Service.id + ).order_by(Service.id) return query.all() From 61a68a2d13bad287aad968a6d2962e950842c031 Mon Sep 17 00:00:00 2001 From: Rebecca Law Date: Wed, 25 Oct 2017 16:47:39 +0100 Subject: [PATCH 2/3] [WIP] --- app/dao/services_dao.py | 22 ++++++++++++++++------ app/service/rest.py | 9 ++++++--- tests/app/dao/test_services_dao.py | 18 ++++++++++-------- tests/app/service/test_rest.py | 3 ++- 4 files changed, 34 insertions(+), 18 deletions(-) diff --git a/app/dao/services_dao.py b/app/dao/services_dao.py index 05ef0a3b8..7dc0a1158 100644 --- a/app/dao/services_dao.py +++ b/app/dao/services_dao.py @@ -413,8 +413,7 @@ def fetch_stats_by_date_range_for_all_services(start_date, end_date, include_fro if start_date >= datetime.utcnow() - timedelta(days=7): table = Notification - - query = db.session.query( + subquery = db.session.query( table.notification_type, table.status, table.service_id, @@ -426,12 +425,23 @@ def fetch_stats_by_date_range_for_all_services(start_date, end_date, include_fro table.notification_type, table.status, table.service_id - ).order_by( - table.service_id ) - if not include_from_test_key: - query = query.filter(table.key_type != KEY_TYPE_TEST) + subquery = subquery.filter(table.key_type != KEY_TYPE_TEST) + subquery = subquery.subquery() + + query = db.session.query( + Service.id.label('service_id'), + Service.name, + Service.restricted, + Service.research_mode, + subquery.c.notification_type, + subquery.c.status, + subquery.c.count + ).join( + subquery, + subquery.c.service_id == Service.id + ).order_by(Service.id) return query.all() diff --git a/app/service/rest.py b/app/service/rest.py index 3d2dc2827..b3c865166 100644 --- a/app/service/rest.py +++ b/app/service/rest.py @@ -409,7 +409,7 @@ def get_detailed_service(service_id, today_only=False): def get_detailed_services(start_date, end_date, only_active=False, include_from_test_key=True): - services = {service.id: service for service in dao_fetch_all_services(only_active)} + services_old = {service.id: service for service in dao_fetch_all_services(only_active)} if start_date == datetime.utcnow().date(): stats = dao_fetch_todays_stats_for_all_services(include_from_test_key=include_from_test_key) else: @@ -417,15 +417,18 @@ def get_detailed_services(start_date, end_date, only_active=False, include_from_ stats = fetch_stats_by_date_range_for_all_services(start_date=start_date, end_date=end_date, include_from_test_key=include_from_test_key) - + services = {service.service_id: service for service in stats} for service_id, rows in itertools.groupby(stats, lambda x: x.service_id): services[service_id].statistics = statistics.format_statistics(rows) + # for service_id, rows in itertools.groupby(stats, lambda x: x.service_id): + # services[service_id].statistics = statistics.format_statistics(rows) + # if service has not sent anything, query will not have set statistics correctly for service in services.values(): if not hasattr(service, 'statistics'): service.statistics = statistics.create_zeroed_stats_dicts() - return detailed_service_schema.dump(services.values(), many=True).data + return services @service_blueprint.route('//whitelist', methods=['GET']) diff --git a/tests/app/dao/test_services_dao.py b/tests/app/dao/test_services_dao.py index 299c72777..a122cda30 100644 --- a/tests/app/dao/test_services_dao.py +++ b/tests/app/dao/test_services_dao.py @@ -711,12 +711,11 @@ def test_dao_fetch_todays_stats_for_all_services_groups_correctly(notify_db, not create_notification(notify_db, notify_db_session, service=service2) stats = dao_fetch_todays_stats_for_all_services() - assert len(stats) == 4 - assert ('sms', 'created', service1.id, 2) in stats - assert ('sms', 'failed', service1.id, 1) in stats - assert ('email', 'created', service1.id, 1) in stats - assert ('sms', 'created', service2.id, 1) in stats + assert (service1.id, service1.name, service1.restricted, service1.research_mode, 'sms', 'created', 2) in stats + assert (service1.id, service1.name, service1.restricted, service1.research_mode, 'sms', 'failed', 1) in stats + assert (service1.id, service1.name, service1.restricted, service1.research_mode, 'email', 'created', 1) in stats + assert (service2.id, service2.name, service2.restricted, service2.research_mode, 'sms', 'created', 1) in stats def test_dao_fetch_todays_stats_for_all_services_includes_all_keys_by_default(notify_db, notify_db_session): @@ -754,7 +753,8 @@ def test_fetch_stats_by_date_range_for_all_services(notify_db, notify_db_session results = fetch_stats_by_date_range_for_all_services(start_date, end_date) assert len(results) == 1 - assert results[0] == ('sms', 'created', result_one.service_id, 2) + assert results[0] == (result_one.service.id, result_one.service.name, result_one.service.restricted, + result_one.service.research_mode, 'sms', 'created', 2) @freeze_time('2001-01-01T23:59:00') @@ -793,7 +793,8 @@ def test_fetch_stats_by_date_range_for_all_services_returns_test_notifications(n results = fetch_stats_by_date_range_for_all_services(start_date, end_date, include_from_test_key=True) assert len(results) == 1 - assert results[0] == ('sms', 'created', result_one.service_id, int(expected)) + assert results[0] == (result_one.service.id, result_one.service.name, result_one.service.restricted, + result_one.service.research_mode, 'sms', 'created', int(expected)) @pytest.mark.parametrize("start_delta, end_delta, expected", @@ -820,7 +821,8 @@ def test_fetch_stats_by_date_range_during_bst_hour_for_all_services_returns_test results = fetch_stats_by_date_range_for_all_services(start_date, end_date, include_from_test_key=True) assert len(results) == 1 - assert results[0] == ('sms', 'created', result_one.service_id, int(expected)) + assert results[0] == (result_one.service.id, result_one.service.name, result_one.service.restricted, + result_one.service.research_mode, 'sms', 'created', int(expected)) @freeze_time('2001-01-01T23:59:00') diff --git a/tests/app/service/test_rest.py b/tests/app/service/test_rest.py index 3e1483d1a..909f0be6e 100644 --- a/tests/app/service/test_rest.py +++ b/tests/app/service/test_rest.py @@ -1753,7 +1753,8 @@ def test_get_detailed_services_only_includes_todays_notifications(notify_db, not @pytest.mark.parametrize( 'set_time', - ['2017-03-28T12:00:00', '2017-01-28T12:00:00', '2017-01-02T12:00:00', '2017-10-31T12:00:00'] + ['2017-03-28T12:00:00'] + #, '2017-01-28T12:00:00', '2017-01-02T12:00:00', '2017-10-31T12:00:00'] ) def test_get_detailed_services_for_date_range(notify_db, notify_db_session, set_time): from app.service.rest import get_detailed_services From 1998034b52e39c9e1bafe037458b6289ca96732d Mon Sep 17 00:00:00 2001 From: Rebecca Law Date: Thu, 26 Oct 2017 12:15:52 +0100 Subject: [PATCH 3/3] Refactored the get_detailed_services to stop using marshmallow. Also removed an extra query to services. The query has been refactored to use an outer join to services on the notifications or notification_history table. The expectation is that this change will improve the performance of the trial/live-services pages for platform admins. --- app/dao/services_dao.py | 17 +++++++++++---- app/service/rest.py | 35 ++++++++++++++++++------------ tests/app/dao/test_services_dao.py | 23 +++++++++++++------- tests/app/service/test_rest.py | 3 +-- 4 files changed, 50 insertions(+), 28 deletions(-) diff --git a/app/dao/services_dao.py b/app/dao/services_dao.py index 7dc0a1158..f9347b6b7 100644 --- a/app/dao/services_dao.py +++ b/app/dao/services_dao.py @@ -369,7 +369,7 @@ def dao_fetch_monthly_historical_stats_for_service(service_id, year): @statsd(namespace='dao') -def dao_fetch_todays_stats_for_all_services(include_from_test_key=True): +def dao_fetch_todays_stats_for_all_services(include_from_test_key=True, only_active=True): subquery = db.session.query( Notification.notification_type, @@ -394,19 +394,24 @@ def dao_fetch_todays_stats_for_all_services(include_from_test_key=True): Service.name, Service.restricted, Service.research_mode, + Service.active, + Service.created_at, subquery.c.notification_type, subquery.c.status, subquery.c.count - ).join( + ).outerjoin( subquery, subquery.c.service_id == Service.id ).order_by(Service.id) + if only_active: + query = query.filter(Service.active) + return query.all() @statsd(namespace='dao') -def fetch_stats_by_date_range_for_all_services(start_date, end_date, include_from_test_key=True): +def fetch_stats_by_date_range_for_all_services(start_date, end_date, include_from_test_key=True, only_active=True): start_date = get_london_midnight_in_utc(start_date) end_date = get_london_midnight_in_utc(end_date + timedelta(days=1)) table = NotificationHistory @@ -435,13 +440,17 @@ def fetch_stats_by_date_range_for_all_services(start_date, end_date, include_fro Service.name, Service.restricted, Service.research_mode, + Service.active, + Service.created_at, subquery.c.notification_type, subquery.c.status, subquery.c.count - ).join( + ).outerjoin( subquery, subquery.c.service_id == Service.id ).order_by(Service.id) + if only_active: + query = query.filter(Service.active) return query.all() diff --git a/app/service/rest.py b/app/service/rest.py index b3c865166..522ad30d9 100644 --- a/app/service/rest.py +++ b/app/service/rest.py @@ -409,26 +409,33 @@ def get_detailed_service(service_id, today_only=False): def get_detailed_services(start_date, end_date, only_active=False, include_from_test_key=True): - services_old = {service.id: service for service in dao_fetch_all_services(only_active)} if start_date == datetime.utcnow().date(): - stats = dao_fetch_todays_stats_for_all_services(include_from_test_key=include_from_test_key) + stats = dao_fetch_todays_stats_for_all_services(include_from_test_key=include_from_test_key, + only_active=only_active) else: stats = fetch_stats_by_date_range_for_all_services(start_date=start_date, end_date=end_date, - include_from_test_key=include_from_test_key) - services = {service.service_id: service for service in stats} + include_from_test_key=include_from_test_key, + only_active=only_active) + results = [] for service_id, rows in itertools.groupby(stats, lambda x: x.service_id): - services[service_id].statistics = statistics.format_statistics(rows) - - # for service_id, rows in itertools.groupby(stats, lambda x: x.service_id): - # services[service_id].statistics = statistics.format_statistics(rows) - - # if service has not sent anything, query will not have set statistics correctly - for service in services.values(): - if not hasattr(service, 'statistics'): - service.statistics = statistics.create_zeroed_stats_dicts() - return services + rows = list(rows) + if rows[0].count is None: + s = statistics.create_zeroed_stats_dicts() + else: + s = statistics.format_statistics(rows) + results.append({ + 'id': str(rows[0].service_id), + 'name': rows[0].name, + 'notification_type': rows[0].notification_type, + 'research_mode': rows[0].research_mode, + 'restricted': rows[0].restricted, + 'active': rows[0].active, + 'created_at': rows[0].created_at, + 'statistics': s + }) + return results @service_blueprint.route('//whitelist', methods=['GET']) diff --git a/tests/app/dao/test_services_dao.py b/tests/app/dao/test_services_dao.py index a122cda30..cbfbe9c45 100644 --- a/tests/app/dao/test_services_dao.py +++ b/tests/app/dao/test_services_dao.py @@ -682,7 +682,7 @@ def test_dao_fetch_todays_stats_for_all_services_includes_all_services(notify_db assert stats == sorted(stats, key=lambda x: x.service_id) -def test_dao_fetch_todays_stats_for_all_services_only_includes_today(notify_db): +def test_dao_fetch_todays_stats_for_all_services_only_includes_today(notify_db, notify_db_session): with freeze_time('2001-01-01T23:59:00'): just_before_midnight_yesterday = create_notification(notify_db, None, to_field='1', status='delivered') @@ -712,10 +712,14 @@ def test_dao_fetch_todays_stats_for_all_services_groups_correctly(notify_db, not stats = dao_fetch_todays_stats_for_all_services() assert len(stats) == 4 - assert (service1.id, service1.name, service1.restricted, service1.research_mode, 'sms', 'created', 2) in stats - assert (service1.id, service1.name, service1.restricted, service1.research_mode, 'sms', 'failed', 1) in stats - assert (service1.id, service1.name, service1.restricted, service1.research_mode, 'email', 'created', 1) in stats - assert (service2.id, service2.name, service2.restricted, service2.research_mode, 'sms', 'created', 1) in stats + assert (service1.id, service1.name, service1.restricted, service1.research_mode, service1.active, + service1.created_at, 'sms', 'created', 2) in stats + assert (service1.id, service1.name, service1.restricted, service1.research_mode, service1.active, + service1.created_at, 'sms', 'failed', 1) in stats + assert (service1.id, service1.name, service1.restricted, service1.research_mode, service1.active, + service1.created_at, 'email', 'created', 1) in stats + assert (service2.id, service2.name, service2.restricted, service2.research_mode, service2.active, + service2.created_at, 'sms', 'created', 1) in stats def test_dao_fetch_todays_stats_for_all_services_includes_all_keys_by_default(notify_db, notify_db_session): @@ -754,7 +758,8 @@ def test_fetch_stats_by_date_range_for_all_services(notify_db, notify_db_session assert len(results) == 1 assert results[0] == (result_one.service.id, result_one.service.name, result_one.service.restricted, - result_one.service.research_mode, 'sms', 'created', 2) + result_one.service.research_mode, result_one.service.active, + result_one.service.created_at, 'sms', 'created', 2) @freeze_time('2001-01-01T23:59:00') @@ -794,7 +799,8 @@ def test_fetch_stats_by_date_range_for_all_services_returns_test_notifications(n assert len(results) == 1 assert results[0] == (result_one.service.id, result_one.service.name, result_one.service.restricted, - result_one.service.research_mode, 'sms', 'created', int(expected)) + result_one.service.research_mode, result_one.service.active, result_one.service.created_at, + 'sms', 'created', int(expected)) @pytest.mark.parametrize("start_delta, end_delta, expected", @@ -822,7 +828,8 @@ def test_fetch_stats_by_date_range_during_bst_hour_for_all_services_returns_test assert len(results) == 1 assert results[0] == (result_one.service.id, result_one.service.name, result_one.service.restricted, - result_one.service.research_mode, 'sms', 'created', int(expected)) + result_one.service.research_mode, result_one.service.active, result_one.service.created_at, + 'sms', 'created', int(expected)) @freeze_time('2001-01-01T23:59:00') diff --git a/tests/app/service/test_rest.py b/tests/app/service/test_rest.py index 909f0be6e..3e1483d1a 100644 --- a/tests/app/service/test_rest.py +++ b/tests/app/service/test_rest.py @@ -1753,8 +1753,7 @@ def test_get_detailed_services_only_includes_todays_notifications(notify_db, not @pytest.mark.parametrize( 'set_time', - ['2017-03-28T12:00:00'] - #, '2017-01-28T12:00:00', '2017-01-02T12:00:00', '2017-10-31T12:00:00'] + ['2017-03-28T12:00:00', '2017-01-28T12:00:00', '2017-01-02T12:00:00', '2017-10-31T12:00:00'] ) def test_get_detailed_services_for_date_range(notify_db, notify_db_session, set_time): from app.service.rest import get_detailed_services