From a62e63fcef32b8140d33bfe29695f52354a6a536 Mon Sep 17 00:00:00 2001 From: David McDonald Date: Thu, 2 Dec 2021 09:37:50 +0000 Subject: [PATCH 1/4] Add tests for existing pagination behaviour No functionality change, just documenting what already exists --- tests/app/service/test_rest.py | 46 ++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/tests/app/service/test_rest.py b/tests/app/service/test_rest.py index 9d0c64696..b63b27d66 100644 --- a/tests/app/service/test_rest.py +++ b/tests/app/service/test_rest.py @@ -1883,6 +1883,51 @@ def test_get_notifications_for_service_without_page_count( assert len(resp['notifications']) == 1 assert resp['total'] is None assert resp['notifications'][0]['id'] == str(without_job.id) + assert 'prev' not in resp['links'] + assert 'next' not in resp['links'] + assert 'last' not in resp['links'] + + +def test_get_notifications_for_service_pagination_links( + admin_request, + sample_job, + sample_template, + sample_user, +): + for _ in range(101): + create_notification(sample_template, to_field='+447700900855', normalised_to='447700900855') + + resp = admin_request.get( + 'service.get_all_notifications_for_service', + service_id=sample_template.service_id + ) + + assert resp['total'] == 101 + assert 'prev' not in resp['links'] + assert '?page=2' in resp['links']['next'] + assert '?page=3' in resp['links']['last'] + + resp = admin_request.get( + 'service.get_all_notifications_for_service', + service_id=sample_template.service_id, + page=2 + ) + + assert resp['total'] == 101 + assert '?page=1' in resp['links']['prev'] + assert '?page=3' in resp['links']['next'] + assert '?page=3' in resp['links']['last'] + + resp = admin_request.get( + 'service.get_all_notifications_for_service', + service_id=sample_template.service_id, + page=3 + ) + + assert resp['total'] == 101 + assert '?page=2' in resp['links']['prev'] + assert 'next' not in resp['links'] + assert 'last' not in resp['links'] @pytest.mark.parametrize('should_prefix', [ @@ -2229,6 +2274,7 @@ def test_search_for_notification_by_to_field_returns_next_link_if_more_than_50( assert len(response_json['notifications']) == 50 assert 'prev' not in response_json['links'] assert 'page=2' in response_json['links']['next'] + assert 'page=2' in response_json['links']['last'] def test_search_for_notification_by_to_field_for_letter( From 989ef9c21a2dc65bc607b6dcf3cabd8fcb3de54a Mon Sep 17 00:00:00 2001 From: David McDonald Date: Thu, 2 Dec 2021 09:44:43 +0000 Subject: [PATCH 2/4] Remove `last` and `total` keys from pagination links These don't appear to be used anywhere in the admin app and this route is only used by the admin app. Therefore it is safe to remove them. We remove them because the calculate the total number of notifications or the final page number of results can be particularly slow for services with many many notifications, for example 100 seconds for a service with 500k notifications sent in the past 7 days. Given neither are being used, this will give us the potential in the next commit to reduce the number of slow queries and improve page load times. Note, I've kept the scope small by only introducing the new pagination function for this one endpoint but there could be scope in future to get all pagination using the next function if appropriate. --- app/service/rest.py | 17 ++++++++++++++--- tests/app/service/test_rest.py | 8 -------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/app/service/rest.py b/app/service/rest.py index a22373b02..4acd04bb3 100644 --- a/app/service/rest.py +++ b/app/service/rest.py @@ -1,7 +1,7 @@ import itertools from datetime import datetime -from flask import Blueprint, current_app, jsonify, request +from flask import Blueprint, current_app, jsonify, request, url_for from notifications_utils.letter_timings import ( letter_can_be_cancelled, too_late_to_cancel_letter, @@ -441,11 +441,22 @@ def get_all_notifications_for_service(service_id): notifications = [notification.serialize_for_csv() for notification in pagination.items] else: notifications = notification_with_template_schema.dump(pagination.items, many=True).data + + + def get_prev_next_pagination_links(pagination, endpoint, **kwargs): + if 'page' in kwargs: + kwargs.pop('page', None) + links = {} + if pagination.has_prev: + links['prev'] = url_for(endpoint, page=pagination.prev_num, **kwargs) + if pagination.has_next: + links['next'] = url_for(endpoint, page=pagination.next_num, **kwargs) + return links + return jsonify( notifications=notifications, page_size=page_size, - total=pagination.total, - links=pagination_links( + links=get_prev_next_pagination_links( pagination, '.get_all_notifications_for_service', **kwargs diff --git a/tests/app/service/test_rest.py b/tests/app/service/test_rest.py index b63b27d66..020348918 100644 --- a/tests/app/service/test_rest.py +++ b/tests/app/service/test_rest.py @@ -1881,11 +1881,9 @@ def test_get_notifications_for_service_without_page_count( count_pages=False ) assert len(resp['notifications']) == 1 - assert resp['total'] is None assert resp['notifications'][0]['id'] == str(without_job.id) assert 'prev' not in resp['links'] assert 'next' not in resp['links'] - assert 'last' not in resp['links'] def test_get_notifications_for_service_pagination_links( @@ -1902,10 +1900,8 @@ def test_get_notifications_for_service_pagination_links( service_id=sample_template.service_id ) - assert resp['total'] == 101 assert 'prev' not in resp['links'] assert '?page=2' in resp['links']['next'] - assert '?page=3' in resp['links']['last'] resp = admin_request.get( 'service.get_all_notifications_for_service', @@ -1913,10 +1909,8 @@ def test_get_notifications_for_service_pagination_links( page=2 ) - assert resp['total'] == 101 assert '?page=1' in resp['links']['prev'] assert '?page=3' in resp['links']['next'] - assert '?page=3' in resp['links']['last'] resp = admin_request.get( 'service.get_all_notifications_for_service', @@ -1924,10 +1918,8 @@ def test_get_notifications_for_service_pagination_links( page=3 ) - assert resp['total'] == 101 assert '?page=2' in resp['links']['prev'] assert 'next' not in resp['links'] - assert 'last' not in resp['links'] @pytest.mark.parametrize('should_prefix', [ From c68d1a2f2391bf712cfe630f9e753dc51048d395 Mon Sep 17 00:00:00 2001 From: David McDonald Date: Fri, 3 Dec 2021 17:07:03 +0000 Subject: [PATCH 3/4] Optimise queries run for creating pagination links We have been running in to the problem in https://github.com/pallets/flask-sqlalchemy/issues/518 where our page loads very slow when viewing a single page of notifications for a service in the admin app. Tracing this back and using SQL explain analyze I can see that getting the notifications takes about a second but the second query to count how many notifications there are (to work out if there is a next page of pagination) can take up to 100 seconds. As suggested in that issue, we do the pagination ourselves. Our pagination doesn't need us to know exactly how many notifications there are, just whether there are any on the next page and that can be done without running the slow query to count how many notifications in total by using `count_pages=False`. --- app/dao/notifications_dao.py | 6 ++++-- app/service/rest.py | 37 ++++++++++++++++++++++++++++-------- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/app/dao/notifications_dao.py b/app/dao/notifications_dao.py index 0b4ba58dc..9fa26ce2a 100644 --- a/app/dao/notifications_dao.py +++ b/app/dao/notifications_dao.py @@ -241,7 +241,8 @@ def get_notifications_for_service( include_from_test_key=False, older_than=None, client_reference=None, - include_one_off=True + include_one_off=True, + error_out=True ): if page_size is None: page_size = current_app.config['PAGE_SIZE'] @@ -280,7 +281,8 @@ def get_notifications_for_service( return query.order_by(desc(Notification.created_at)).paginate( page=page, per_page=page_size, - count=count_pages + count=count_pages, + error_out=error_out, ) diff --git a/app/service/rest.py b/app/service/rest.py index 4acd04bb3..6d49b583e 100644 --- a/app/service/rest.py +++ b/app/service/rest.py @@ -420,6 +420,8 @@ def get_all_notifications_for_service(service_id): include_from_test_key = data.get('include_from_test_key', False) include_one_off = data.get('include_one_off', True) + # count_pages is not being used for whether to count the number of pages, but instead as a flag + # for whether to show pagination links count_pages = data.get('count_pages', True) pagination = notifications_dao.get_notifications_for_service( @@ -427,7 +429,7 @@ def get_all_notifications_for_service(service_id): filter_dict=data, page=page, page_size=page_size, - count_pages=count_pages, + count_pages=False, limit_days=limit_days, include_jobs=include_jobs, include_from_test_key=include_from_test_key, @@ -442,25 +444,44 @@ def get_all_notifications_for_service(service_id): else: notifications = notification_with_template_schema.dump(pagination.items, many=True).data + # We try and get the next page of results to work out if we need provide a pagination link to the next page + # in our response if it exists. Note, this could be done instead by changing `count_pages` in the previous + # call to be True which will enable us to use Flask-Sqlalchemy to tell if there is a next page of results but + # this way is much more performant for services with many results (unlike Flask SqlAlchemy, this approach + # doesn't do an additional query to count all the results of which there could be millions but instead only + # asks for a single extra page of results). + next_page_of_pagination = notifications_dao.get_notifications_for_service( + service_id, + filter_dict=data, + page=page + 1, + page_size=page_size, + count_pages=False, + limit_days=limit_days, + include_jobs=include_jobs, + include_from_test_key=include_from_test_key, + include_one_off=include_one_off, + error_out=False # False so that if there are no results, it doesn't end in aborting with a 404 + ) - def get_prev_next_pagination_links(pagination, endpoint, **kwargs): + def get_prev_next_pagination_links(current_page, next_page_exists, endpoint, **kwargs): if 'page' in kwargs: kwargs.pop('page', None) links = {} - if pagination.has_prev: - links['prev'] = url_for(endpoint, page=pagination.prev_num, **kwargs) - if pagination.has_next: - links['next'] = url_for(endpoint, page=pagination.next_num, **kwargs) + if page > 1: + links['prev'] = url_for(endpoint, page=page - 1, **kwargs) + if next_page_exists: + links['next'] = url_for(endpoint, page=page + 1, **kwargs) return links return jsonify( notifications=notifications, page_size=page_size, links=get_prev_next_pagination_links( - pagination, + page, + len(next_page_of_pagination.items), '.get_all_notifications_for_service', **kwargs - ) + ) if count_pages else {} ), 200 From e8dd136678f1a98854183b969da8a10bad4545e3 Mon Sep 17 00:00:00 2001 From: David McDonald Date: Fri, 3 Dec 2021 17:30:12 +0000 Subject: [PATCH 4/4] Document area that may be doing pagination links when not needed --- app/service/rest.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/app/service/rest.py b/app/service/rest.py index 6d49b583e..89a47cb63 100644 --- a/app/service/rest.py +++ b/app/service/rest.py @@ -542,6 +542,9 @@ def search_for_notification_by_to_field(service_id, search_term, statuses, notif ) return jsonify( notifications=notification_with_template_schema.dump(results.items, many=True).data, + # TODO: this may be a bug to include the pagination links as currently `search_for_notification_by_to_field` + # hardcodes the pages of results to always be the first page so not sure what benefit is to show a link to + # page 2 which would have the page parameter ignored links=pagination_links( results, '.get_all_notifications_for_service',