From e7ec77c5bb344f8d7e335c7c3d7151fc45e87a37 Mon Sep 17 00:00:00 2001 From: Chris Hill-Scott Date: Mon, 5 Jul 2021 15:54:06 +0100 Subject: [PATCH 1/6] Make calculating overlapping areas faster By using the simplified polygons instead of the full resolutions ones we: - query less data from SQLite - pass less data around - give Shapely a less complicated shape to do its calculations on This makes it faster to calculate how much of each electoral ward a custom area overlaps. For the two areas in our tests: Place represented by custom area | Before | After ---------------------------------|--------|-------- Bristol | 0.07s | 0.02s Skye | 0.02s | 0.01s --- app/broadcast_areas/models.py | 2 +- tests/app/broadcast_areas/test_broadcast_area.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app/broadcast_areas/models.py b/app/broadcast_areas/models.py index 4af79ed54..2ce313969 100644 --- a/app/broadcast_areas/models.py +++ b/app/broadcast_areas/models.py @@ -171,7 +171,7 @@ class CustomBroadcastArea(BaseBroadcastArea): @cached_property def count_of_phones(self): return sum( - area.polygons.ratio_of_intersection_with(self.polygons) * area.count_of_phones + area.simple_polygons.ratio_of_intersection_with(self.polygons) * area.count_of_phones for area in self.overlapping_areas ) diff --git a/tests/app/broadcast_areas/test_broadcast_area.py b/tests/app/broadcast_areas/test_broadcast_area.py index 448b76f00..e637318e0 100644 --- a/tests/app/broadcast_areas/test_broadcast_area.py +++ b/tests/app/broadcast_areas/test_broadcast_area.py @@ -377,7 +377,7 @@ def test_estimated_bleed( 'Stoke Bishop', 'Windmill Hill', ], - 73_496, + 73_119, ), ( SKYE, @@ -387,7 +387,7 @@ def test_estimated_bleed( 'Na Hearadh agus Ceann a Deas nan Loch', 'Wester Ross, Strathpeffer and Lochalsh', ], - 3_517, + 3_534, ), )) def test_count_of_phones_for_custom_area( From 775954da9d7bd79053c200c8bd117872ad6cbd99 Mon Sep 17 00:00:00 2001 From: Chris Hill-Scott Date: Mon, 5 Jul 2021 17:30:45 +0100 Subject: [PATCH 2/6] Avoid doing a single SQL query per overlapping area To count phones in a custom polygon we need to work out the percentage of overlap with each known area. This means we need to get each known area from the database to compare it. At the moment we do this by running: - one SQLite query to get the details of all matching areas - a loop, which performs one SQLite query *per area* to get the polygons This commit reduces the number of SQLite queries to one, which uses a `JOIN` to get both the details of the areas and their polygons. This gives a speed increase of about 25% for a big area like Lincolnshire. --- app/broadcast_areas/models.py | 15 ++++++++++++++- app/broadcast_areas/repo.py | 17 +++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/app/broadcast_areas/models.py b/app/broadcast_areas/models.py index 2ce313969..afb3fe13b 100644 --- a/app/broadcast_areas/models.py +++ b/app/broadcast_areas/models.py @@ -86,6 +86,12 @@ class BroadcastArea(BaseBroadcastArea, SortableMixin): def __init__(self, row): self.id, self.name, self._count_of_phones, self.library_id = row + @classmethod + def from_row_with_simple_polygons(cls, row): + instance = cls(row[:4]) + instance.simple_polygons = Polygons(row[4]) + return instance + @cached_property def polygons(self): return Polygons( @@ -162,7 +168,7 @@ class CustomBroadcastArea(BaseBroadcastArea): def overlapping_areas(self): if not self.polygons: return [] - return broadcast_area_libraries.get_areas([ + return broadcast_area_libraries.get_areas_with_simple_polygons([ overlap.data for overlap in rtree_index.query( Rect(*self.polygons.bounds) ) @@ -229,5 +235,12 @@ class BroadcastAreaLibraries(SerialisedModelCollection, GetItemByIdMixin): areas = BroadcastAreasRepository().get_areas(area_ids) return [BroadcastArea(area) for area in areas] + def get_areas_with_simple_polygons(self, *area_ids): + if len(area_ids) == 1 and isinstance(area_ids[0], list): + area_ids = area_ids[0] + + areas = BroadcastAreasRepository().get_areas_with_simple_polygons(area_ids) + return [BroadcastArea.from_row_with_simple_polygons(area) for area in areas] + broadcast_area_libraries = BroadcastAreaLibraries() diff --git a/app/broadcast_areas/repo.py b/app/broadcast_areas/repo.py index 91014077a..0e4712aba 100644 --- a/app/broadcast_areas/repo.py +++ b/app/broadcast_areas/repo.py @@ -141,6 +141,23 @@ class BroadcastAreasRepository(object): return areas + def get_areas_with_simple_polygons(self, area_ids): + q = """ + SELECT broadcast_areas.id, name, count_of_phones, broadcast_area_library_id, simple_polygons + FROM broadcast_areas + JOIN broadcast_area_polygons on broadcast_area_polygons.id = broadcast_areas.id + WHERE broadcast_areas.id IN ({}) + """.format(("?," * len(area_ids))[:-1]) + + results = self.query(q, *area_ids) + + areas = [ + (row[0], row[1], row[2], row[3], json.loads(row[4])) + for row in results + ] + + return areas + def get_all_areas_for_library(self, library_id): is_multi_tier_library = self.query(""" SELECT exists( From 6c766c24b6f42c8de5997efa3d8099314259c0c9 Mon Sep 17 00:00:00 2001 From: Chris Hill-Scott Date: Tue, 13 Jul 2021 09:21:58 +0100 Subject: [PATCH 3/6] Ensure that phones are only counted once MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The page which shows the count of phones does some logic based on how close the ‘will get’ and ‘likely to get’ numbers are. This means it accesses the `BroadcastMessage.count_of_phones` and `BroadcastMessage.count_of_phones_likely` properties multiple times. These properties are computed fresh every time, and are quite expensive to compute. By caching them in memory we can cut the page load time approximately in half. --- app/models/broadcast_message.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/models/broadcast_message.py b/app/models/broadcast_message.py index b1c0a8ad5..d0928be48 100644 --- a/app/models/broadcast_message.py +++ b/app/models/broadcast_message.py @@ -172,14 +172,14 @@ class BroadcastMessage(JSONModel): def cancelled_by(self): return User.from_id(self.cancelled_by_id) - @property + @cached_property def count_of_phones(self): return round_to_significant_figures( sum(area.count_of_phones for area in self.areas), 1 ) - @property + @cached_property def count_of_phones_likely(self): estimated_area = self.simple_polygons.estimated_area From 5e1b96a3a7b6d67459aa4b6969dbb0858c078171 Mon Sep 17 00:00:00 2001 From: Chris Hill-Scott Date: Fri, 6 Aug 2021 10:06:26 +0100 Subject: [PATCH 4/6] Remove argument unpacking from `get_areas` Making it only callable in one way is just less stuff to understand. --- app/broadcast_areas/models.py | 11 +----- app/main/views/broadcast.py | 2 +- app/models/broadcast_message.py | 6 +-- .../broadcast_areas/test_broadcast_area.py | 37 ++++++------------- 4 files changed, 17 insertions(+), 39 deletions(-) diff --git a/app/broadcast_areas/models.py b/app/broadcast_areas/models.py index afb3fe13b..4cd7f6d3b 100644 --- a/app/broadcast_areas/models.py +++ b/app/broadcast_areas/models.py @@ -227,18 +227,11 @@ class BroadcastAreaLibraries(SerialisedModelCollection, GetItemByIdMixin): def __init__(self): self.items = BroadcastAreasRepository().get_libraries() - def get_areas(self, *area_ids): - # allow people to call `get_areas('a', 'b') or get_areas(['a', 'b'])` - if len(area_ids) == 1 and isinstance(area_ids[0], list): - area_ids = area_ids[0] - + def get_areas(self, area_ids): areas = BroadcastAreasRepository().get_areas(area_ids) return [BroadcastArea(area) for area in areas] - def get_areas_with_simple_polygons(self, *area_ids): - if len(area_ids) == 1 and isinstance(area_ids[0], list): - area_ids = area_ids[0] - + def get_areas_with_simple_polygons(self, area_ids): areas = BroadcastAreasRepository().get_areas_with_simple_polygons(area_ids) return [BroadcastArea.from_row_with_simple_polygons(area) for area in areas] diff --git a/app/main/views/broadcast.py b/app/main/views/broadcast.py index 1123b5c5b..e11a8fb30 100644 --- a/app/main/views/broadcast.py +++ b/app/main/views/broadcast.py @@ -287,7 +287,7 @@ def choose_broadcast_sub_area(service_id, broadcast_message_id, library_slug, ar broadcast_message_id, service_id=current_service.id, ) - area = BroadcastMessage.libraries.get_areas(area_slug)[0] + area = BroadcastMessage.libraries.get_areas([area_slug])[0] back_link = _get_broadcast_sub_area_back_link(service_id, broadcast_message_id, library_slug) diff --git a/app/models/broadcast_message.py b/app/models/broadcast_message.py index d0928be48..a3ea36499 100644 --- a/app/models/broadcast_message.py +++ b/app/models/broadcast_message.py @@ -18,9 +18,9 @@ from app.notify_client.broadcast_message_api_client import ( broadcast_message_api_client, ) -ESTIMATED_AREA_OF_LARGEST_UK_COUNTY = broadcast_area_libraries.get_areas( +ESTIMATED_AREA_OF_LARGEST_UK_COUNTY = broadcast_area_libraries.get_areas([ 'ctyua19-E10000023' # North Yorkshire -)[0].polygons.estimated_area +])[0].polygons.estimated_area class BroadcastMessage(JSONModel): @@ -203,7 +203,7 @@ class BroadcastMessage(JSONModel): def get_areas(self, areas): return broadcast_area_libraries.get_areas( - *areas + areas ) def get_simple_polygons(self, areas): diff --git a/tests/app/broadcast_areas/test_broadcast_area.py b/tests/app/broadcast_areas/test_broadcast_area.py index e637318e0..4c2af384b 100644 --- a/tests/app/broadcast_areas/test_broadcast_area.py +++ b/tests/app/broadcast_areas/test_broadcast_area.py @@ -77,40 +77,25 @@ def test_loads_areas_from_libraries(id): assert ( broadcast_area_libraries.get('ctry19').get(id) ) == ( - broadcast_area_libraries.get_areas(id)[0] + broadcast_area_libraries.get_areas([id])[0] ) def test_get_names_of_areas(): - areas = broadcast_area_libraries.get_areas( + areas = broadcast_area_libraries.get_areas([ 'ctry19-W92000004', 'lad20-W06000014', 'ctry19-E92000001', - ) + ]) assert [area.name for area in sorted(areas)] == [ 'England', 'Vale of Glamorgan', 'Wales', ] -def test_get_areas_accepts_lists(): - areas_from_list = broadcast_area_libraries.get_areas( - [ - 'ctry19-W92000004', - 'ctry19-E92000001', - ] - ) - areas_from_args = broadcast_area_libraries.get_areas( - 'ctry19-W92000004', - 'ctry19-E92000001', - ) - assert len(areas_from_args) == len(areas_from_list) == 2 - assert areas_from_args == areas_from_list - - def test_has_polygons(): - england = broadcast_area_libraries.get_areas('ctry19-E92000001')[0] - scotland = broadcast_area_libraries.get_areas('ctry19-S92000003')[0] + england = broadcast_area_libraries.get_areas(['ctry19-E92000001'])[0] + scotland = broadcast_area_libraries.get_areas(['ctry19-S92000003'])[0] assert len(england.polygons) == 35 assert len(scotland.polygons) == 195 @@ -130,7 +115,7 @@ def test_polygons_are_enclosed(): def test_lat_long_order(): - england = broadcast_area_libraries.get_areas('ctry19-E92000001')[0] + england = broadcast_area_libraries.get_areas(['ctry19-E92000001'])[0] lat_long = england.polygons.as_coordinate_pairs_lat_long long_lat = england.polygons.as_coordinate_pairs_long_lat @@ -212,13 +197,13 @@ def test_every_area_has_count_of_phones(library): )) def test_count_of_phones_for_all_levels(area_id, area_name, expected_count): - area = broadcast_area_libraries.get_areas(area_id)[0] + area = broadcast_area_libraries.get_areas([area_id])[0] assert area.name == area_name assert area.count_of_phones == expected_count def test_city_of_london_counts_are_not_derived_from_population(): - city_of_london = broadcast_area_libraries.get_areas('lad20-E09000001')[0] + city_of_london = broadcast_area_libraries.get_areas(['lad20-E09000001'])[0] assert city_of_london.name == 'City of London' assert len(city_of_london.sub_areas) == len(CITY_OF_LONDON.WARDS) == 25 @@ -315,7 +300,7 @@ def test_phone_density( area, expected_phones_per_square_mile, ): assert close_enough( - broadcast_area_libraries.get_areas(area)[0].phone_density, + broadcast_area_libraries.get_areas([area])[0].phone_density, expected_phones_per_square_mile, ) @@ -351,11 +336,11 @@ def test_estimated_bleed( area, expected_bleed_in_m, expected_bleed_in_degrees, ): assert close_enough( - broadcast_area_libraries.get_areas(area)[0].estimated_bleed_in_m, + broadcast_area_libraries.get_areas([area])[0].estimated_bleed_in_m, expected_bleed_in_m, ) assert close_enough( - broadcast_area_libraries.get_areas(area)[0].estimated_bleed_in_degrees, + broadcast_area_libraries.get_areas([area])[0].estimated_bleed_in_degrees, expected_bleed_in_degrees, ) From de364bba3c091e2f347b4688d62a3abd2d71a049 Mon Sep 17 00:00:00 2001 From: Chris Hill-Scott Date: Fri, 6 Aug 2021 10:11:24 +0100 Subject: [PATCH 5/6] Make `overlapping_areas` a cached property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It’s quite expensive to calculate and there’s no guarantee we’ll only use it once. --- app/broadcast_areas/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/broadcast_areas/models.py b/app/broadcast_areas/models.py index 4cd7f6d3b..b33ee1ee4 100644 --- a/app/broadcast_areas/models.py +++ b/app/broadcast_areas/models.py @@ -164,7 +164,7 @@ class CustomBroadcastArea(BaseBroadcastArea): simple_polygons = polygons - @property + @cached_property def overlapping_areas(self): if not self.polygons: return [] From b273037462220c2fd3b6f64162773efe17cd6d1d Mon Sep 17 00:00:00 2001 From: Chris Hill-Scott Date: Fri, 6 Aug 2021 10:14:15 +0100 Subject: [PATCH 6/6] Use str.join to build query This avoids the nasty slice operator to trim the trailing comma. --- app/broadcast_areas/repo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/broadcast_areas/repo.py b/app/broadcast_areas/repo.py index 0e4712aba..b18a46456 100644 --- a/app/broadcast_areas/repo.py +++ b/app/broadcast_areas/repo.py @@ -130,7 +130,7 @@ class BroadcastAreasRepository(object): SELECT id, name, count_of_phones, broadcast_area_library_id FROM broadcast_areas WHERE id IN ({}) - """.format(("?," * len(area_ids))[:-1]) + """.format(",".join("?" * len(area_ids))) results = self.query(q, *area_ids) @@ -147,7 +147,7 @@ class BroadcastAreasRepository(object): FROM broadcast_areas JOIN broadcast_area_polygons on broadcast_area_polygons.id = broadcast_areas.id WHERE broadcast_areas.id IN ({}) - """.format(("?," * len(area_ids))[:-1]) + """.format(",".join("?" * len(area_ids))) results = self.query(q, *area_ids)