Add broadcast area model, loading from GeoJSON

This commit adds a new model class which can be used by any app to
interact with a broadcast area. A broadcast area is one or more polygons
representing geographical areas.

It also adds some models that make browsing collections of these areas
more straightforward. So the hierarchy looks like:

> **BroadcastAreaLibraries**
> Contains multiple libraries of broadcast areas
> > **BroadcastAreaLibrary**
> > A collection of geographic areas, all of the same type, for example
> > counties or electoral wards
> > **BroadcastArea**
> > Contains one or more shapes that make up an area, for example
> > England
> > > **BroadcastArea.polygons[n]**
> > > A single shape, for example the Isle of Wight or Lindisfarne
> > > > **BroadcastArea.polygons[n][o]**
> > > > A single coordinate along a polygon

The classes support iteration, so all the areas in a library can be
looped over, for example if `countries` is an instance of
`BroadcastAreaLibrary` you can do:
```python
for country in countries:
    print(country.name)
```

The `BroadcastAreaLibraries` class also provides some useful methods for
quickly getting the polygons for an area or areas, for example to
render them on a map. So if `libraries` is an instance of
`BroadcastAreaLibraries` you can do:
```python
libraries.get_polygons_for_areas_long_lat('england', 'wales')
```

This will give polygons for the Welsh mainland, the Isle of Wight,
Anglesey, etc.

The models load data from GeoJSON files. GeoJSON is an open standard for
serialising geographic data. I’ve added a few example files taken from
http://geoportal.statistics.gov.uk to show how it works.
This commit is contained in:
Chris Hill-Scott
2020-07-06 10:53:40 +01:00
committed by Toby Lorne
parent 3573ce1437
commit 078f1dd8d3
5 changed files with 315 additions and 0 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,168 @@
import itertools
from contextlib import suppress
from pathlib import Path
from functools import lru_cache
import geojson
from notifications_utils.formatters import formatted_list
from notifications_utils.serialised_model import SerialisedModelCollection
from notifications_utils.safe_string import make_string_safe_for_id
@lru_cache(maxsize=128)
def load_geojson_file(filename):
    """Read, parse and validate a GeoJSON file stored next to this module.

    Results are memoised by ``lru_cache`` (up to 128 distinct filenames),
    so a file is not re-read while its entry stays in the cache.

    :param filename: File name or path, joined onto this module's directory.
    :returns: A ``(stem, geojson_data)`` tuple: the file name without its
        suffix, and the parsed GeoJSON object.
    :raises ValueError: If the parsed contents are not a valid GeoJSON
        object. Errors raised by ``geojson.loads`` itself propagate
        unchanged.
    """
    path = Path(__file__).resolve().parent / filename

    # RFC 7946 requires GeoJSON to be UTF-8, so read it as such rather than
    # relying on whatever the platform's default encoding happens to be
    geojson_data = geojson.loads(path.read_text(encoding='utf-8'))

    if not isinstance(geojson_data, geojson.GeoJSON) or not geojson_data.is_valid:
        raise ValueError(
            f'Contents of {path} are not valid GeoJSON'
        )

    return path.stem, geojson_data
class IdFromNameMixin:
    """Mixin that derives an ``id``, a ``repr`` and an ordering from ``name``.

    The class using this mixin is expected to provide ``self.name``.
    """

    @property
    def id(self):
        """``self.name`` passed through ``make_string_safe_for_id``."""
        name = self.name
        return make_string_safe_for_id(name)

    def __repr__(self):
        return '{}(<{}>)'.format(self.__class__.__name__, self.id)

    def __lt__(self, other):
        # sorted() only needs __lt__, so defining it here makes every
        # class that uses this mixin sortable (ordered by id)
        own_id, other_id = self.id, other.id
        return own_id < other_id
class GetItemByIdMixin:
    """Mixin adding lookup-by-id to any iterable of objects with an ``id``."""

    def get(self, id):
        """Return the first item yielded by iterating ``self`` whose id matches.

        :raises KeyError: If no item has the given ``id``.
        """
        not_found = object()
        match = next(
            (candidate for candidate in self if candidate.id == id),
            not_found,
        )
        if match is not_found:
            raise KeyError(id)
        return match
class BroadcastArea(IdFromNameMixin):
    """One named geographical area, backed by a single GeoJSON feature.

    An area is made up of one or more polygons (see ``polygons``). Two
    areas compare equal when their ids are equal.
    """

    def __init__(self, feature):
        """Store ``feature`` and check that every polygon is a closed shape.

        :param feature: GeoJSON feature mapping; its ``properties`` and
            ``geometry`` keys are read.
        :raises ValueError: If a polygon's first and last coordinates differ.
        :raises TypeError: If the geometry type is neither ``Polygon`` nor
            ``MultiPolygon``.
        """
        self.feature = feature

        for coordinates in self.polygons:
            if coordinates[0] != coordinates[-1]:
                # The CAP XML format requires shapes to be closed
                raise ValueError(
                    f'Area {self.name} is not a closed shape '
                    f'({coordinates[0]}, {coordinates[-1]})'
                )

    def __eq__(self, other):
        return self.id == other.id

    def __hash__(self):
        # Defining __eq__ alone would make instances unhashable; hash on
        # the same value equality is based on
        return hash(self.id)

    @property
    def name(self):
        """The area's name, read from the first known name property present.

        :raises KeyError: If none of the known name properties is present.
        """
        properties = self.feature['properties']
        # A tuple (not a set) so the lookup order is the same on every run
        # NOTE(review): keys look like the name fields of the bundled
        # region / county / country datasets - confirm
        for possible_name_key in ('rgn18nm', 'ctyua16nm', 'ctry19nm'):
            with suppress(KeyError):
                return properties[possible_name_key]

        raise KeyError(f'No name found in {self.feature["properties"]}')

    @property
    def polygons(self):
        """A list of polygons, each a list of coordinates.

        Only the first ring of each GeoJSON polygon is returned. In GeoJSON
        the first ring is the exterior boundary, so any holes are ignored.

        :raises TypeError: If the geometry type is neither ``Polygon`` nor
            ``MultiPolygon``.
        """
        geometry = self.feature['geometry']

        if geometry['type'] == 'MultiPolygon':
            return [
                polygons[0]
                for polygons in geometry['coordinates']
            ]

        if geometry['type'] == 'Polygon':
            return [
                geometry['coordinates'][0]
            ]

        # __name__, not __name: the latter is name-mangled inside a class
        # body and would raise AttributeError instead of this TypeError
        raise TypeError(
            f'Unknown geometry type {self.feature["geometry"]["type"]} '
            f'in {self.__class__.__name__} {self.name}'
        )

    @property
    def unenclosed_polygons(self):
        """The same polygons with the closing coordinate of each removed."""
        # Some mapping tools require shapes to be unenclosed, i.e. the
        # last point joins the first point implicitly
        return [
            coordinates[:-1] for coordinates in self.polygons
        ]
class BroadcastAreaLibrary(SerialisedModelCollection, IdFromNameMixin, GetItemByIdMixin):
    """A collection of broadcast areas loaded from one GeoJSON file.

    The library's name is the file name without its suffix.
    """

    # presumably the class SerialisedModelCollection wraps each item in -
    # confirm against notifications_utils.serialised_model
    model = BroadcastArea

    def __init__(self, filename):
        """Load ``filename`` and keep its features as this library's items."""
        stem, parsed = load_geojson_file(filename)
        self.name = stem
        self.items = parsed['features']

    def get_examples(self, max_displayed=4):
        """Return a formatted list of area names, in sorted area order.

        When truncation would hide more than one name, only the first
        ``max_displayed - 1`` names are kept, followed by an "N more…" entry.
        """
        area_names = [area.name for area in sorted(self)]
        keep = max_displayed - 1
        hidden = len(area_names) - keep

        # Hiding a single name would save no space, so only truncate when
        # more than one name would be replaced by the summary entry
        if hidden > 1:
            area_names = area_names[:keep]
            area_names.append(f'{hidden} more…')

        return formatted_list(area_names, before_each='', after_each='')
class BroadcastAreaLibraries(SerialisedModelCollection, GetItemByIdMixin):
    """All broadcast area libraries found next to this module.

    One library is created per ``*.geojson`` file in this module's directory.
    """

    # presumably the class SerialisedModelCollection wraps each item in -
    # confirm against notifications_utils.serialised_model
    model = BroadcastAreaLibrary

    def __init__(self):
        """Discover the GeoJSON files and index every area they contain.

        :raises ValueError: If the same area id appears more than once
            across all libraries.
        """
        module_directory = Path(__file__).resolve().parent
        self.items = list(module_directory.glob('*.geojson'))
        self.all_areas = list(self.get_all_areas())

        already_seen = set()
        for area in self.all_areas:
            area_id = area.id
            if area_id in already_seen:
                raise ValueError(
                    f'{area_id} found more than once in '
                    f'{self.__class__.__name__}'
                )
            already_seen.add(area_id)

    def get_all_areas(self):
        """Yield every area of every library, library by library."""
        for library in self:
            yield from library

    def get_areas(self, *area_ids):
        """Return the areas matching ``area_ids``, in the order the ids are given.

        Ids that match no area contribute nothing to the result.
        """
        found = []
        for wanted_id in area_ids:
            found.extend(
                area for area in self.all_areas if area.id == wanted_id
            )
        return found

    def get_polygons_for_areas_long_lat(self, *area_ids):
        """Return one flat list of the polygons of all the requested areas."""
        return list(itertools.chain.from_iterable(
            area.polygons for area in self.get_areas(*area_ids)
        ))

    def get_polygons_for_areas_lat_long(self, *area_ids):
        """Return the same polygons with each coordinate pair reversed."""
        reordered = []
        for polygon in self.get_polygons_for_areas_long_lat(*area_ids):
            # Source coordinates are [long, lat]; emit them as [lat, long]
            reordered.append([[lat, long] for long, lat in polygon])
        return reordered
# Shared module-level instance. Constructing it runs
# BroadcastAreaLibraries.__init__ at import time, which globs for
# *.geojson files next to this module and raises ValueError if any
# area id appears more than once.
broadcast_area_libraries = BroadcastAreaLibraries()

View File

@@ -0,0 +1,144 @@
import pytest
from json import JSONDecodeError
from unittest import mock
from notifications_utils.broadcast_areas import (
BroadcastAreaLibraries,
broadcast_area_libraries,
load_geojson_file,
)
def test_loads_libraries():
    """The libraries, once sorted, expose the expected ids and names."""
    expected = [
        (
            'counties-and-unitary-authorities-in-england-and-wales',
            'Counties and Unitary Authorities in England and Wales',
        ),
        ('countries', 'Countries'),
        ('regions-of-england', 'Regions of England'),
    ]

    loaded = []
    for library in sorted(broadcast_area_libraries):
        loaded.append((library.id, library.name))

    assert loaded == expected
def test_raises_for_invalid_json():
    """Building the libraries fails when a file's contents are not JSON."""
    # Drop cached results so the patched read_text is actually consulted
    load_geojson_file.cache_clear()

    with mock.patch(
        'notifications_utils.broadcast_areas.Path.read_text',
        return_value='foo',
    ):
        # pytest.raises fails the test when nothing is raised; the previous
        # non-strict xfail mark would have let that case pass unnoticed
        with pytest.raises(JSONDecodeError):
            BroadcastAreaLibraries()
def test_raises_for_invalid_geojson():
    """Building the libraries fails when a file is JSON but not GeoJSON."""
    # Drop cached results so the patched read_text is actually consulted
    load_geojson_file.cache_clear()

    with mock.patch(
        'notifications_utils.broadcast_areas.Path.read_text',
        return_value='{"a": 1}',
    ):
        # pytest.raises fails the test when nothing is raised; the previous
        # non-strict xfail mark would have let that case pass unnoticed
        with pytest.raises(ValueError):
            BroadcastAreaLibraries()
def test_loads_areas_from_library():
    """The countries library yields its areas with the expected ids and names."""
    countries = broadcast_area_libraries.get('countries')

    ids_and_names = [(area.id, area.name) for area in sorted(countries)]

    assert ids_and_names == [
        ('england', 'England'),
        ('northern-ireland', 'Northern Ireland'),
        ('scotland', 'Scotland'),
        ('wales', 'Wales'),
    ]
def test_examples():
    """Examples list every name when short, and truncate when long."""
    countries_examples = broadcast_area_libraries.get('countries').get_examples()
    assert countries_examples == 'England, Northern Ireland, Scotland and Wales'

    regions_examples = broadcast_area_libraries.get('regions-of-england').get_examples()
    assert regions_examples == 'East Midlands, East of England, London and 6 more…'
@pytest.mark.parametrize('id', (
    'england',
    'northern-ireland',
    'scotland',
    'wales',
    pytest.param('mercia', marks=pytest.mark.xfail(raises=KeyError)),
))
def test_loads_areas_from_libraries(id):
    """An area fetched via its library equals the one fetched via get_areas."""
    from_library = broadcast_area_libraries.get('countries').get(id)
    from_all_libraries = broadcast_area_libraries.get_areas(id)[0]

    assert from_library == from_all_libraries
def test_get_names_of_areas():
    """get_areas keeps the requested order; sorting orders the areas by id."""
    requested_ids = ('wales', 'vale-of-glamorgan', 'england', 'essex')
    areas = broadcast_area_libraries.get_areas(*requested_ids)

    names_as_requested = [area.name for area in areas]
    assert names_as_requested == [
        'Wales', 'Vale of Glamorgan', 'England', 'Essex',
    ]

    names_sorted = [area.name for area in sorted(areas)]
    assert names_sorted == [
        'England', 'Essex', 'Vale of Glamorgan', 'Wales',
    ]
def test_has_polygons():
    """Polygon counts are per-area and add up when several areas are requested."""
    get_long_lat = broadcast_area_libraries.get_polygons_for_areas_long_lat

    england = get_long_lat('england')
    assert len(england) == 35

    scotland = get_long_lat('scotland')
    assert len(scotland) == 195

    england_and_scotland = get_long_lat('england', 'scotland')
    assert len(england_and_scotland) == 35 + 195 == 230

    first_coordinate = broadcast_area_libraries.get_polygons_for_areas_lat_long('england')[0][0]
    assert first_coordinate == [
        55.811085, -2.034358  # https://goo.gl/maps/wsf2LUWzYinwydMk8
    ]
def test_polygons_are_enclosed_unless_asked_not_to_be():
    """``polygons`` are closed; ``unenclosed_polygons`` drop the closing point."""
    england = broadcast_area_libraries.get('countries').get('england')
    enclosed, unenclosed = england.polygons, england.unenclosed_polygons

    assert len(enclosed) == len(unenclosed)

    first_polygon = enclosed[0]
    # Neighbouring points differ, but the shape ends where it started
    assert first_polygon[0] != first_polygon[1] != first_polygon[2]
    assert first_polygon[0] == first_polygon[-1]

    first_polygon_unenclosed = unenclosed[0]
    # Same start, but the final (closing) point has been removed
    assert first_polygon_unenclosed[0] == first_polygon[0]
    assert first_polygon_unenclosed[-1] != first_polygon[-1]
    assert first_polygon_unenclosed[-1] == first_polygon[-2]
def test_lat_long_order():
    """The lat/long polygons mirror the long/lat ones, pair by pair."""
    lat_long = broadcast_area_libraries.get_polygons_for_areas_lat_long('england')
    long_lat = broadcast_area_libraries.get_polygons_for_areas_long_lat('england')

    first_lat_long, first_long_lat = lat_long[0], long_lat[0]

    assert len(first_lat_long) == len(first_long_lat) == 2082  # Coordinates in polygon
    assert len(first_lat_long[0]) == len(first_long_lat[0]) == 2  # Axes in coordinates
    assert first_lat_long[0] == list(reversed(first_long_lat[0]))