Files
notifications-admin/app/broadcast_areas/create-broadcast-areas-db.py
2021-04-13 16:31:06 +03:00

348 lines
11 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python
import csv
import pickle
import sys
from pathlib import Path
import geojson
from notifications_utils.formatters import formatted_list
from notifications_utils.polygons import Polygons
from populations import (
BRYHER,
CITY_OF_LONDON,
MEDIAN_AGE_RANGE_UK,
MEDIAN_AGE_UK,
SMARTPHONE_OWNERSHIP_BY_AGE_RANGE,
estimate_number_of_smartphones_for_population,
)
from repo import BroadcastAreasRepository, rtree_index_path
from rtreelib import Rect, RTree
source_files_path = Path(__file__).resolve().parent / 'source_files'
point_counts = []
rtree_index = RTree()
def simplify_geometry(feature):
if feature["type"] == "Polygon":
return [feature["coordinates"][0]]
elif feature["type"] == "MultiPolygon":
return [polygon for polygon, *_holes in feature["coordinates"]]
else:
raise Exception("Unknown type: {}".format(feature["type"]))
def polygons_and_simplified_polygons(feature):
if keep_old_polygons:
# cheat and shortcut out
return [], []
polygons = Polygons(simplify_geometry(feature))
full_resolution = polygons.remove_too_small
smoothed = full_resolution.smooth
simplified = smoothed.simplify
print( # noqa: T001
f' Original:{full_resolution.point_count: >5} points'
f' Smoothed:{smoothed.point_count: >5} points'
f' Simplified:{simplified.point_count: >4} points'
)
point_counts.append(simplified.point_count)
if simplified.point_count >= 200:
raise RuntimeError(
'Too many points '
'(adjust Polygons.perimeter_to_simplification_ratio or '
'Polygons.perimeter_to_buffer_ratio)'
)
return (
full_resolution.as_coordinate_pairs_long_lat,
simplified.as_coordinate_pairs_long_lat,
)
def estimate_number_of_smartphones_in_area(country_or_ward_code):
if country_or_ward_code in CITY_OF_LONDON.WARDS:
# We dont have population figures for wards of the City of
# London. Well leave it empty here and estimate on the fly
# later based on physical area.
print(' Population: N/A') # noqa: T001
return None
# For some reason Bryher is the only ward missing population data, so we
# need to hard code it. For simplicity, lets assume all 84 people who
# live on Bryher are 40 years old
if country_or_ward_code == BRYHER.WD20_CODE:
return BRYHER.POPULATION * SMARTPHONE_OWNERSHIP_BY_AGE_RANGE[MEDIAN_AGE_RANGE_UK]
if country_or_ward_code not in area_to_population_mapping:
raise ValueError(f'No population data for {country_or_ward_code}')
return estimate_number_of_smartphones_for_population(
area_to_population_mapping[country_or_ward_code]
)
test_filepath = source_files_path / "Test.geojson"
ctry19_filepath = source_files_path / "Countries.geojson"
# https://geoportal.statistics.gov.uk/datasets/wards-may-2020-boundaries-uk-bgc
# Converted to geojson manually from SHP because of GeoJSON download limits
wd20_filepath = source_files_path / "Electoral Wards May 2020.geojson"
# http://geoportal.statistics.gov.uk/datasets/local-authority-districts-may-2020-boundaries-uk-bgc
lad20_filepath = source_files_path / "Local Authorities May 2020.geojson"
# https://geoportal.statistics.gov.uk/datasets/counties-and-unitary-authorities-december-2019-boundaries-uk-bgc
ctyua19_filepath = source_files_path / "Counties_and_Unitary_Authorities__December_2019__Boundaries_UK_BGC.geojson"
# http://geoportal.statistics.gov.uk/datasets/ward-to-westminster-parliamentary-constituency-to-local-authority-district-december-2019-lookup-in-the-united-kingdom/data
wd_lad_map_filepath = source_files_path / "Electoral Wards and Local Authorities 2020.geojson"
# https://geoportal.statistics.gov.uk/datasets/lower-tier-local-authority-to-upper-tier-local-authority-december-2019-lookup-in-england-and-wales?where=LTLA19CD%20%3D%20%27E06000045%27
ltla_utla_map_filepath = source_files_path / "Lower_Tier_Local_Authority_to_Upper_Tier_Local_Authority__December_2019__Lookup_in_England_and_Wales.csv" # noqa: E501
# https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/wardlevelmidyearpopulationestimatesexperimental
population_filepath_england_wales = source_files_path / "Mid-2019_Persons_England_Wales.csv"
# https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/population/population-estimates/2011-based-special-area-population-estimates/electoral-ward-population-estimates
population_filepath_scotland = source_files_path / "Mid-2019_Persons_Scotland.csv"
population_filepath_northern_ireland = source_files_path / "Ward-2014_Northern_Ireland.csv"
population_filepath_uk = source_files_path / "MYE1-2019.csv"
ward_code_to_la_mapping = {
f["properties"]["WD19CD"]: f["properties"]["LAD19NM"]
for f in geojson.loads(wd_lad_map_filepath.read_text())["features"]
}
ward_code_to_la_id_mapping = {
f["properties"]["WD19CD"]: f["properties"]["LAD19CD"]
for f in geojson.loads(wd_lad_map_filepath.read_text())["features"]
}
# the mapping dict is empty for lower tier local authorities that are also upper tier (unitary authorities, etc)
ltla_utla_mapping_csv = csv.DictReader(ltla_utla_map_filepath.open())
la_code_to_cty_id_mapping = {
row['LTLA19CD']: row['UTLA19CD'] for row in ltla_utla_mapping_csv if row['LTLA19CD'] != row['UTLA19CD']
}
area_to_population_mapping = {}
for population_filepath in (
population_filepath_uk,
population_filepath_england_wales,
population_filepath_northern_ireland,
population_filepath_scotland,
):
area_to_population_csv = csv.DictReader(population_filepath.open())
for row in area_to_population_csv:
area_to_population_mapping[row['ward']] = [
(
int(k) if k.isnumeric() else MEDIAN_AGE_UK,
int(float(v.replace(',', '') or '0'))
)
for k, v in row.items() if k != 'ward'
]
def add_test_areas():
dataset_id = 'test'
dataset_geojson = geojson.loads(test_filepath.read_text())
repo.insert_broadcast_area_library(
dataset_id,
name='Test areas',
name_singular='test area',
is_group=False,
)
areas_to_add = []
for feature in dataset_geojson["features"]:
f_id = feature["properties"]['id']
f_name = feature["properties"]['name']
print() # noqa: T001
print(f_name) # noqa: T001
feature, _ = polygons_and_simplified_polygons(
feature["geometry"]
)
areas_to_add.append([
f'{dataset_id}-{f_id}', f_name,
dataset_id, None,
feature, feature,
0,
])
repo.insert_broadcast_areas(areas_to_add, keep_old_polygons)
def add_countries():
dataset_id = 'ctry19'
dataset_geojson = geojson.loads(ctry19_filepath.read_text())
repo.insert_broadcast_area_library(
'ctry19',
name='Countries',
name_singular='country',
is_group=False,
)
areas_to_add = []
for feature in dataset_geojson["features"]:
f_id = feature["properties"]['ctry19cd']
f_name = feature["properties"]['ctry19nm']
print() # noqa: T001
print(f_name) # noqa: T001
feature, simple_feature = (
polygons_and_simplified_polygons(feature["geometry"])
)
areas_to_add.append([
f'ctry19-{f_id}', f_name,
dataset_id, None,
feature, simple_feature,
estimate_number_of_smartphones_in_area(f_id),
])
repo.insert_broadcast_areas(areas_to_add, keep_old_polygons)
def add_wards_local_authorities_and_counties():
dataset_name = "Local authorities"
dataset_name_singular = "local authority"
dataset_id = "wd20-lad20-ctyua19"
repo.insert_broadcast_area_library(
dataset_id,
name=dataset_name,
name_singular=dataset_name_singular,
is_group=True,
)
_add_electoral_wards(dataset_id)
_add_local_authorities(dataset_id)
_add_counties_and_unitary_authorities(dataset_id)
def _add_electoral_wards(dataset_id):
areas_to_add = []
for feature in geojson.loads(wd20_filepath.read_text())["features"]:
ward_code = feature["properties"]["wd20cd"]
ward_name = feature["properties"]["wd20nm"]
ward_id = "wd20-" + ward_code
print() # noqa: T001
print(ward_name) # noqa: T001
try:
la_id = "lad20-" + ward_code_to_la_id_mapping[ward_code]
feature, simple_feature = (
polygons_and_simplified_polygons(feature["geometry"])
)
if feature:
rtree_index.insert(ward_id, Rect(*Polygons(feature).bounds))
areas_to_add.append([
ward_id, ward_name,
dataset_id, la_id,
feature, simple_feature,
estimate_number_of_smartphones_in_area(ward_code),
])
except KeyError:
print("Skipping", ward_code, ward_name) # noqa: T001
rtree_index_path.open('wb').write(pickle.dumps(rtree_index))
repo.insert_broadcast_areas(areas_to_add, keep_old_polygons)
def _add_local_authorities(dataset_id):
areas_to_add = []
for feature in geojson.loads(lad20_filepath.read_text())["features"]:
la_id = feature["properties"]["LAD20CD"]
group_name = feature["properties"]["LAD20NM"]
print() # noqa: T001
print(group_name) # noqa: T001
group_id = "lad20-" + la_id
feature, simple_feature = (
polygons_and_simplified_polygons(feature["geometry"])
)
ctyua_id = la_code_to_cty_id_mapping.get(la_id)
areas_to_add.append([
group_id,
group_name,
dataset_id,
'ctyua19-' + ctyua_id if ctyua_id else None,
feature,
simple_feature,
None,
])
repo.insert_broadcast_areas(areas_to_add, keep_old_polygons)
# counties and unitary authorities
def _add_counties_and_unitary_authorities(dataset_id):
areas_to_add = []
for feature in geojson.loads(ctyua19_filepath.read_text())['features']:
ctyua_id = feature["properties"]["ctyua19cd"]
group_name = feature["properties"]["ctyua19nm"]
la_id = 'lad20-' + ctyua_id
if repo.get_areas([la_id]):
continue
group_id = "ctyua19-" + ctyua_id
feature, simple_feature = (
polygons_and_simplified_polygons(feature["geometry"])
)
areas_to_add.append([
group_id, group_name,
dataset_id, None,
feature, simple_feature,
None,
])
repo.insert_broadcast_areas(areas_to_add, keep_old_polygons)
# cheeky global variable
keep_old_polygons = sys.argv[1:] == ['--keep-old-polygons']
print('keep_old_polygons: ', keep_old_polygons) # noqa: T001
repo = BroadcastAreasRepository()
if keep_old_polygons:
repo.delete_library_data()
else:
repo.delete_db()
repo.create_tables()
add_test_areas()
add_countries()
add_wards_local_authorities_and_counties()
most_detailed_polygons = formatted_list(
sorted(point_counts, reverse=True)[:5],
before_each='',
after_each='',
)
print( # noqa: T001
'\n'
'DONE\n'
f' Processed {len(point_counts):,} polygons.\n'
f' Highest point counts once simplifed: {most_detailed_polygons}\n'
)