mirror of
https://github.com/GSA/notifications-admin.git
synced 2026-02-28 14:20:59 -05:00
Fix invalid polygons while importing geographic data
Some of the polygons in our source data are invalid. An invalid polygon is one that self intersects, in other words has a point which causes the boundary of the shape to cross itself. This doesn’t cause an exception until we try to perform certain operations on one of these polygons, like intersecting them with another polygon. This is why we haven’t spotted that they are invalid until now. This commit adds checks so that as we import the polygons we make sure they are valid. If they are not valid, we can automatically fix them by just looking at the exterior boundary of the shape, and ignore any holes created by self intersection.
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
import csv
|
||||
import pickle
|
||||
import sys
|
||||
from math import isclose
|
||||
from pathlib import Path
|
||||
|
||||
import geojson
|
||||
@@ -18,9 +19,12 @@ from populations import (
|
||||
)
|
||||
from repo import BroadcastAreasRepository, rtree_index_path
|
||||
from rtreelib import Rect, RTree
|
||||
from shapely import wkt
|
||||
from shapely.geometry import MultiPolygon, Polygon
|
||||
|
||||
source_files_path = Path(__file__).resolve().parent / 'source_files'
|
||||
point_counts = []
|
||||
invalid_polygons = []
|
||||
rtree_index = RTree()
|
||||
|
||||
|
||||
@@ -33,12 +37,82 @@ def simplify_geometry(feature):
|
||||
raise Exception("Unknown type: {}".format(feature["type"]))
|
||||
|
||||
|
||||
def clean_up_invalid_polygons(polygons, indent=" "):
|
||||
for index, polygon in enumerate(polygons):
|
||||
shapely_polygon = Polygon(polygon)
|
||||
|
||||
if shapely_polygon.is_valid:
|
||||
print( # noqa: T001
|
||||
f"{indent}Polygon {index + 1}/{len(polygons)} is valid"
|
||||
)
|
||||
yield polygon
|
||||
|
||||
else:
|
||||
invalid_polygons.append(shapely_polygon)
|
||||
|
||||
# We’ve found polygons where all the points line up, so they
|
||||
# don’t have an area. They wouldn’t contribute to a broadcast
|
||||
# so we can ignore them.
|
||||
if shapely_polygon.area == 0:
|
||||
print( # noqa: T001
|
||||
f"{indent}Polygon {index + 1}/{len(polygons)} has 0 area, skipping"
|
||||
)
|
||||
continue
|
||||
|
||||
print( # noqa: T001
|
||||
f"{indent}Polygon {index + 1}/{len(polygons)} needs fixing..."
|
||||
)
|
||||
|
||||
# The simplest kind of invalid polygon is one that has two
|
||||
# duplicate points in a row at a given precision, so the
|
||||
# first thing to try is removing those points using
|
||||
# simplification with a tolerance of 0
|
||||
polygon_with_duplicate_points_removed = wkt.loads(
|
||||
wkt.dumps(shapely_polygon, rounding_precision=5)
|
||||
).simplify(0)
|
||||
|
||||
if polygon_with_duplicate_points_removed.is_valid:
|
||||
yield polygon_with_duplicate_points_removed
|
||||
continue
|
||||
|
||||
# Buffering with a size of 0 is a trick to make valid
|
||||
# geometries from polygons that self intersect
|
||||
buffered = shapely_polygon.buffer(0)
|
||||
|
||||
# If the buffering has caused our polygon to split into
|
||||
# multiple polygons, we need to recursively check them
|
||||
# instead
|
||||
if isinstance(buffered, MultiPolygon):
|
||||
for sub_polygon in clean_up_invalid_polygons(buffered, indent=" "):
|
||||
yield sub_polygon
|
||||
continue
|
||||
|
||||
# We only care about the exterior of the polygon, not an
|
||||
# holes in it that may have been created by fixing self
|
||||
# intersection
|
||||
fixed_polygon = Polygon(buffered.exterior)
|
||||
|
||||
# Make sure the polygon is now valid, and that we haven’t
|
||||
# drastically transformed the polygon by ‘fixing’ it
|
||||
assert fixed_polygon.is_valid
|
||||
assert isclose(fixed_polygon.area, polygon.area, rel_tol=0.001)
|
||||
|
||||
print( # noqa: T001
|
||||
f"{indent}Polygon {index + 1}/{len(polygons)} fixed!"
|
||||
)
|
||||
|
||||
yield fixed_polygon
|
||||
|
||||
|
||||
def polygons_and_simplified_polygons(feature):
|
||||
if keep_old_polygons:
|
||||
# cheat and shortcut out
|
||||
return [], []
|
||||
|
||||
polygons = Polygons(simplify_geometry(feature))
|
||||
polygons = list(clean_up_invalid_polygons(polygons))
|
||||
polygons = Polygons(polygons)
|
||||
|
||||
full_resolution = polygons.remove_too_small
|
||||
smoothed = full_resolution.smooth
|
||||
simplified = smoothed.simplify
|
||||
@@ -380,5 +454,6 @@ print( # noqa: T001
|
||||
'\n'
|
||||
'DONE\n'
|
||||
f' Processed {len(point_counts):,} polygons.\n'
|
||||
f' Cleaned up {len(invalid_polygons):,} polygons.\n'
|
||||
f' Highest point counts once simplifed: {most_detailed_polygons}\n'
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user