name-all-lakes-by-area-quiz/get_data.py

47 lines
1.7 KiB
Python
Raw Permalink Normal View History

2024-02-29 17:24:33 -06:00
import requests
import json
import zipfile
import tempfile
import os
import subprocess
import io
2024-02-29 17:24:33 -06:00
2024-07-06 15:25:57 -05:00
LAKE_DATA_URL = (
    "https://resources.gisdata.mn.gov/pub/gdrs/data/pub/us_mn_state_dnr/"
    "water_dnr_hydrography/shp_water_dnr_hydrography.zip"
)


def fetch_features(url=LAKE_DATA_URL):
    """Download the hydrography shapefile and return its GeoJSON features.

    The zip archive at *url* is extracted into a temporary directory,
    converted to GeoJSON with the external ``ogr2ogr`` tool, and parsed.
    The temporary directory is removed before returning.

    Raises:
        requests.HTTPError: if the download returns an error status.
        subprocess.CalledProcessError: if ``ogr2ogr`` exits non-zero.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        print("Fetching lake data", flush=True)
        # The timeout bounds connect time and each socket read, not the
        # whole download, so a large archive is fine but a hung server
        # no longer blocks forever.
        res = requests.get(url, timeout=60)
        # Fail here on an HTTP error instead of later with a BadZipFile.
        res.raise_for_status()

        print("extracting zip file", flush=True)
        with zipfile.ZipFile(io.BytesIO(res.content)) as archive:
            archive.extractall(tmpdir)

        shapefile_name = os.path.join(tmpdir, "dnr_hydro_features_all.shp")
        geojson_file_name = os.path.join(tmpdir, "out.geojson")

        print("converting to geojson", flush=True)
        # Argument list (no shell) so temp paths with spaces or shell
        # metacharacters are passed through intact; check=True surfaces a
        # failed conversion immediately.
        subprocess.run(
            ["ogr2ogr", "-f", "GeoJSON", geojson_file_name, shapefile_name],
            check=True,
        )

        print("loading json", flush=True)
        # GeoJSON is UTF-8; do not depend on the platform default encoding.
        with open(geojson_file_name, encoding="utf-8") as f:
            return json.load(f)["features"]


def collect_lakes(features):
    """Group features by lake name and return them sorted by total area.

    Args:
        features: iterable of GeoJSON feature dicts whose ``properties``
            contain ``sub_flag``, ``map_label``, ``INSIDE_X``, ``INSIDE_Y``
            and ``acres``.

    Returns:
        A list of ``{"centers": [[x, y], ...], "area": total, "name": name}``
        dicts, largest area first. Features flagged ``sub_flag == 'Y'`` and
        features with a missing or ``"Unnamed"`` label are skipped.
    """
    # {"Marion": {"centers": [[lon, lat], ...], "area": area_in_acres}, ...}
    lakes_by_name = {}
    for feature in features:
        props = feature["properties"]
        if props["sub_flag"] == "Y":
            continue
        name = props["map_label"]  # or pw_basin_n or pw_parent_ or...??
        # Many lakes have a null name; "Unnamed" is an explicit placeholder.
        if not name or name == "Unnamed":
            continue
        lake = lakes_by_name.setdefault(name, {"centers": [], "area": 0})
        lake["centers"].append([props["INSIDE_X"], props["INSIDE_Y"]])
        acres = props["acres"]
        # NOTE(review): guards against a null acreage (would otherwise raise
        # TypeError); confirm whether the dataset can actually contain nulls.
        if acres is not None:
            lake["area"] += acres

    lakes = []
    for name, lake in lakes_by_name.items():
        lake["name"] = name
        lakes.append(lake)

    # Stable sort: lakes with equal area keep first-seen order.
    lakes.sort(key=lambda lake: lake["area"], reverse=True)
    return lakes


def main():
    """Fetch the lake data, aggregate it, and write ``lakes.json``."""
    features = fetch_features()
    print("processing", flush=True)
    lakes = collect_lakes(features)
    with open("lakes.json", "w", encoding="utf-8") as f:
        f.write(json.dumps(lakes))


if __name__ == "__main__":
    main()