"""Build ``lakes.json`` from the Minnesota DNR hydrography shapefile.

The script downloads the zipped shapefile, converts it to GeoJSON with the
external ``ogr2ogr`` command-line tool (which must be on ``PATH``), groups
the features by their ``map_label`` and writes the result, sorted by total
area in descending order, to ``lakes.json`` in the current working directory.
"""

import io
import json
import os
import subprocess
import tempfile
import zipfile

HYDROGRAPHY_ZIP_URL = (
    "https://resources.gisdata.mn.gov/pub/gdrs/data/pub/us_mn_state_dnr/"
    "water_dnr_hydrography/shp_water_dnr_hydrography.zip"
)
SHAPEFILE_NAME = "dnr_hydro_features_all.shp"
OUTPUT_FILE_NAME = "lakes.json"
# Per-phase (connect / read-inactivity) timeout in seconds, not a limit on
# the total download time.
REQUEST_TIMEOUT_SECONDS = 60


def aggregate_lakes(features):
    """Group GeoJSON features by lake name and total their area.

    Args:
        features: Iterable of GeoJSON feature dicts. Each must have a
            ``properties`` dict with the keys ``sub_flag``, ``map_label``,
            ``INSIDE_X``, ``INSIDE_Y`` and ``acres``.

    Returns:
        A list of ``{"centers": [[x, y], ...], "area": total, "name": name}``
        dicts, one per distinct name, sorted by ``area`` in descending order.
        Entries with equal area keep their first-seen order.

    Raises:
        KeyError: If a feature lacks one of the required keys.
    """
    lakes_by_name = {}
    for feature in features:
        props = feature["properties"]
        # NOTE(review): features flagged sub_flag == 'Y' are excluded; the
        # exact meaning of the flag is defined by the dataset -- confirm.
        if props["sub_flag"] == "Y":
            continue
        # Other candidate name fields exist (pw_basin_n, pw_parent_, ...);
        # map_label is the one used here.
        name = props["map_label"]
        # Many features have a null/empty name or the placeholder "Unnamed".
        if not name or name == "Unnamed":
            continue
        lake = lakes_by_name.setdefault(name, {"centers": [], "area": 0})
        # The original author describes these as [lon, lat] -- not verified.
        lake["centers"].append([props["INSIDE_X"], props["INSIDE_Y"]])
        # A null acreage contributes nothing instead of raising TypeError.
        lake["area"] += props["acres"] or 0

    lakes = []
    for name, lake in lakes_by_name.items():
        lake["name"] = name
        lakes.append(lake)
    lakes.sort(key=lambda lake: lake["area"], reverse=True)
    return lakes


def main():
    """Download, convert and aggregate the lake data, then write the JSON.

    Raises:
        requests.HTTPError: If the download returns an error status.
        subprocess.CalledProcessError: If ``ogr2ogr`` exits with an error.
    """
    # Imported here rather than at module level so that aggregate_lakes can
    # be imported without the third-party dependency installed.
    import requests

    with tempfile.TemporaryDirectory() as tmpdir:
        print("Fetching lake data", flush=True)
        res = requests.get(HYDROGRAPHY_ZIP_URL, timeout=REQUEST_TIMEOUT_SECONDS)
        # Fail here on an HTTP error instead of feeding an error page to
        # zipfile and getting an opaque BadZipFile.
        res.raise_for_status()

        print("extracting zip file", flush=True)
        with zipfile.ZipFile(io.BytesIO(res.content)) as archive:
            archive.extractall(tmpdir)

        shapefile_name = os.path.join(tmpdir, SHAPEFILE_NAME)
        geojson_file_name = os.path.join(tmpdir, "out.geojson")

        print("converting to geojson", flush=True)
        # Argument list (no shell) so paths are never re-parsed by a shell;
        # check=True surfaces an ogr2ogr failure immediately.
        subprocess.run(
            ["ogr2ogr", "-f", "GeoJSON", geojson_file_name, shapefile_name],
            check=True,
        )

        print("loading json", flush=True)
        with open(geojson_file_name, encoding="utf-8") as f:
            data = json.load(f)

    print("processing", flush=True)
    lakes = aggregate_lakes(data["features"])

    with open(OUTPUT_FILE_NAME, "w", encoding="utf-8") as f:
        f.write(json.dumps(lakes))


if __name__ == "__main__":
    main()