maps.chandlerswift.com/layers/nhl-arenas/get_data.py
#!/usr/bin/env python3
import re
from typing import Tuple
import requests
import json
BASE_URL = "https://en.wikipedia.org/w/api.php"  # MediaWiki Action API endpoint
# A previous attempt at this script used the NHL api...except for some reason
# they don't include all the arenas! Only 16 are included on this page:
#
# venueData = requests.get("https://statsapi.web.nhl.com/api/v1/venues").json()
#
# venues = []
# for venue in venueData["venues"]:
#     # Special-case a few entries
#     if venue["name"] == "NASSAU LIVE CENTER":
#         # As of 2021 the islanders now play out of UBS Arena. Not sure why this
#         # is still in the list.
#         continue
#     if venue["name"] == "Prudential Center Map & Info": # not sure why they call it that
#         venue["name"] = "Prudential Center"
#     ...
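#
# Instead, this script pulls the arena list from Wikipedia's "Template:NHL arenas"
# page, scrapes each arena's coordinates from the {{coord}} template in its article,
# cross-checks them against a Nominatim (OpenStreetMap) lookup, and writes the
# result out as GeoJSON.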
def wikipedia_request(page_title: str) -> str:
    params = {
        "action": "parse",
        "page": f"{page_title}",
        "prop": "wikitext",
        "formatversion": 2,
        "format": "json",
    }
    return requests.get(url=BASE_URL, params=params).json()['parse']['wikitext']
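# Example (illustrative): wikipedia_request("Template:NHL arenas") returns the raw
# wikitext of that page as a single string, via the MediaWiki Action API's `parse`
# action with prop=wikitext (formatversion=2 makes the wikitext a plain string).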
def get_wikipedia_coords_for_arena(arena: str) -> Tuple[float, float]:
    raw_arena_page = wikipedia_request(arena)
    # print(raw_arena_page)
    # e.g. `coordinates = {{coord|40.712094|N|73.727157|W|...}}`
    match = re.search(r"[Cc]oord\|([0-9.]*)\|N\|([0-9.]*)\|W\|", raw_arena_page)
    if match:
        return (float(match[1]), -float(match[2]))
    # e.g. `coordinates = {{Coord|47.622|-122.354|...}}`
    match = re.search(r"[Cc]oord\|([0-9.]*)\|(-[0-9.]*)\|[^\d]", raw_arena_page)
    if match:
        return (float(match[1]), float(match[2]))
    # e.g. `coordinates = {{coord|44|56|41|N|93|6|4|W|...}}`
    # Assuming northern and western hemispheres; currently safe
    match = re.search(r"[Cc]oord\|([0-9.]*)\|([0-9.]*)\|([0-9.]*)\|N\|([0-9.]*)\|([0-9.]*)\|([0-9.]*)\|W\|", raw_arena_page)
    lat_deg = match[1]
    lat_min = match[2]
    lat_sec = match[3]
    lon_deg = match[4]
    lon_min = match[5]
    lon_sec = match[6]
    lat = float(lat_deg) + float(lat_min) / 60 + float(lat_sec) / 3600
    lon = float(lon_deg) + float(lon_min) / 60 + float(lon_sec) / 3600
    return (lat, -lon)
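# Worked example of the degrees/minutes/seconds branch above (values taken from the
# sample template in the comment, not fetched live):
#   {{coord|44|56|41|N|93|6|4|W|...}}
#   lat = 44 + 56/60 + 41/3600 ≈ 44.9447
#   lon = -(93 + 6/60 + 4/3600) ≈ -93.1011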
print("Retrieving arena list...", flush=True)
raw_arenas_list = wikipedia_request("Template:NHL arenas")
arena_names = re.findall(r"\* +\[\[ ?(.*?)(?:\|.*)? ?\]\]", raw_arenas_list)
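# The template's wikitext lists arenas as bulleted wiki links, roughly (illustrative):
#   * [[Madison Square Garden]]
#   * [[Some Arena|Display name]]
# The capture group keeps only the target page title, dropping any `|display` text.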
arenas = []
for arena in arena_names:
print(f"Retrieving data for {arena}...", flush=True)
nominatim_params = {
'q': arena,
'format': "json",
'addressdetails': 1,
}
if arena == "SAP Center":
nominatim_params['q'] = "SAP Center at San Jose" # https://en.wikipedia.org/w/index.php?title=SAP_Center&oldid=690907747
nominatim_result = requests.get(url="https://nominatim.openstreetmap.org/search", params=nominatim_params).json()[0]
# confirm it matches what wikipedia claims
wiki_lat, wiki_lon = get_wikipedia_coords_for_arena(arena)
if wiki_lat - float(nominatim_result["lat"]) > 0.1 or wiki_lon - float(nominatim_result["lon"]) > 0.1:
raise Exception(f"Data mismatch for {arena}: {wiki_lat} vs {nominatim_result['lat']}; {wiki_lon} vs {nominatim_result['lon']}")
arenas.append({
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [float(nominatim_result["lon"]), float(nominatim_result["lat"])], # yes, [lon, lat] since it's [x, y]
},
"properties": {
"name": arena,
"osm_id": nominatim_result["osm_id"],
"address": nominatim_result["address"], # requires &addressdetails=1 (https://nominatim.org/release-docs/latest/api/Search/#output-details)
},
})
geojson = {
    "type": "FeatureCollection",
    "features": arenas,
}

with open("data.geojson", "w") as f:
    f.write(json.dumps(geojson))
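# The resulting data.geojson is a FeatureCollection of Point features, roughly
# (shape only; values illustrative):
#   {"type": "FeatureCollection",
#    "features": [{"type": "Feature",
#                  "geometry": {"type": "Point", "coordinates": [<lon>, <lat>]},
#                  "properties": {"name": "...", "osm_id": ..., "address": {...}}},
#                 ...]}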