maps.chandlerswift.com/layers/nhl-arenas/get_data.py

#!/usr/bin/env python3
import re
from typing import Tuple
import requests
import json
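
# MediaWiki Action API endpoint; queried below with action=parse / prop=wikitext
# to fetch a page's raw wikitext.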
BASE_URL = "https://en.wikipedia.org/w/api.php"

# A previous attempt at this script used the NHL api...except for some reason
# they don't include all the arenas! Only 16 are included on this page:
#
# venueData = requests.get("https://statsapi.web.nhl.com/api/v1/venues").json()
#
# venues = []
# for venue in venueData["venues"]:
#     # Special-case a few entries
#     if venue["name"] == "NASSAU LIVE CENTER":
#         # As of 2021 the Islanders now play out of UBS Arena. Not sure why
#         # this is still in the list.
#         continue
#     if venue["name"] == "Prudential Center Map & Info": # not sure why they call it that
#         venue["name"] = "Prudential Center"
#     ...

def wikipedia_request(page_title: str) -> str:
    """Fetch the raw wikitext of a Wikipedia page via the parse API."""
    params = {
        "action": "parse",
        "page": page_title,
        "prop": "wikitext",
        "formatversion": 2,
        "format": "json",
    }
    return requests.get(url=BASE_URL, params=params).json()['parse']['wikitext']


def get_wikipedia_coords_for_arena(arena: str) -> Tuple[float, float]:
    """Scrape (lat, lon) from the {{coord}} template on the arena's Wikipedia page."""
    raw_arena_page = wikipedia_request(arena)
    # print(raw_arena_page)

    # e.g. `coordinates = {{coord|40.712094|N|73.727157|W|...}}`
    match = re.search(r"[Cc]oord\|([0-9.]*)\|N\|([0-9.]*)\|W\|", raw_arena_page)
    if match:
        return (float(match[1]), -float(match[2]))

    # e.g. `coordinates = {{Coord|47.622|-122.354|...}}`
    match = re.search(r"[Cc]oord\|([0-9.]*)\|(-[0-9.]*)\|[^\d]", raw_arena_page)
    if match:
        return (float(match[1]), float(match[2]))

    # e.g. `coordinates = {{coord|44|56|41|N|93|6|4|W|...}}`
    # Degrees/minutes/seconds form. Assuming northern and western hemispheres,
    # which is currently safe for NHL arenas.
    match = re.search(r"[Cc]oord\|([0-9.]*)\|([0-9.]*)\|([0-9.]*)\|N\|([0-9.]*)\|([0-9.]*)\|([0-9.]*)\|W\|", raw_arena_page)
    lat_deg = match[1]
    lat_min = match[2]
    lat_sec = match[3]
    lon_deg = match[4]
    lon_min = match[5]
    lon_sec = match[6]
    # Convert DMS to decimal degrees; longitude is negated for the western hemisphere.
    lat = float(lat_deg) + float(lat_min) / 60 + float(lat_sec) / 3600
    lon = float(lon_deg) + float(lon_min) / 60 + float(lon_sec) / 3600
    return (lat, -lon)
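

# Main flow: pull the arena list from Wikipedia's NHL arenas template, geocode each
# arena with Nominatim, sanity-check against Wikipedia's coordinates, and write the
# result out as GeoJSON.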
print("Retrieving arena list...", flush=True)
raw_arenas_list = wikipedia_request("Template:NHL arenas")
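# Each arena is a bulleted wikilink in the template; capture the link target
# (the page title, dropping any `|display text`).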
arena_names = re.findall(r"\* +\[\[ ?(.*?)(?:\|.*)? ?\]\]", raw_arenas_list)
arenas = []
for arena in arena_names:
print(f"Retrieving data for {arena}...", flush=True)
nominatim_params = {
'q': arena,
'format': "json",
'addressdetails': 1,
}
if arena == "SAP Center":
nominatim_params['q'] = "SAP Center at San Jose" # https://en.wikipedia.org/w/index.php?title=SAP_Center&oldid=690907747
nominatim_result = requests.get(url="https://nominatim.openstreetmap.org/search", params=nominatim_params).json()[0]
# confirm it matches what wikipedia claims
wiki_lat, wiki_lon = get_wikipedia_coords_for_arena(arena)
if wiki_lat - float(nominatim_result["lat"]) > 0.1 or wiki_lon - float(nominatim_result["lon"]) > 0.1:
raise Exception(f"Data mismatch for {arena}: {wiki_lat} vs {nominatim_result['lat']}; {wiki_lon} vs {nominatim_result['lon']}")
    arenas.append({
        "type": "Feature",
        "geometry": {
            "type": "Point",
            # yes, [lon, lat] since it's [x, y]
            "coordinates": [float(nominatim_result["lon"]), float(nominatim_result["lat"])],
        },
        "properties": {
            "name": arena,
            "osm_id": nominatim_result["osm_id"],
            "address": nominatim_result["address"],  # requires &addressdetails=1 (https://nominatim.org/release-docs/latest/api/Search/#output-details)
        },
    })
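
# Collect everything into a single GeoJSON FeatureCollection.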
geojson = {
    "type": "FeatureCollection",
    "features": arenas,
}
with open("nhl-arenas-data.geojson", "w") as f:
    f.write(json.dumps(geojson))