98 lines
3.7 KiB
Python
98 lines
3.7 KiB
Python
|
#!/usr/bin/env python3
|
||
|
|
||
|
import re
|
||
|
from typing import Tuple
|
||
|
import requests
|
||
|
import json
|
||
|
|
||
|
# MediaWiki Action API endpoint for English Wikipedia; wikipedia_request()
# sends every query to this URL.
BASE_URL="https://en.wikipedia.org/w/api.php"
|
||
|
|
||
|
# A previous attempt at this script used the NHL api...except for some reason
|
||
|
# they don't include all the arenas! Only 16 are included on this page:
|
||
|
#
|
||
|
# venueData = requests.get("https://statsapi.web.nhl.com/api/v1/venues").json()
|
||
|
#
|
||
|
# venues = [] for venue in venueData["venues"]: # Special-case a few entries if
|
||
|
# venue["name"] == "NASSAU LIVE CENTER": # As of 2021 the islanders now play out
|
||
|
# of UBS Arena. Not sure why this # is still in the list. continue if
|
||
|
# venue["name"] == "Prudential Center Map & Info": # not sure why they call
|
||
|
# it that venue["name"] = "Prudential Center" ...
|
||
|
|
||
|
def wikipedia_request(page_title: str) -> str:
    """Fetch the raw wikitext of an English Wikipedia page.

    Sends an ``action=parse`` query to the MediaWiki API at ``BASE_URL`` and
    returns the ``parse.wikitext`` field of the JSON reply.

    Args:
        page_title: Title of the page to fetch, e.g. ``"Template:NHL arenas"``.

    Returns:
        The wikitext source of the page.

    Raises:
        requests.HTTPError: If the server replies with a 4xx/5xx status.
        requests.Timeout: If no response arrives within the timeout.
        KeyError: If the JSON reply has no ``parse``/``wikitext`` entry.
    """
    params = {
        "action": "parse",
        "page": page_title,
        "prop": "wikitext",
        # formatversion 2 returns the wikitext as a plain string, which is
        # what the direct ["parse"]["wikitext"] lookup below relies on.
        "formatversion": 2,
        "format": "json",
    }
    # requests never times out by default; bound the wait so a stalled
    # connection cannot hang the whole script.
    response = requests.get(url=BASE_URL, params=params, timeout=30)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    return response.json()["parse"]["wikitext"]
|
||
|
|
||
|
def get_wikipedia_coords_for_arena(arena: str) -> Tuple[float, float]:
    """Look up an arena's coordinates from its Wikipedia page.

    Fetches the wikitext of the page titled ``arena`` and scans it for a
    ``coord`` template in one of three layouts, tried in this order:

    1. decimal degrees with hemisphere letters (``coord|40.71|N|73.72|W|``),
    2. signed decimal degrees (``Coord|47.622|-122.354|``),
    3. degrees/minutes/seconds (``coord|44|56|41|N|93|6|4|W|``).

    Only northern latitudes and western longitudes are recognised, which is
    all this script currently needs.

    Args:
        arena: Wikipedia page title of the arena.

    Returns:
        ``(latitude, longitude)`` in decimal degrees; the longitude is
        negative because it lies west of the prime meridian.

    Raises:
        ValueError: If none of the supported coord layouts is found on the
            page (or a captured field is not a valid number).
    """
    raw_arena_page = wikipedia_request(arena)

    # Numeric groups use `+` so an empty field can never be captured and fed
    # to float(); a malformed template simply falls through to the next layout.

    # e.g. `coordinates = {{coord|40.712094|N|73.727157|W|...}}`
    match = re.search(r"[Cc]oord\|([0-9.]+)\|N\|([0-9.]+)\|W\|", raw_arena_page)
    if match:
        return (float(match[1]), -float(match[2]))

    # e.g. `coordinates = {{Coord|47.622|-122.354|...}}`
    # The trailing `\|[^\d]` requires the field after the longitude to start
    # with a non-digit, i.e. it must not be another numeric component.
    match = re.search(r"[Cc]oord\|([0-9.]+)\|(-[0-9.]+)\|[^\d]", raw_arena_page)
    if match:
        return (float(match[1]), float(match[2]))

    # e.g. `coordinates = {{coord|44|56|41|N|93|6|4|W|...}}`
    # Assuming northern and western hemispheres; currently safe
    match = re.search(
        r"[Cc]oord\|([0-9.]+)\|([0-9.]+)\|([0-9.]+)\|N\|"
        r"([0-9.]+)\|([0-9.]+)\|([0-9.]+)\|W\|",
        raw_arena_page,
    )
    if match is None:
        # re.search returns None on no match; report which page failed
        # instead of crashing with "'NoneType' object is not subscriptable".
        raise ValueError(
            f"Could not find a supported coord template on the Wikipedia page for {arena!r}"
        )

    lat_deg, lat_min, lat_sec, lon_deg, lon_min, lon_sec = (
        float(part) for part in match.groups()
    )
    # Convert degrees/minutes/seconds to decimal degrees.
    lat = lat_deg + lat_min / 60 + lat_sec / 3600
    lon = lon_deg + lon_min / 60 + lon_sec / 3600
    return (lat, -lon)
|
||
|
|
||
|
|
||
|
# Script body: build a GeoJSON FeatureCollection of NHL arenas.
#
# Arena names come from Wikipedia's "Template:NHL arenas"; each one is geocoded
# with Nominatim (OpenStreetMap) and cross-checked against the coordinates on
# the arena's own Wikipedia page before being written to
# nhl-arenas-data.geojson.
print("Retrieving arena list...", flush=True)
raw_arenas_list = wikipedia_request("Template:NHL arenas")
# Each arena is a bullet of the form `* [[Page title|label]]`; capture the
# page title (the part before any `|`).
arena_names = re.findall(r"\* +\[\[ ?(.*?)(?:\|.*)? ?\]\]", raw_arenas_list)
arenas = []
for arena in arena_names:
    print(f"Retrieving data for {arena}...", flush=True)
    nominatim_params = {
        'q': arena,
        'format': "json",
        'addressdetails': 1,
    }
    if arena == "SAP Center":
        nominatim_params['q'] = "SAP Center at San Jose" # https://en.wikipedia.org/w/index.php?title=SAP_Center&oldid=690907747

    # Bound the wait and surface HTTP errors rather than parsing an error body.
    nominatim_response = requests.get(
        url="https://nominatim.openstreetmap.org/search",
        params=nominatim_params,
        timeout=30,
    )
    nominatim_response.raise_for_status()
    nominatim_results = nominatim_response.json()
    if not nominatim_results:
        # Nominatim answers with an empty list when nothing matches; name the
        # arena instead of failing with a bare IndexError on [0].
        raise RuntimeError(f"No Nominatim results for {arena}")
    nominatim_result = nominatim_results[0]  # first hit returned
    osm_lat = float(nominatim_result["lat"])
    osm_lon = float(nominatim_result["lon"])

    # confirm it matches what wikipedia claims
    wiki_lat, wiki_lon = get_wikipedia_coords_for_arena(arena)
    # Compare absolute differences: a signed difference would let any mismatch
    # where the Wikipedia value is the smaller one slip through unnoticed.
    if abs(wiki_lat - osm_lat) > 0.1 or abs(wiki_lon - osm_lon) > 0.1:
        raise Exception(f"Data mismatch for {arena}: {wiki_lat} vs {nominatim_result['lat']}; {wiki_lon} vs {nominatim_result['lon']}")

    arenas.append({
        "type": "Feature",
        "geometry": {
            "type": "Point",
            "coordinates": [osm_lon, osm_lat], # yes, [lon, lat] since it's [x, y]
        },
        "properties": {
            "name": arena,
            "osm_id": nominatim_result["osm_id"],
            "address": nominatim_result["address"], # requires &addressdetails=1 (https://nominatim.org/release-docs/latest/api/Search/#output-details)
        },
    })

geojson = {
    "type": "FeatureCollection",
    "features": arenas,
}

with open("nhl-arenas-data.geojson", "w") as f:
    json.dump(geojson, f)
|