Chandler Swift
7957523c3c
This was originally done to make the gitignoring easier, but ended up being somewhat more complex when trying to include files, so they're moving out closer to the point of use.
98 lines
3.7 KiB
Python
Executable file
98 lines
3.7 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
|
|
import re
|
|
from typing import Tuple
|
|
import requests
|
|
import json
|
|
|
|
# Endpoint of the English Wikipedia API that every wikitext lookup goes through.
BASE_URL = "https://en.wikipedia.org/w/api.php"
|
|
|
|
# A previous attempt at this script used the NHL API...except for some reason
# they don't include all the arenas! Only 16 are included on this page:
#
#     venueData = requests.get("https://statsapi.web.nhl.com/api/v1/venues").json()
#
#     venues = []
#     for venue in venueData["venues"]:
#         # Special-case a few entries
#         if venue["name"] == "NASSAU LIVE CENTER":
#             # As of 2021 the Islanders now play out of UBS Arena. Not sure
#             # why this is still in the list.
#             continue
#         if venue["name"] == "Prudential Center Map & Info":
#             # not sure why they call it that
#             venue["name"] = "Prudential Center"
#         ...
|
|
|
|
def wikipedia_request(page_title: str) -> str:
    """Fetch the raw wikitext of a Wikipedia page.

    Sends an ``action=parse`` request to ``BASE_URL`` asking for the
    ``wikitext`` property of the given page.

    Args:
        page_title: Title of the page to fetch, e.g. ``"Template:NHL arenas"``.

    Returns:
        The value stored under ``parse`` -> ``wikitext`` in the JSON response.

    Raises:
        requests.HTTPError: If the API answers with an HTTP error status.
        requests.Timeout: If the API does not answer within the timeout.
        KeyError: If the JSON response has no ``parse``/``wikitext`` entry.
    """
    params = {
        "action": "parse",
        "page": page_title,
        "prop": "wikitext",
        "formatversion": 2,
        "format": "json",
    }
    # Without an explicit timeout, requests waits forever on a stalled
    # connection, which would hang the whole script.
    response = requests.get(url=BASE_URL, params=params, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to decode an error page as JSON.
    response.raise_for_status()
    return response.json()["parse"]["wikitext"]
|
|
|
|
def get_wikipedia_coords_for_arena(arena: str) -> Tuple[float, float]:
    """Extract an arena's coordinates from the wikitext of its Wikipedia page.

    The page is fetched with ``wikipedia_request`` and searched for a
    ``coord``/``Coord`` template in one of three spellings, tried in order:

    * decimal degrees with hemisphere letters:
      ``{{coord|40.712094|N|73.727157|W|...}}``
    * signed decimal degrees: ``{{Coord|47.622|-122.354|...}}``
    * degrees/minutes/seconds: ``{{coord|44|56|41|N|93|6|4|W|...}}``

    Only northern latitudes and western longitudes are recognised; that is
    sufficient for the arenas this script currently looks up.

    Args:
        arena: Title of the arena's Wikipedia page.

    Returns:
        ``(latitude, longitude)`` in decimal degrees, with western longitudes
        returned as negative numbers.

    Raises:
        ValueError: If none of the supported ``coord`` formats is found.
    """
    raw_arena_page = wikipedia_request(arena)

    # e.g. `coordinates = {{coord|40.712094|N|73.727157|W|...}}`
    # `+` rather than `*` so an empty field can never reach float().
    match = re.search(r"[Cc]oord\|([0-9.]+)\|N\|([0-9.]+)\|W\|", raw_arena_page)
    if match:
        return (float(match[1]), -float(match[2]))

    # e.g. `coordinates = {{Coord|47.622|-122.354|...}}`
    match = re.search(r"[Cc]oord\|([0-9.]+)\|(-[0-9.]+)\|[^\d]", raw_arena_page)
    if match:
        return (float(match[1]), float(match[2]))

    # e.g. `coordinates = {{coord|44|56|41|N|93|6|4|W|...}}`
    # Assuming northern and western hemispheres; currently safe
    match = re.search(
        r"[Cc]oord\|([0-9.]+)\|([0-9.]+)\|([0-9.]+)\|N\|([0-9.]+)\|([0-9.]+)\|([0-9.]+)\|W\|",
        raw_arena_page,
    )
    if match is None:
        # Previously this fell through to `match[1]` and died with an opaque
        # "'NoneType' object is not subscriptable" TypeError.
        raise ValueError(
            f"Could not find coordinates in a supported format on the Wikipedia page for {arena}"
        )

    lat_deg, lat_min, lat_sec, lon_deg, lon_min, lon_sec = (
        float(part) for part in match.groups()
    )
    lat = lat_deg + lat_min / 60 + lat_sec / 3600
    lon = lon_deg + lon_min / 60 + lon_sec / 3600
    return (lat, -lon)
|
|
|
|
|
|
# Script body: build data.geojson with one GeoJSON Point feature per arena
# named in Wikipedia's "Template:NHL arenas". Positions come from Nominatim
# and are cross-checked against the coordinates on each arena's Wikipedia page.
print("Retrieving arena list...", flush=True)
raw_arenas_list = wikipedia_request("Template:NHL arenas")
# Pull the link target out of each `* [[Target|Label]]` bullet in the template.
arena_names = re.findall(r"\* +\[\[ ?(.*?)(?:\|.*)? ?\]\]", raw_arenas_list)
arenas = []
for arena in arena_names:
    print(f"Retrieving data for {arena}...", flush=True)
    nominatim_params = {
        'q': arena,
        'format': "json",
        'addressdetails': 1,
    }
    if arena == "SAP Center":
        nominatim_params['q'] = "SAP Center at San Jose"  # https://en.wikipedia.org/w/index.php?title=SAP_Center&oldid=690907747

    # Explicit timeout so a stalled connection cannot hang the script.
    nominatim_response = requests.get(
        url="https://nominatim.openstreetmap.org/search",
        params=nominatim_params,
        timeout=30,
    )
    nominatim_response.raise_for_status()
    nominatim_results = nominatim_response.json()
    if not nominatim_results:
        # An empty result list used to surface as a bare IndexError.
        raise Exception(f"No Nominatim results for {arena}")
    nominatim_result = nominatim_results[0]
    nominatim_lat = float(nominatim_result["lat"])
    nominatim_lon = float(nominatim_result["lon"])

    # confirm it matches what wikipedia claims
    wiki_lat, wiki_lon = get_wikipedia_coords_for_arena(arena)
    # Compare absolute differences: a signed comparison lets any mismatch
    # where Nominatim's value is the larger one slip through unnoticed.
    if abs(wiki_lat - nominatim_lat) > 0.1 or abs(wiki_lon - nominatim_lon) > 0.1:
        raise Exception(f"Data mismatch for {arena}: {wiki_lat} vs {nominatim_result['lat']}; {wiki_lon} vs {nominatim_result['lon']}")

    arenas.append({
        "type": "Feature",
        "geometry": {
            "type": "Point",
            "coordinates": [nominatim_lon, nominatim_lat],  # yes, [lon, lat] since it's [x, y]
        },
        "properties": {
            "name": arena,
            "osm_id": nominatim_result["osm_id"],
            "address": nominatim_result["address"],  # requires &addressdetails=1 (https://nominatim.org/release-docs/latest/api/Search/#output-details)
        },
    })

geojson = {
    "type": "FeatureCollection",
    "features": arenas,
}

with open("data.geojson", "w") as f:
    json.dump(geojson, f)
|