#!/usr/bin/env python3

import json
import re
from typing import Tuple

import requests

BASE_URL = "https://en.wikipedia.org/w/api.php"

# A previous attempt at this script used the NHL api...except for some reason
# they don't include all the arenas! Only 16 are included on this page:
#
# venueData = requests.get("https://statsapi.web.nhl.com/api/v1/venues").json()
#
# venues = []
# for venue in venueData["venues"]:
#     # Special-case a few entries
#     if venue["name"] == "NASSAU LIVE CENTER":
#         # As of 2021 the Islanders now play out of UBS Arena. Not sure why this
#         # is still in the list.
#         continue
#     if venue["name"] == "Prudential Center Map & Info":
#         # not sure why they call it that
#         venue["name"] = "Prudential Center"
#     ...


def wikipedia_request(page_title: str) -> str:
    """Fetch the raw wikitext of a Wikipedia page via the parse API."""
    params = {
        "action": "parse",
        "page": page_title,
        "prop": "wikitext",
        "formatversion": 2,
        "format": "json",
    }
    return requests.get(url=BASE_URL, params=params).json()["parse"]["wikitext"]


def get_wikipedia_coords_for_arena(arena: str) -> Tuple[float, float]:
    """Extract (lat, lon) from the {{coord}} template on the arena's Wikipedia page."""
    raw_arena_page = wikipedia_request(arena)
    # print(raw_arena_page)

    # e.g. `coordinates = {{coord|40.712094|N|73.727157|W|...}}`
    match = re.search(r"[Cc]oord\|([0-9.]*)\|N\|([0-9.]*)\|W\|", raw_arena_page)
    if match:
        return (float(match[1]), -float(match[2]))

    # e.g. `coordinates = {{Coord|47.622|-122.354|...}}`
    match = re.search(r"[Cc]oord\|([0-9.]*)\|(-[0-9.]*)\|[^\d]", raw_arena_page)
    if match:
        return (float(match[1]), float(match[2]))

    # e.g. `coordinates = {{coord|44|56|41|N|93|6|4|W|...}}`
    # Assuming northern and western hemispheres; currently safe
    match = re.search(
        r"[Cc]oord\|([0-9.]*)\|([0-9.]*)\|([0-9.]*)\|N\|([0-9.]*)\|([0-9.]*)\|([0-9.]*)\|W\|",
        raw_arena_page,
    )
    if not match:
        raise ValueError(f"Could not find coordinates in the Wikipedia page for {arena}")
    lat_deg, lat_min, lat_sec = match[1], match[2], match[3]
    lon_deg, lon_min, lon_sec = match[4], match[5], match[6]
    # Convert degrees/minutes/seconds to decimal degrees
    lat = float(lat_deg) + float(lat_min) / 60 + float(lat_sec) / 3600
    lon = float(lon_deg) + float(lon_min) / 60 + float(lon_sec) / 3600
    return (lat, -lon)


print("Retrieving arena list...", flush=True)
raw_arenas_list = wikipedia_request("Template:NHL arenas")
# Pull the page titles out of the template's `* [[Arena name|display text]]` list items
arena_names = re.findall(r"\* +\[\[ ?(.*?)(?:\|.*)? ?\]\]", raw_arenas_list)

arenas = []
for arena in arena_names:
    print(f"Retrieving data for {arena}...", flush=True)
    nominatim_params = {
        "q": arena,
        "format": "json",
        "addressdetails": 1,
    }
    if arena == "SAP Center":
        # https://en.wikipedia.org/w/index.php?title=SAP_Center&oldid=690907747
        nominatim_params["q"] = "SAP Center at San Jose"
    nominatim_result = requests.get(
        url="https://nominatim.openstreetmap.org/search", params=nominatim_params
    ).json()[0]

    # confirm it matches what wikipedia claims
    wiki_lat, wiki_lon = get_wikipedia_coords_for_arena(arena)
    if (
        abs(wiki_lat - float(nominatim_result["lat"])) > 0.1
        or abs(wiki_lon - float(nominatim_result["lon"])) > 0.1
    ):
        raise Exception(
            f"Data mismatch for {arena}: {wiki_lat} vs {nominatim_result['lat']}; "
            f"{wiki_lon} vs {nominatim_result['lon']}"
        )

    arenas.append({
        "type": "Feature",
        "geometry": {
            "type": "Point",
            # yes, [lon, lat] since it's [x, y]
            "coordinates": [float(nominatim_result["lon"]), float(nominatim_result["lat"])],
        },
        "properties": {
            "name": arena,
            "osm_id": nominatim_result["osm_id"],
            # requires &addressdetails=1 (https://nominatim.org/release-docs/latest/api/Search/#output-details)
            "address": nominatim_result["address"],
        },
    })

geojson = {
    "type": "FeatureCollection",
    "features": arenas,
}

with open("data.geojson", "w") as f:
    f.write(json.dumps(geojson))