#!/usr/bin/env python3
import re
from typing import Tuple
import requests
import json
# Endpoint of the English Wikipedia MediaWiki API; every Wikipedia request in
# this script is sent here. The literal must not carry surrounding whitespace.
BASE_URL = "https://en.wikipedia.org/w/api.php"
# A previous attempt at this script used the NHL API... except that, for some
# reason, it doesn't include all the arenas! Only 16 are included on this page:
#
#   venueData = requests.get("https://statsapi.web.nhl.com/api/v1/venues").json()
#
#   venues = []
#   for venue in venueData["venues"]:
#       # Special-case a few entries
#       if venue["name"] == "NASSAU LIVE CENTER":
#           # As of 2021 the Islanders now play out of UBS Arena. Not sure why
#           # this is still in the list.
#           continue
#       if venue["name"] == "Prudential Center Map & Info":
#           # not sure why they call it that
#           venue["name"] = "Prudential Center"
#       ...
def wikipedia_request(page_title: str, timeout: float = 30.0) -> str:
    """Fetch the raw wikitext of a Wikipedia page.

    Sends an ``action=parse`` request to ``BASE_URL`` asking for the page's
    ``wikitext`` property as JSON (``formatversion=2``).

    Args:
        page_title: Title of the page to fetch, e.g. ``"Template:NHL arenas"``.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the script indefinitely.

    Returns:
        The value stored under ``parse.wikitext`` in the JSON response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        KeyError: If the response has no ``parse.wikitext`` entry (for example
            because the page does not exist); the message includes whatever
            the response carried under ``error``.
    """
    params = {
        "action": "parse",
        "page": page_title,
        "prop": "wikitext",
        "formatversion": 2,
        "format": "json",
    }
    response = requests.get(url=BASE_URL, params=params, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to decode an error page.
    response.raise_for_status()
    payload = response.json()
    try:
        return payload["parse"]["wikitext"]
    except KeyError as err:
        # Keep raising KeyError (as the bare lookup did) but say which page
        # failed and pass along any error object the API sent back.
        raise KeyError(
            f"no wikitext returned for page {page_title!r}; "
            f"API error: {payload.get('error')!r}"
        ) from err
# e.g. `coordinates = {{coord|40.712094|N|73.727157|W|...}}`
_DECIMAL_HEMISPHERE_RE = re.compile(
    r"[Cc]oord\|([0-9.]+)\|([NS])\|([0-9.]+)\|([EW])\|"
)
# e.g. `coordinates = {{Coord|47.622|-122.354|...}}`
# Only a positive latitude followed by a negative longitude is recognised;
# loosening this would make it swallow degree/minute templates.
_SIGNED_DECIMAL_RE = re.compile(r"[Cc]oord\|([0-9.]+)\|(-[0-9.]+)\|[^\d]")
# e.g. `coordinates = {{coord|44|56|41|N|93|6|4|W|...}}`
_DMS_RE = re.compile(
    r"[Cc]oord\|([0-9.]+)\|([0-9.]+)\|([0-9.]+)\|([NS])"
    r"\|([0-9.]+)\|([0-9.]+)\|([0-9.]+)\|([EW])\|"
)
# Sign applied to a magnitude that is qualified by a hemisphere letter.
_HEMISPHERE_SIGN = {"N": 1.0, "S": -1.0, "E": 1.0, "W": -1.0}


def _coords_from_wikitext(wikitext: str) -> Tuple[float, float]:
    """Return (lat, lon) in signed decimal degrees from the first coord match.

    The three supported template shapes are tried in a fixed order: decimal
    degrees with hemisphere letters, signed decimal degrees, then
    degrees/minutes/seconds with hemisphere letters.

    Raises:
        ValueError: If none of the shapes is found, or a captured number is
            not a valid float.
    """
    match = _DECIMAL_HEMISPHERE_RE.search(wikitext)
    if match:
        lat, ns, lon, ew = match.groups()
        return (_HEMISPHERE_SIGN[ns] * float(lat), _HEMISPHERE_SIGN[ew] * float(lon))

    match = _SIGNED_DECIMAL_RE.search(wikitext)
    if match:
        return (float(match[1]), float(match[2]))

    match = _DMS_RE.search(wikitext)
    if match:
        lat_deg, lat_min, lat_sec, ns, lon_deg, lon_min, lon_sec, ew = match.groups()
        lat = float(lat_deg) + float(lat_min) / 60 + float(lat_sec) / 3600
        lon = float(lon_deg) + float(lon_min) / 60 + float(lon_sec) / 3600
        return (_HEMISPHERE_SIGN[ns] * lat, _HEMISPHERE_SIGN[ew] * lon)

    raise ValueError("no recognised coord template found")


def get_wikipedia_coords_for_arena(arena: str) -> Tuple[float, float]:
    """Look up an arena's coordinates from its Wikipedia page.

    Fetches the wikitext of the page titled ``arena`` via
    ``wikipedia_request`` and extracts the first coordinate template it
    recognises.

    Args:
        arena: Wikipedia page title of the arena.

    Returns:
        ``(latitude, longitude)`` in decimal degrees; southern latitudes and
        western longitudes are negative.

    Raises:
        ValueError: If the page contains no coordinate template in one of the
            supported shapes. Previously this case crashed with a ``TypeError``
            from subscripting ``None``.
    """
    raw_arena_page = wikipedia_request(arena)
    try:
        return _coords_from_wikitext(raw_arena_page)
    except ValueError as err:
        # Re-raise with the arena name so the failing page is identifiable.
        raise ValueError(
            f"Could not read coordinates from the Wikipedia page for {arena!r}: {err}"
        ) from err
# --- Script body: build data.geojson with one point feature per NHL arena ---

NOMINATIM_URL = "https://nominatim.openstreetmap.org/search"
# Arenas whose Wikipedia title is not the best geocoder query; maps the
# title to the query string sent to Nominatim instead.
NOMINATIM_QUERY_OVERRIDES = {
    # https://en.wikipedia.org/w/index.php?title=SAP_Center&oldid=690907747
    "SAP Center": "SAP Center at San Jose",
}
# Largest tolerated gap, in degrees, between Wikipedia's and Nominatim's
# latitude (or longitude) before the run is aborted.
MAX_COORD_DIFFERENCE = 0.1
# Seconds to wait on Nominatim so a stalled connection cannot hang the run.
NOMINATIM_TIMEOUT = 30

print("Retrieving arena list...", flush=True)
raw_arenas_list = wikipedia_request("Template:NHL arenas")
# Capture the link target of every `* [[Target|label]]` bullet in the template.
arena_names = re.findall(r"\* +\[\[ ?(.*?)(?:\|.*)? ?\]\]", raw_arenas_list)

arenas = []
for arena in arena_names:
    print(f"Retrieving data for {arena}...", flush=True)
    nominatim_params = {
        "q": NOMINATIM_QUERY_OVERRIDES.get(arena, arena),
        "format": "json",
        "addressdetails": 1,
    }
    # NOTE(review): Nominatim's usage policy reportedly expects an identifying
    # User-Agent and at most one request per second -- confirm and add if needed.
    nominatim_response = requests.get(
        url=NOMINATIM_URL, params=nominatim_params, timeout=NOMINATIM_TIMEOUT
    )
    nominatim_response.raise_for_status()
    nominatim_results = nominatim_response.json()
    if not nominatim_results:
        # Previously an empty result list surfaced as a bare IndexError.
        raise LookupError(f"Nominatim returned no results for {arena}")
    nominatim_result = nominatim_results[0]
    osm_lat = float(nominatim_result["lat"])
    osm_lon = float(nominatim_result["lon"])

    # confirm it matches what wikipedia claims
    wiki_lat, wiki_lon = get_wikipedia_coords_for_arena(arena)
    # abs() is essential: without it a Wikipedia value far *below* the
    # Nominatim one yields a negative difference and is never flagged.
    if (
        abs(wiki_lat - osm_lat) > MAX_COORD_DIFFERENCE
        or abs(wiki_lon - osm_lon) > MAX_COORD_DIFFERENCE
    ):
        raise ValueError(
            f"Data mismatch for {arena}: {wiki_lat} vs {nominatim_result['lat']}; "
            f"{wiki_lon} vs {nominatim_result['lon']}"
        )

    arenas.append({
        "type": "Feature",
        "geometry": {
            "type": "Point",
            "coordinates": [osm_lon, osm_lat],  # yes, [lon, lat] since it's [x, y]
        },
        "properties": {
            "name": arena,
            "osm_id": nominatim_result["osm_id"],
            # requires &addressdetails=1
            # (https://nominatim.org/release-docs/latest/api/Search/#output-details)
            "address": nominatim_result["address"],
        },
    })

geojson = {
    "type": "FeatureCollection",
    "features": arenas,
}

with open("data.geojson", "w", encoding="utf-8") as f:
    json.dump(geojson, f)