#!/usr/bin/env python3

import re
from typing import Tuple
import requests
import json

BASE_URL="https://en.wikipedia.org/w/api.php"

# A previous attempt at this script used the NHL api...except for some reason
# they don't include all the arenas! Only 16 are included on this page:
#
# venueData = requests.get("https://statsapi.web.nhl.com/api/v1/venues").json()
#
# venues = []
# for venue in venueData["venues"]:
#     # Special-case a few entries
#     if venue["name"] == "NASSAU LIVE CENTER":
#         # As of 2021 the Islanders now play out of UBS Arena. Not sure why
#         # this is still in the list.
#         continue
#     if venue["name"] == "Prudential Center Map & Info":
#         # not sure why they call it that
#         venue["name"] = "Prudential Center"
#     ...

def wikipedia_request(page_title: str) -> str:
    """Fetch the raw wikitext of a Wikipedia page via the MediaWiki parse API.

    Args:
        page_title: Title of the page to fetch, e.g. ``"Template:NHL arenas"``.

    Returns:
        The page's wikitext as a single string.

    Raises:
        requests.exceptions.RequestException: on network failure, timeout, or
            a non-2xx HTTP status.
        KeyError: if the API response carries no parsed wikitext (for example
            because the page does not exist).
    """
    params = {
        "action": "parse",
        "page": page_title,
        "prop": "wikitext",
        "formatversion": 2,  # v2 returns the wikitext as a plain string
        "format": "json",
    }
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url=BASE_URL, params=params, timeout=30)
    response.raise_for_status()
    payload = response.json()
    if "error" in payload:
        # The API reports problems such as a missing page in the JSON body;
        # surface that message rather than a bare KeyError on 'parse'.
        raise KeyError(f"Wikipedia API error for {page_title!r}: {payload['error']}")
    return payload["parse"]["wikitext"]

def get_wikipedia_coords_for_arena(arena: str) -> Tuple[float, float]:
    """Return the coordinates listed on an arena's Wikipedia page.

    The page's wikitext is searched for a ``coord`` template in one of three
    layouts, tried in this order:

    1. decimal degrees with hemisphere letters: ``coord|40.712094|N|73.727157|W|``
    2. signed decimal degrees: ``Coord|47.622|-122.354|``
    3. degrees/minutes/seconds with hemisphere letters: ``coord|44|56|41|N|93|6|4|W|``

    Layouts 1 and 3 only recognise the northern and western hemispheres;
    layout 2 requires a negative longitude.

    Args:
        arena: Title of the arena's Wikipedia page.

    Returns:
        ``(latitude, longitude)`` in decimal degrees, with western longitudes
        negative.

    Raises:
        ValueError: if no supported ``coord`` template is found on the page.
    """
    raw_arena_page = wikipedia_request(arena)

    # e.g. `coordinates = {{coord|40.712094|N|73.727157|W|...}}`
    match = re.search(r"[Cc]oord\|([0-9.]*)\|N\|([0-9.]*)\|W\|", raw_arena_page)
    if match:
        return (float(match[1]), -float(match[2]))

    # e.g. `coordinates = {{Coord|47.622|-122.354|...}}`
    match = re.search(r"[Cc]oord\|([0-9.]*)\|(-[0-9.]*)\|[^\d]", raw_arena_page)
    if match:
        return (float(match[1]), float(match[2]))

    # e.g. `coordinates = {{coord|44|56|41|N|93|6|4|W|...}}`
    # Assuming northern and western hemispheres; currently safe
    match = re.search(
        r"[Cc]oord\|([0-9.]*)\|([0-9.]*)\|([0-9.]*)\|N"
        r"\|([0-9.]*)\|([0-9.]*)\|([0-9.]*)\|W\|",
        raw_arena_page,
    )
    if match is None:
        # Fail with a message naming the page instead of an opaque
        # "'NoneType' object is not subscriptable".
        raise ValueError(f"No recognised coord template found on Wikipedia page for {arena!r}")

    lat_deg, lat_min, lat_sec, lon_deg, lon_min, lon_sec = (
        float(part) for part in match.groups()
    )
    lat = lat_deg + lat_min / 60 + lat_sec / 3600
    lon = lon_deg + lon_min / 60 + lon_sec / 3600
    return (lat, -lon)


# Build data.geojson: one GeoJSON Point feature per arena listed in Wikipedia's
# "Template:NHL arenas", geocoded with Nominatim and sanity-checked against
# the coordinates on the arena's own Wikipedia page.
print("Retrieving arena list...", flush=True)
raw_arenas_list = wikipedia_request("Template:NHL arenas")
# Arenas appear as bulleted wiki links, `* [[Title]]` or `* [[Title|label]]`;
# capture only the page title.
arena_names = re.findall(r"\* +\[\[ ?(.*?)(?:\|.*)? ?\]\]", raw_arenas_list)
arenas = []
for arena in arena_names:
    print(f"Retrieving data for {arena}...", flush=True)
    nominatim_params = {
        'q': arena,
        'format': "json",
        'addressdetails': 1,
    }
    if arena == "SAP Center":
        nominatim_params['q'] = "SAP Center at San Jose" # https://en.wikipedia.org/w/index.php?title=SAP_Center&oldid=690907747
    # NOTE(review): Nominatim's usage policy asks clients to send an
    # identifying User-Agent and to stay under 1 request/second - confirm this
    # script complies before running it against the public instance.
    nominatim_response = requests.get(
        url="https://nominatim.openstreetmap.org/search",
        params=nominatim_params,
        timeout=30,  # don't hang forever on a stalled connection
    )
    nominatim_response.raise_for_status()
    nominatim_results = nominatim_response.json()
    if not nominatim_results:
        # An empty result list would otherwise surface as a bare IndexError.
        raise LookupError(f"Nominatim returned no results for {arena}")
    nominatim_result = nominatim_results[0]  # first hit
    nominatim_lat = float(nominatim_result["lat"])
    nominatim_lon = float(nominatim_result["lon"])

    # confirm it matches what wikipedia claims
    wiki_lat, wiki_lon = get_wikipedia_coords_for_arena(arena)
    # Compare absolute differences: a signed comparison would miss cases where
    # the Wikipedia value is *smaller* than Nominatim's by more than 0.1.
    if abs(wiki_lat - nominatim_lat) > 0.1 or abs(wiki_lon - nominatim_lon) > 0.1:
        raise Exception(f"Data mismatch for {arena}: {wiki_lat} vs {nominatim_result['lat']}; {wiki_lon} vs {nominatim_result['lon']}")

    arenas.append({
        "type": "Feature",
        "geometry": {
            "type": "Point",
            "coordinates": [nominatim_lon, nominatim_lat], # yes, [lon, lat] since it's [x, y]
        },
        "properties": {
            "name": arena,
            "osm_id": nominatim_result["osm_id"],
            "address": nominatim_result["address"], # requires &addressdetails=1 (https://nominatim.org/release-docs/latest/api/Search/#output-details)
        },
    })

geojson = {
    "type": "FeatureCollection",
    "features": arenas,
}

with open("data.geojson", "w") as f:
    json.dump(geojson, f)