maps.chandlerswift.com/layers/chains/punch-pizza/get_data.py

44 lines
1.3 KiB
Python
Raw Permalink Normal View History

2023-08-19 13:52:20 -05:00
#!/usr/bin/env python3
import requests
import json
from bs4 import BeautifulSoup
import re
import urllib.parse
# Stolen from my machine, appears to work; sufficient and necessary to get
# around their firewall apparently?
UA = {
    "User-Agent": 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/114.0'
}

# Seconds to wait on each HTTP request. Without an explicit timeout, requests
# will wait indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30

# Matches the `var punchloc = {lat: ..., lng: ...};` assignment in a location
# page's text; group 1 is the latitude, group 2 the longitude. Compiled once
# here rather than on every loop iteration.
LATLON_PATTERN = re.compile(r'var punchloc = {lat: ([0-9.-]*), lng: ([0-9.-]*)};')

# Fetch the index page, which links to each individual location page.
response = requests.get(
    'https://punchpizza.com/locations/',
    headers=UA,
    timeout=REQUEST_TIMEOUT,
)
# Fail loudly on an HTTP error instead of scraping an error page and silently
# writing an empty result.
response.raise_for_status()

soup = BeautifulSoup(response.text, 'html.parser')
location_links = soup.select('div.loctop > .wpb_wrapper > ul > li > a') # Two rows with the same id :eyeroll:

# One GeoJSON Feature per location page.
locations = []
for location_link in location_links:
    # hrefs are resolved against the site root, so relative and absolute
    # links both work.
    location_url = urllib.parse.urljoin("https://punchpizza.com", location_link['href'])
    location_response = requests.get(location_url, headers=UA, timeout=REQUEST_TIMEOUT)
    location_response.raise_for_status()

    latlon = LATLON_PATTERN.search(location_response.text)
    if not latlon:
        # Name the offending page so a markup change is easy to track down.
        raise RuntimeError(f"No latlon found at {location_url}")

    locations.append({
        "type": "Feature",
        "geometry": {
            "type": "Point",
            "coordinates": [float(latlon[2]), float(latlon[1])], # yes, [lon, lat] since it's [x, y]
        },
        # RFC 7946 requires every Feature to carry a "properties" member,
        # even when there is nothing to put in it yet.
        # TODO: addresses are kind of a mess
        "properties": {},
    })

geojson = {
    "type": "FeatureCollection",
    "features": locations,
}

print(len(locations), "locations found")

# Written relative to the current working directory.
with open("data.geojson", "w", encoding="utf-8") as f:
    json.dump(geojson, f)