maps.chandlerswift.com/layers/chains/whataburger/get_data.py

47 lines
1.8 KiB
Python
Raw Permalink Normal View History

2023-07-25 22:46:28 -05:00
#!/usr/bin/env python3
import requests
import json
from bs4 import BeautifulSoup
import re
# Scrape every Whataburger location from the public store directory and write
# the result to data.geojson as a GeoJSON FeatureCollection.
#
# Flow: fetch the directory page, follow each state link found on it, pull the
# JSON payload embedded in the state page, and turn every entity in that
# payload into a GeoJSON Point feature.

BASE_URL = "https://locations.whataburger.com/"

# requests applies no timeout by default; cap every request so a stalled
# connection cannot hang the scrape forever.
REQUEST_TIMEOUT = 30  # seconds

# Each state page carries its location list as JSON inside this <script> tag.
# This _could_ break if there's a closing script tag inside the payload itself.
# DOTALL lets the payload span several lines instead of silently not matching.
MAP_DATA_RE = re.compile(
    r'<script class="js-map-data" type="text/data">(.*?)</script>',
    re.DOTALL,
)


def _fetch(session, url):
    """Return the body of *url* as text.

    Raises requests.HTTPError on a 4xx/5xx status so that an error page is
    reported as such instead of failing later while being parsed.
    """
    page = session.get(url, timeout=REQUEST_TIMEOUT)
    page.raise_for_status()
    return page.text


def _location_to_feature(location):
    """Convert one entity from a state page's payload into a GeoJSON feature."""
    coordinate = location['profile']['yextDisplayCoordinate']
    return {
        "type": "Feature",
        "geometry": {
            "type": "Point",
            # yes, [lon, lat] since it's [x, y]
            "coordinates": [coordinate['long'], coordinate['lat']],
        },
        "properties": {
            'address': location['profile']['meta']['id'],  # kinda?
            # TODO: city/state/zip are not populated yet; the previous
            # extraction code relied on a `location_li` element that is not
            # defined in this script.
            'website': BASE_URL + location['url'],
        },
    }


locations = []

# One session for the whole run so connections to the host are reused.
with requests.Session() as session:
    soup = BeautifulSoup(_fetch(session, BASE_URL + "directory.html"), 'html.parser')
    state_links = soup.select('a.Directory-listLink')

    for state_link in state_links:
        print(f"Fetching state data for {state_link.find('span').text}")
        state_html = _fetch(session, BASE_URL + state_link['href'])

        map_data = MAP_DATA_RE.search(state_html)
        if map_data is None:
            # Fail loudly with the offending page rather than with an opaque
            # "'NoneType' object is not subscriptable".
            raise RuntimeError(
                f"No js-map-data script found on state page {state_link['href']}"
            )

        state_locations = json.loads(map_data[1])['response']['entities']
        locations.extend(
            _location_to_feature(location) for location in state_locations
        )

geojson = {
    "type": "FeatureCollection",
    "features": locations,
}

print(len(locations), "locations found")

with open("data.geojson", "w", encoding="utf-8") as f:
    json.dump(geojson, f)