#!/usr/bin/env python3

import json
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup


BASE_URL = "https://locations.whataburger.com/"
DIRECTORY_URL = BASE_URL + "directory.html"
OUTPUT_PATH = "data.geojson"

# requests applies no timeout by default, so a stalled connection would hang
# the script forever without this.
REQUEST_TIMEOUT = 30  # seconds

# Each state page embeds its map data as JSON inside this script tag.
# DOTALL lets the payload span several lines. This _could_ still break if the
# payload itself contained a closing script tag, which is not a concern here.
MAP_DATA_RE = re.compile(
    r'<script class="js-map-data" type="text/data">(.*?)</script>',
    re.DOTALL,
)


def fetch_html(session, url):
    """Return the body text of *url*.

    Args:
        session: A ``requests.Session`` (or anything with a compatible
            ``get`` method) used to issue the request.
        url: Absolute URL to download.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so
            an error page is never parsed as if it were real data.
    """
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.text


def extract_entities(html):
    """Return the location entities embedded in a state page.

    Args:
        html: Full HTML text of a state page.

    Returns:
        The value stored under ``response -> entities`` in the JSON payload
        of the ``js-map-data`` script tag.

    Raises:
        ValueError: If the page contains no ``js-map-data`` script tag.
    """
    match = MAP_DATA_RE.search(html)
    if match is None:
        raise ValueError("no js-map-data script tag found in page")
    return json.loads(match[1])['response']['entities']


def to_feature(location):
    """Convert one location entity into a GeoJSON ``Feature`` dict.

    Args:
        location: An entity as returned by ``extract_entities``; it must have
            a ``url`` key and a ``profile`` holding ``yextDisplayCoordinate``
            (``lat``/``long``) and ``meta`` (``id``).

    Raises:
        KeyError: If any of those keys is missing.
    """
    profile = location['profile']
    coordinate = profile['yextDisplayCoordinate']
    return {
        "type": "Feature",
        "geometry": {
            "type": "Point",
            # yes, [lon, lat] since it's [x, y]
            "coordinates": [coordinate['long'], coordinate['lat']],
        },
        "properties": {
            # The entity id is used as a stand-in for the address (kinda?).
            # City, state and zip are not populated by this scraper.
            'address': profile['meta']['id'],
            'website': urljoin(BASE_URL, location['url']),
        },
    }


def main():
    """Scrape every state page and write all locations to ``data.geojson``.

    Reads the directory page, follows each ``a.Directory-listLink`` link,
    converts every entity found there into a GeoJSON feature, prints the
    total count and writes a ``FeatureCollection`` to ``OUTPUT_PATH``.
    """
    locations = []

    # One session reuses the underlying connection across the state requests.
    with requests.Session() as session:
        soup = BeautifulSoup(fetch_html(session, DIRECTORY_URL), 'html.parser')

        for state_link in soup.select('a.Directory-listLink'):
            print(f"Fetching state data for {state_link.find('span').text}")
            state_html = fetch_html(session, urljoin(BASE_URL, state_link['href']))
            locations.extend(
                to_feature(location) for location in extract_entities(state_html)
            )

    geojson = {
        "type": "FeatureCollection",
        "features": locations,
    }

    print(len(locations), "locations found")

    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        json.dump(geojson, f)


if __name__ == "__main__":
    main()