#!/usr/bin/env python3
"""Scrape Whataburger store locations into a GeoJSON file.

Fetches the location directory page, follows every ``a.Directory-listLink``
link found on it (one per state, judging by the link labels), pulls the JSON
payload embedded in each linked page, and writes all locations as a GeoJSON
``FeatureCollection`` to ``data.geojson`` in the current directory.
"""
import json
import re

import requests
from bs4 import BeautifulSoup

BASE_URL = "https://locations.whataburger.com/"

# Seconds to wait on a request before giving up, so a stalled connection
# cannot hang the whole scrape.
REQUEST_TIMEOUT = 30

# Captures the body of every <script> element.
# this _could_ break if there's a closing script tag in the script somehow,
# but I'm not too concerned
SCRIPT_BODY_RE = re.compile(
    r"<script\b[^>]*>(.*?)</script>", re.DOTALL | re.IGNORECASE
)


def _fetch(session, url):
    """Return the body text of *url*, raising on HTTP errors or timeouts."""
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly here rather than trying to parse an error page below.
    response.raise_for_status()
    return response.text


def _extract_entities(html):
    """Return the ``response.entities`` list embedded in a state page.

    The page carries its location data as JSON inside a ``<script>`` element.
    Rather than depend on that element's exact attributes, every script body
    is tried and the first one that parses as a JSON object containing a
    ``response.entities`` list is used.

    Raises:
        ValueError: if no script on the page holds such a payload.
    """
    for match in SCRIPT_BODY_RE.finditer(html):
        try:
            payload = json.loads(match.group(1))
        except ValueError:
            # Ordinary JavaScript (or an empty script) - not the data we want.
            continue
        if not isinstance(payload, dict):
            continue
        response = payload.get("response")
        if isinstance(response, dict) and isinstance(
            response.get("entities"), list
        ):
            return response["entities"]
    raise ValueError(
        "no embedded JSON payload with response.entities found in page"
    )


def _to_feature(location):
    """Convert one location entity into a GeoJSON ``Feature`` dict."""
    coordinate = location['profile']['yextDisplayCoordinate']
    return {
        "type": "Feature",
        "geometry": {
            "type": "Point",
            # yes, [lon, lat] since it's [x, y]
            "coordinates": [coordinate['long'], coordinate['lat']],
        },
        "properties": {
            'address': location['profile']['meta']['id'],  # kinda?
            # TODO: 'city', 'state' and 'zip' are not populated yet; the old
            # extraction read them from a "city-state-zip" HTML element and
            # has not been ported to this JSON payload.
            'website': BASE_URL + location['url'],
        },
    }


def main():
    """Scrape every linked state page and write ``data.geojson``."""
    locations = []

    # One session lets the many per-state requests reuse the connection.
    with requests.Session() as session:
        directory_html = _fetch(session, BASE_URL + "directory.html")
        soup = BeautifulSoup(directory_html, 'html.parser')

        for state_link in soup.select('a.Directory-listLink'):
            print(f"Fetching state data for {state_link.find('span').text}")
            state_html = _fetch(session, f"{BASE_URL}{state_link['href']}")
            locations.extend(
                _to_feature(location)
                for location in _extract_entities(state_html)
            )

    geojson = {
        "type": "FeatureCollection",
        "features": locations,
    }

    print(len(locations), "locations found")

    with open("data.geojson", "w", encoding="utf-8") as f:
        json.dump(geojson, f)


if __name__ == "__main__":
    main()