#!/usr/bin/env python3
import json

import requests
from bs4 import BeautifulSoup

# User-Agent copied from my own browser; it appears to be both necessary and
# sufficient to get past their firewall.
UA = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/114.0",
}

locations = []

# Crawl the location directory: index page -> state pages -> city pages ->
# individual store pages.
response = requests.get('https://locations.michaels.com/', headers=UA)
soup = BeautifulSoup(response.text, 'html.parser')
for state in soup.select('#content ul.state_list li a'):
    print(" ", state['href'])
    response = requests.get(state['href'], headers=UA)
    # Reassigning `soup` inside the loop is safe: select() already returned a
    # plain list, so the outer loops keep iterating over their own snapshots.
    soup = BeautifulSoup(response.text, 'html.parser')
    for city in soup.select('#cities ul.city_list li > a'):
        print(" ", city['href'])
        response = requests.get(city['href'], headers=UA)
        soup = BeautifulSoup(response.text, 'html.parser')
        for location in soup.select('#locations > ul > li > a'):
            print(" ", location['href'])
            response = requests.get(location['href'], headers=UA)
            soup = BeautifulSoup(response.text, 'html.parser')
            # The store's coordinates live in an embedded JSON blob; keying on
            # 'latitude' is crude but works.
            # TODO: filter this better?? (one idea is sketched at the bottom
            # of the file)
            scripts = [scr.text for scr in soup.find_all('script')
                       if 'latitude' in scr.text]
            data = json.loads(scripts[0])
            locations.append({
                "type": "Feature",
                "geometry": {
                    "type": "Point",
                    # Yes, [lon, lat]: GeoJSON coordinates are [x, y] order.
                    "coordinates": [data['geo']['longitude'],
                                    data['geo']['latitude']],
                },
                "properties": {
                    'address': data['address']['streetAddress'],
                    'city': data['address']['addressLocality'],
                    'state': data['address']['addressRegion'],
                    'zip': data['address']['postalCode'],
                    'country': data['address']['addressCountry'],
                    'url': data['url'],
                    'website': location['href'],
                },
            })

geojson = {
    "type": "FeatureCollection",
    "features": locations,
}

print(len(locations), "locations found")
with open("data.geojson", "w") as f:
    json.dump(geojson, f)
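
# --- Untested sketch for the TODO above ------------------------------------
# The blob we parse has 'geo' and 'address' keys, which suggests it is a
# schema.org JSON-LD block.  If so (an assumption -- I haven't re-checked the
# pages), we could select it by its type attribute instead of grepping every
# <script> for 'latitude'.  Not wired in above; a drop-in replacement for the
# scripts[0] dance would be data = find_ld_json(soup).
def find_ld_json(soup):
    """Return the first JSON-LD blob carrying a 'geo' key, or None."""
    for scr in soup.find_all('script', type='application/ld+json'):
        try:
            data = json.loads(scr.text)
        except json.JSONDecodeError:
            continue
        if isinstance(data, dict) and 'geo' in data:
            return data
    return None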
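
# --- Untested sketch: politer fetching --------------------------------------
# The crawl fires a bare requests.get() per page; if the firewall ever starts
# rate-limiting, a shared Session with retries (standard requests/urllib3
# machinery) could replace those calls.  Assumption: retrying on 429/5xx is
# acceptable here.  Imports would move to the top of the file if adopted.
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

def make_session():
    """A Session that sends UA on every request and retries transient errors."""
    session = requests.Session()
    session.headers.update(UA)
    retry = Retry(total=3, backoff_factor=1,
                  status_forcelist=[429, 500, 502, 503, 504])
    session.mount('https://', HTTPAdapter(max_retries=retry))
    return session

# Usage: session = make_session(), then session.get(url) wherever the script
# currently calls requests.get(url, headers=UA).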