maps.chandlerswift.com/layers/chains/michaels/get_data.py


#!/usr/bin/env python3
import requests
import json
from bs4 import BeautifulSoup
# Stolen from my machine, appears to work; sufficient and necessary to get
# around their firewall apparently?
UA = {
    "User-Agent": 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/114.0',
}
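# (A requests.Session would reuse one keep-alive connection across the many
# requests below; plain requests.get works too, it just reconnects each time.)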
locations = []
response = requests.get('https://locations.michaels.com/', headers=UA)
soup = BeautifulSoup(response.text, 'html.parser')
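# The location directory is three levels deep: a state list on the index
# page, a city list per state, and individual store pages per city.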
for state in soup.select('#content ul.state_list li a'):
    print(" ", state['href'])
    response = requests.get(state['href'], headers=UA)
    soup = BeautifulSoup(response.text, 'html.parser')
    for city in soup.select('#cities ul.city_list li > a'):
        print(" ", city['href'])
        response = requests.get(city['href'], headers=UA)
        soup = BeautifulSoup(response.text, 'html.parser')
        for location in soup.select('#locations > ul > li > a'):
            print(" ", location['href'])
            response = requests.get(location['href'], headers=UA)
            soup = BeautifulSoup(response.text, 'html.parser')
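            # Each store page embeds what looks like schema.org structured
            # data (geo coordinates plus a postal address) in a <script>
            # block; grab the first script mentioning 'latitude' as JSON.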
            scripts = [scr.text for scr in soup.find_all('script') if 'latitude' in scr.text]  # TODO: filter this better??
            data = json.loads(scripts[0])
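            # A tighter filter would target the structured-data blocks
            # directly, assuming the pages mark them as JSON-LD (unverified):
            #   scripts = [scr.text for scr in
            #              soup.find_all('script', type='application/ld+json')]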
            locations.append({
                "type": "Feature",
                "geometry": {
                    "type": "Point",
                    "coordinates": [data['geo']['longitude'], data['geo']['latitude']],  # yes, [lon, lat] since it's [x, y]
                },
                "properties": {
                    'address': data['address']['streetAddress'],
                    'city': data['address']['addressLocality'],
                    'state': data['address']['addressRegion'],
                    'zip': data['address']['postalCode'],
                    'country': data['address']['addressCountry'],
                    'url': data['url'],
                    'website': location['href'],
                },
            })
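# Wrap everything in a FeatureCollection, the standard GeoJSON container.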
geojson = {
    "type": "FeatureCollection",
    "features": locations,
}
print(len(locations), "locations found")
with open("data.geojson", "w") as f:
    f.write(json.dumps(geojson))