58 lines
2.1 KiB
Python
58 lines
2.1 KiB
Python
|
#!/usr/bin/env python3
|
||
|
|
||
|
import requests
|
||
|
import json
|
||
|
from bs4 import BeautifulSoup
|
||
|
|
||
|
# Browser User-Agent copied from a real desktop Firefox install.  Sending it
# appeared to be both sufficient and necessary to get past the site's firewall
# (observed empirically; not documented anywhere).
UA = {
    "User-Agent": 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/114.0',
}

# Entry page of the store locator; it links to one page per state.
BASE_URL = 'https://locations.michaels.com/'

# CSS selectors for the links on each level of the locator hierarchy.
STATE_LINKS = '#content ul.state_list li a'
CITY_LINKS = '#cities ul.city_list li > a'
STORE_LINKS = '#locations > ul > li > a'

# Seconds to wait for a response; without a timeout a single stalled
# connection would hang the whole crawl forever.
REQUEST_TIMEOUT = 30


def fetch_soup(url):
    """Download *url* and return the page parsed with ``html.parser``.

    Raises:
        requests.RequestException: on timeout, connection failure or a
            non-success HTTP status, so that an error page is never parsed
            as if it were a (link-less) listing.
    """
    response = requests.get(url, headers=UA, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')


def page_links(url, selector):
    """Return the ``href`` of every anchor matching *selector* on *url*."""
    return [anchor['href'] for anchor in fetch_soup(url).select(selector)]


def parse_store_jsonld(script_texts):
    """Return the first usable store record among *script_texts*.

    Each text is tried in order; one is usable when it parses as a JSON
    object that has both a ``geo`` and an ``address`` key.  Texts that are
    not JSON (for example inline JavaScript that merely mentions
    "latitude") are ignored.

    Returns:
        The decoded dict, or ``None`` when no text qualifies.
    """
    for text in script_texts:
        try:
            data = json.loads(text)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            continue
        if isinstance(data, dict) and 'geo' in data and 'address' in data:
            return data
    return None


def build_feature(data, website):
    """Convert one decoded store record into a GeoJSON ``Feature`` dict.

    Args:
        data: store record as returned by :func:`parse_store_jsonld`.
        website: URL of the page the record was scraped from.

    Raises:
        KeyError: if *data* lacks any of the fields read below.
    """
    geo = data['geo']
    address = data['address']
    return {
        "type": "Feature",
        "geometry": {
            "type": "Point",
            # yes, [lon, lat] since it's [x, y]
            "coordinates": [geo['longitude'], geo['latitude']],
        },
        "properties": {
            'address': address['streetAddress'],
            'city': address['addressLocality'],
            'state': address['addressRegion'],
            'zip': address['postalCode'],
            'country': address['addressCountry'],
            'url': data['url'],
            'website': website,
        },
    }


def scrape_store(store_url):
    """Fetch one store page and return its GeoJSON feature, or ``None``.

    ``None`` means the page carried no usable structured data; the reason
    is printed so the gap in the output is visible.
    """
    soup = fetch_soup(store_url)
    # Cheap pre-filter: only scripts mentioning "latitude" can hold the
    # store record.
    candidates = [
        script.text
        for script in soup.find_all('script')
        if 'latitude' in script.text
    ]
    data = parse_store_jsonld(candidates)
    if data is None:
        print("  skipped, no store data found:", store_url)
        return None
    try:
        return build_feature(data, store_url)
    except KeyError as missing:
        print("  skipped, missing field", missing, "in", store_url)
        return None


def collect_locations():
    """Walk state -> city -> store pages and return all GeoJSON features.

    Every visited URL is printed as progress output.
    """
    features = []
    for state_url in page_links(BASE_URL, STATE_LINKS):
        print(" ", state_url)
        for city_url in page_links(state_url, CITY_LINKS):
            print(" ", city_url)
            for store_url in page_links(city_url, STORE_LINKS):
                print(" ", store_url)
                feature = scrape_store(store_url)
                if feature is not None:
                    features.append(feature)
    return features


def main():
    """Crawl the store locator and write the result to ``data.geojson``."""
    locations = collect_locations()

    geojson = {
        "type": "FeatureCollection",
        "features": locations,
    }

    print(len(locations), "locations found")

    with open("data.geojson", "w", encoding="utf-8") as f:
        json.dump(geojson, f)


if __name__ == "__main__":
    main()
|