#!/usr/bin/python
"""Split the alltheplaces TJX store dump into one GeoJSON file per chain.

Turns out alltheplaces has done all the hard work here; we can use their
(CC0-licensed) data rather than trying to replicate the scraper ourselves.

Unfortunately, many of the stores' individual location searches, including the
parent TJX's list at https://www.tjx.com/stores, don't provide a list of
stores, and only a search result. Some do, e.g. Sierra, with a chunk of
javascript containing a list of JS objects, but this isn't consistent across
stores, and I'm too lazy to reimplement something for every store. So, instead
we take advantage of the hard work of those who have gone before us!

Running the script writes, into ``data/``:

* ``<chain>.geojson`` -- a FeatureCollection for each brand found, and
* ``chains.js`` -- an ES module mapping each brand name to its GeoJSON URL.
"""

import json
import os

DATA_URL = 'https://alltheplaces-data.openaddresses.io/runs/2024-04-20-13-31-46/output/tjx.geojson'
OUTPUT_DIR = "data"
# Seconds to wait on the download; without a timeout requests can block forever.
REQUEST_TIMEOUT = 60

# Output property name -> property name in the alltheplaces feature.
# Insertion order is the key order of the emitted JSON.
ADDRESS_FIELDS = {
    "name": "name",
    "addr": "addr:full",
    "city": "addr:city",
    "state": "addr:state",
    "postcode": "addr:postcode",
    "country": "addr:country",
}


def safe_name(s):
    """Return *s* lower-cased with every non-alphabetic character removed.

    The result is used both as a file stem and as a JavaScript identifier,
    e.g. ``"T.J. Maxx"`` becomes ``"tjmaxx"``.
    """
    return ''.join(c.lower() for c in s if c.isalpha())


def build_feature(store):
    """Reduce one alltheplaces feature to the fields the map needs.

    An input feature looks like::

        {
          "type": "Feature",
          "id": "iaLJnlhrRR8daHXO0SGtTHQ2aYM=",
          "properties": {
            "ref": "93743",
            "@spider": "tjx",
            "shop": "department_store",
            "addr:full": "655 Sydney Ave",
            "addr:city": "Windsor",
            "addr:state": "ON",
            "addr:postcode": "N8X 5C4",
            "addr:country": "CA",
            "name": "Windsor",
            "phone": "+1 519-250-0494",
            "opening_hours": "Mo-Fr 09:30-21:00; Sa 09:00-21:00; Su 10:00-18:00",
            "brand": "Marshalls",
            "brand:wikidata": "Q15903261",
            "nsi_id": "marshalls-53f9e5"
          },
          "geometry": {
            "type": "Point",
            "coordinates": [-82.9981994628906, 42.2717170715332]
          }
        }

    Address properties absent from the input come out as ``None`` (JSON
    ``null``) instead of aborting the run; upstream data is not guaranteed to
    carry every address field for every store.

    Raises:
        KeyError: if the feature has no ``properties`` or ``geometry`` member.
    """
    props = store['properties']
    return {
        "type": "Feature",
        "geometry": store['geometry'],
        "properties": {
            out_key: props.get(src_key)
            for out_key, src_key in ADDRESS_FIELDS.items()
        },
    }


def group_by_chain(stores):
    """Group trimmed features by brand.

    Args:
        stores: iterable of alltheplaces GeoJSON features.

    Returns:
        A dict mapping brand name to a list of trimmed features, with brands
        in order of first appearance.

    Raises:
        KeyError: if a feature has no ``brand`` property, since it cannot be
            assigned to any chain file.
    """
    chains = {}
    for store in stores:
        brand = store['properties']['brand']
        chains.setdefault(brand, []).append(build_feature(store))
    return chains


def check_module_names(chains):
    """Verify every chain maps to a distinct, non-empty ``safe_name``.

    Two chains sharing a name would overwrite each other's GeoJSON file and
    produce duplicate import bindings in ``chains.js``; an empty name would
    produce an invalid import statement.

    Raises:
        ValueError: on an empty or duplicated normalised name.
    """
    seen = {}
    for chain in chains:
        ident = safe_name(chain)
        if not ident:
            raise ValueError(f"chain {chain!r} has no alphabetic characters")
        if ident in seen:
            raise ValueError(
                f"chains {seen[ident]!r} and {chain!r} both map to {ident!r}"
            )
        seen[ident] = chain


def render_chains_js(chains):
    """Return the text of ``chains.js`` for the given chain names.

    The module imports each chain's GeoJSON URL and default-exports an object
    keyed by the original chain name.
    """
    idents = [(chain, safe_name(chain)) for chain in chains]
    parts = [
        f"import {ident} from './{ident}.geojson?url';\n"
        for _, ident in idents
    ]
    parts.append('\nexport default {\n')
    # json.dumps yields a correctly escaped, double-quoted string literal, so
    # a quote or backslash in a chain name cannot break the generated module.
    parts.extend(
        f" {json.dumps(chain, ensure_ascii=False)}: {ident},\n"
        for chain, ident in idents
    )
    parts.append("};\n")
    return "".join(parts)


def write_outputs(chains, out_dir=OUTPUT_DIR):
    """Write one GeoJSON file per chain plus ``chains.js`` into *out_dir*.

    Prints a one-line store count for each chain as its file is written.

    Args:
        chains: dict mapping brand name to a list of GeoJSON features.
        out_dir: destination directory; created if it does not exist.

    Raises:
        ValueError: if chain names collide after normalisation (checked
            before anything is written).
    """
    check_module_names(chains)
    os.makedirs(out_dir, exist_ok=True)

    for chain, features in chains.items():
        geojson = {
            "type": "FeatureCollection",
            "features": features,
        }
        path = os.path.join(out_dir, f"{safe_name(chain)}.geojson")
        with open(path, "w", encoding="utf-8") as f:
            json.dump(geojson, f)
        print(f"{len(features)} {chain} locations found")

    # Explicit UTF-8 so non-ASCII chain names do not depend on the locale.
    with open(os.path.join(out_dir, 'chains.js'), 'w', encoding="utf-8") as f:
        f.write(render_chains_js(chains))


def fetch_features(url=DATA_URL, timeout=REQUEST_TIMEOUT):
    """Download the alltheplaces dump and return its list of features.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not respond within *timeout*.
    """
    # Imported here because requests is the script's only third-party
    # dependency; the helpers above stay importable without it.
    import requests

    response = requests.get(url, timeout=timeout)
    # Fail on 4xx/5xx here rather than with an opaque JSON error below.
    response.raise_for_status()
    return response.json()['features']


def main():
    """Fetch the store list, group it by chain and write the output files."""
    write_outputs(group_by_chain(fetch_features()))


if __name__ == "__main__":
    main()