import requests
import csv
import json
import zipfile
import tempfile
import os
import subprocess
import io

# https://www2.census.gov/programs-surveys/popest/technical-documentation/file-layouts/2020-2022/SUB-EST2022.pdf
INCORPORATED_PLACE = "162"
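# Per the SUB-EST2022 file layout linked above, SUMLEV 162 marks rows summarized
# at the incorporated-place level; only those rows are kept below.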
# Get state FIPS/ANSI codes and other data
# from https://www.census.gov/library/reference/code-lists/ansi.html#states
print("Fetching states…", flush=True, end="")
res = requests.get("https://www2.census.gov/geo/docs/reference/codes2020/national_state2020.txt")
states = list(csv.DictReader(res.text.split('\n'), delimiter='|'))
# {'STATE': 'AL', 'STATEFP': '01', 'STATENS': '01779775', 'STATE_NAME': 'Alabama'}
print("done")
# Find geographic centers of cities
place_locations = {}
for state in states[:51]:  # Just the 50 and DC, not Guam/American Samoa/PR/etc
    with tempfile.TemporaryDirectory() as tmpdir:
        print(f"Fetching data for {state['STATE_NAME']}…", flush=True, end="")
        res = requests.get(f"https://www2.census.gov/geo/tiger/TIGER2020/PLACE/tl_2020_{state['STATEFP']}_place.zip")
        print("processing…", end="", flush=True)
        zipfile.ZipFile(io.BytesIO(res.content)).extractall(tmpdir)
        shapefile_name = os.path.join(tmpdir, f"tl_2020_{state['STATEFP']}_place.shp")
        geojson_file_name = os.path.join(tmpdir, "out.geojson")
        # Convert the TIGER shapefile to GeoJSON; requires GDAL's ogr2ogr on PATH
        subprocess.run(f"ogr2ogr -f GeoJSON {geojson_file_name} {shapefile_name}", shell=True)
        with open(geojson_file_name) as f:
            data = json.load(f)
        for feature in data['features']:
            # {"type": "Feature", "properties": {"STATEFP": "01", "PLACEFP": "02260", "PLACENS": "02405163", "GEOID": "0102260", "NAME": "Ardmore", "NAMELSAD": "Ardmore town", "LSAD": "43", "CLASSFP": "C1", "PCICBSA": "N", "PCINECTA": "N", "MTFCC": "G4110", "FUNCSTAT": "A", "ALAND": 5289895, "AWATER": 21830, "INTPTLAT": "+34.9878376", "INTPTLON": "-086.8290225"}, "geometry": {"type": "Polygon", "coordinates": [[[-86.856689, 34.992046], [-86.855354, 34.992044], [-86.855101, 34.99204]
            state_place = (feature['properties']['STATEFP'], feature['properties']['PLACEFP'])
            lon_lat = (float(feature['properties']['INTPTLON']), float(feature['properties']['INTPTLAT']))
            place_locations[state_place] = lon_lat
        print("done")
print("Fetching population data for all states…", flush=True, end="")
res = requests.get("https://www2.census.gov/programs-surveys/popest/datasets/2020-2022/cities/totals/sub-est2022.csv")
res.raise_for_status()
print("processing…", flush=True, end="")
cities_by_state = {}
# utf-8-sig strips the byte-order mark, if any, at the start of the file
for line in csv.DictReader(res.content.decode('utf-8-sig').split('\n')):
    if line['SUMLEV'] != INCORPORATED_PLACE:
        continue

    if line['STNAME'] not in cities_by_state:
        cities_by_state[line['STNAME']] = []

    try:
        loc = place_locations[(line['STATE'], line['PLACE'])]
    except KeyError:
        # TODO: why do these happen? Currently these:
        # WARN: KeyError for ('17', '10373')
        # WARN: KeyError for ('17', '31991')
        # WARN: KeyError for ('27', '13708')
        # WARN: KeyError for ('36', '75779')
        # WARN: KeyError for ('40', '43725')
        # WARN: KeyError for ('40', '49860')
        # WARN: KeyError for ('48', '21031')
        # WARN: KeyError for ('48', '23176')
        # WARN: KeyError for ('48', '58502')
        # WARN: KeyError for ('48', '73493')
        # WARN: KeyError for ('55', '31525')
        # WARN: KeyError for ('55', '82575')
        # WARN: KeyError for ('55', '84275')
        # Well, we'll just shove 'em on Null Island, I guess
        loc = [0, 0]
        print("WARN: KeyError for", (line['STATE'], line['PLACE']))
        import time
        time.sleep(0.1)

    cities_by_state[line['STNAME']].append({
        "name": " ".join(line['NAME'].split(" ")[:-1]),  # Remove "city" or "town" from the end
        "pop": int(line['POPESTIMATE2022']),
        "location": loc,
    })
print("done")
print("Writing data to disk…", flush=True, end="")
for state, cities in cities_by_state.items():
    cities.sort(key=lambda i: i["pop"], reverse=True)

    with open(f"data/{state}.json", 'w') as f:
        f.write(json.dumps(cities))

with open("data/states.json", 'w') as f:
    f.write(json.dumps(list(cities_by_state.keys())))
print("done")
# ----- MAP -----
print("Fetching state outlines…", flush=True, end="")
CMD = """
curl --silent --remote-name https://www2.census.gov/geo/tiger/GENZ2022/shp/cb_2022_us_state_20m.zip
unzip -q -o cb_2022_us_state_20m.zip
ogr2ogr -f GeoJSON data/states.geojson cb_2022_us_state_20m.shp
sed -i '/^"crs":/d' data/states.geojson
rm cb_2022_us_state_20m.*
"""
subprocess.run(CMD, shell=True)
print("done")