From 6e28990290e51e846e71594548bf4675a38cd8c1 Mon Sep 17 00:00:00 2001
From: Chandler Swift
Date: Mon, 11 Nov 2024 23:04:36 -0600
Subject: [PATCH] Add MN crop history by county layer

---
 .gitignore                              |   1 +
 layers/crop-history/extract_counties.py |  60 +++++++++++++
 layers/crop-history/get_data.sh         |  17 ++++
 layers/crop-history/index.js            | 111 ++++++++++++++++++++++++
 layers/index.js                         |   2 +
 5 files changed, 191 insertions(+)
 create mode 100644 layers/crop-history/extract_counties.py
 create mode 100755 layers/crop-history/get_data.sh
 create mode 100644 layers/crop-history/index.js

diff --git a/.gitignore b/.gitignore
index 72c4380..3d001ee 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,5 @@ dist
 layers/dot-cams/*/data/states.js
 layers/survey-markers/states.js
 layers/tjx/data/chains.js
+layers/crop-history/data/counties.js
 .direnv
diff --git a/layers/crop-history/extract_counties.py b/layers/crop-history/extract_counties.py
new file mode 100644
index 0000000..154093f
--- /dev/null
+++ b/layers/crop-history/extract_counties.py
@@ -0,0 +1,60 @@
+#!/usr/bin/python
+
+import geopandas as gpd
+import os
+import fiona
+import sys
+import csv
+
+states_to_include = ["MN"]
+
+state_fipses_to_include = []
+county_lookup = {}
+print("Reading county census data...")
+with open('national_cousub2020.txt') as csvfile:
+    reader = csv.DictReader(csvfile, delimiter='|')
+    for row in reader:
+        if row['STATE'] in states_to_include:
+            state_fipses_to_include.append(row['STATEFP'])
+            county_lookup[row['STATEFP'] + row['COUNTYFP']] = row
+
+input_file = sys.argv[1]
+
+print("Reading input gdb...")
+gdf = gpd.read_file(input_file)
+gdf = gdf[gdf['STATEFIPS'].isin(state_fipses_to_include)]
+
+print("Reprojecting...")
+gdf = gdf.to_crs("EPSG:4326")
+
+print("Simplifying geometry...")
+gdf['geometry'] = gdf['geometry'].simplify(0.0001, preserve_topology=True)
+
+print("Calculating FULLFIPS...")
+gdf['FULLFIPS'] = gdf['STATEFIPS'].astype(str) + gdf['CNTYFIPS'].astype(str)
+
+print("Finding unique FULLFIPS...")
+counties = gdf['FULLFIPS'].unique()
+
+# TODO: Trim down which fields are included
+#
+# "CSBID", "CSBYEARS", "CSBACRES",
+# "CDL2016", "CDL2017", "CDL2018", "CDL2019", "CDL2020", "CDL2021", "CDL2022", "CDL2023",
+# "STATEFIPS", "STATEASD", "ASD", "CNTY", "CNTYFIPS",
+# "INSIDE_X", "INSIDE_Y", "Shape_Length", "Shape_Area", "FULLFIPS"
+
+for i, county in enumerate(counties, 1):
+    print(f"Processing county ({county}): {i}/{len(counties)}")
+    county_gdf = gdf[gdf['FULLFIPS'] == county]
+    output_file = f"{county}.geojson"
+    county_gdf.to_file(os.path.join("data", output_file), driver="GeoJSON", COORDINATE_PRECISION=5)
+
+with open('data/counties.js', 'w') as f:
+    for county in counties:
+        f.write(f"import county{county} from './{county}.geojson?url';\n")
+    f.write('\nexport default {\n')
+    for county in counties:
+        county_name = county_lookup[county]['COUNTYNAME']
+        state_name = county_lookup[county]['STATE']
+        f.write(f"  '{county_name}, {state_name}': county{county},\n")
+    f.write("};\n")
diff --git a/layers/crop-history/get_data.sh b/layers/crop-history/get_data.sh
new file mode 100755
index 0000000..429c1e2
--- /dev/null
+++ b/layers/crop-history/get_data.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env nix-shell
+#! nix-shell -i bash --pure
+#! nix-shell -p bash wget unzip python3 python3Packages.geopandas python3Packages.fiona python3Packages.pyproj
+# TODO: do I need all the python packages?
+
+set -x -euo pipefail
+
+wget -nc https://www.nass.usda.gov/Research_and_Science/Crop-Sequence-Boundaries/datasets/NationalCSB_2016-2023_rev23.zip
+wget -nc https://www2.census.gov/geo/docs/reference/codes2020/national_cousub2020.txt
+unzip -u NationalCSB_2016-2023_rev23.zip
+
+mkdir -p data
+
+# HEADS UP: this script takes something like 40GB of RAM. In theory, I could
+# probably do something clever with streaming...but I have 40 GB of RAM, so this
+# works!
+python extract_counties.py NationalCSB_2016-2023_rev23/CSB1623.gdb
diff --git a/layers/crop-history/index.js b/layers/crop-history/index.js
new file mode 100644
index 0000000..2c53a33
--- /dev/null
+++ b/layers/crop-history/index.js
@@ -0,0 +1,111 @@
+import VectorLayer from 'ol/layer/Vector';
+import {Vector as VectorSource} from 'ol/source.js';
+import GeoJSON from 'ol/format/GeoJSON.js';
+
+import {Style} from 'ol/style.js';
+
+import counties from './data/counties.js';
+
+// from https://www.nass.usda.gov/Research_and_Science/Crop-Sequence-Boundaries/metadata_Crop-Sequence-Boundaries-2023.htm
+const crops = {
+  "1": "Corn",
+  "2": "Cotton",
+  "3": "Rice",
+  "4": "Sorghum",
+  "5": "Soybeans",
+  "6": "Sunflower",
+  "10": "Peanuts",
+  "11": "Tobacco",
+  "12": "Sweet Corn",
+  "13": "Pop or Orn Corn",
+  "14": "Mint",
+  "21": "Barley",
+  "22": "Durum Wheat",
+  "23": "Spring Wheat",
+  "24": "Winter Wheat",
+  "25": "Other Small Grains",
+  "26": "Dbl Crop WinWht/Soybeans",
+  "27": "Rye",
+  "28": "Oats",
+  "29": "Millet",
+  "30": "Speltz",
+  "31": "Canola",
+  "32": "Flaxseed",
+  "33": "Safflower",
+  "34": "Rape Seed",
+  "35": "Mustard",
+  "36": "Alfalfa",
+  "37": "Other Hay/Non Alfalfa",
+  "38": "Camelina",
+  "39": "Buckwheat",
+  "41": "Sugarbeets",
+  "42": "Dry Beans",
+  "43": "Potatoes",
+  "44": "Other Crops",
+  "45": "Sugarcane",
+  "46": "Sweet Potatoes",
+  "47": "Misc Vegs & Fruits",
+  "48": "Watermelons",
+  "49": "Onions",
+  "50": "Cucumbers",
+  "51": "Chick Peas",
+  "52": "Lentils",
+  "53": "Peas",
+  "54": "Tomatoes",
+  "55": "Caneberries",
+  "56": "Hops",
+  "57": "Herbs",
+  "58": "Clover/Wildflowers",
+  "59": "Sod/Grass Seed",
+  "60": "Switchgrass",
+};
+
+const category = {
+  name: "County Crop History",
+  details: `https://www.nass.usda.gov/Research_and_Science/Crop-Sequence-Boundaries/index.php`,
+  layers: [],
+};
+
+for (let [county, url] of Object.entries(counties)) {
+  const geojsonSource = new VectorSource({
+    url: url,
+    format: new GeoJSON(),
+  });
+
+  geojsonSource.on('featuresloadend', function(event) {
+    event.features.forEach(feature => {
+      for (let year = 2016; year <= 2023; year++) {
+        const cropid = feature.get(`CDL${year}`);
+
+        // Replace the numeric CDL crop code with its name when we know it
+        if (cropid in crops) {
+          feature.set(String(year), crops[cropid]);
+        } else {
+          feature.set(String(year), cropid);
+        }
+        feature.unset(`CDL${year}`);
+      }
+    });
+  });
+  const vectorLayer = new VectorLayer({
+    source: geojsonSource,
+  });
+
+  category.layers.push({
+    name: county,
+    layer: vectorLayer,
+  });
+}
+
+category.layers.sort(function (a, b) {
+  const a_state = a.name.slice(-2);
+  const b_state = b.name.slice(-2);
+  // Sort by state...
+  if (a_state != b_state) {
+    return a_state > b_state ? 1 : -1;
+  }
+  // ...then by county
+  return a.name > b.name ? 1 : -1;
+});
+
+export default category;
diff --git a/layers/index.js b/layers/index.js
index 41f246a..2fd8c83 100644
--- a/layers/index.js
+++ b/layers/index.js
@@ -20,6 +20,7 @@ import dot_cams from './dot-cams/index.js';
 import survey_markers from './survey-markers/index.js';
 import tjx from './tjx/index.js';
 import minnesotaAdventureTrails from './minnesota-adventure-trails/index.js';
+import cropHistory from './crop-history/index.js';
 
 const layerCategories = [
   { // Base maps
@@ -103,6 +104,7 @@ const layerCategories = [
   cellular,
   light_pollution,
   tjx,
+  cropHistory,
 ];
 
 export default layerCategories;
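
A possible follow-up to the "HEADS UP" RAM note in get_data.sh: push the Minnesota filter down into OGR so the full national layer never has to be materialized in memory. This is only a sketch, not part of the patch: it assumes a geopandas/pyogrio (or fiona >= 1.9) combination whose read_file() accepts a "where" attribute filter, and it assumes the STATEFIPS field name used above; it has not been run against this dataset.

    import geopandas as gpd

    # Sketch: read only rows whose STATEFIPS is Minnesota (27) from the
    # national file geodatabase, instead of loading everything and then
    # filtering with .isin() in pandas.
    gdf = gpd.read_file(
        "NationalCSB_2016-2023_rev23/CSB1623.gdb",
        where="STATEFIPS = '27'",
    )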
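
For the "Trim down which fields are included" TODO in extract_counties.py: index.js only reads the CDL2016 through CDL2023 attributes (the county name comes from the census lookup, not from the features), so one minimal option is to keep just those columns plus the geometry before writing each county file. Column names are taken from the comment in the script; treat this as an untested sketch, and add back CSBID/CSBACRES or others if the map ends up needing them.

    # Drop everything except the per-year crop codes and the geometry,
    # which also shrinks the emitted per-county GeoJSON considerably.
    keep = [f"CDL{year}" for year in range(2016, 2024)] + ["geometry"]
    county_gdf = county_gdf[keep]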