Merge pull request dailyerosion#302 from akrherz/241121
Omnibus
akrherz authored Dec 12, 2024
2 parents 8fc9db2 + d944663 commit 62d58b4
Showing 4 changed files with 130 additions and 9 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: "v0.8.2"
rev: "v0.8.3"
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
2 changes: 1 addition & 1 deletion scripts/import/flowpath_importer.py
@@ -446,7 +446,7 @@ def process_fields(cursor, scenario, huc12, fld_df):
huc12,
fbndid,
row["Acres"],
bool(row["isAG"]),
row["isAG"],
row["geometry"].wkt,
row["management"],
row["landuse"],
115 changes: 115 additions & 0 deletions scripts/ofetool/summarize_groupid.py
@@ -0,0 +1,115 @@
"""Summarize OFE results to support OFETool.
Via one-time processing, each HUC12 in DEP has a oferesults CSV file. We want
to summarize and supplement these files to support the OFETool. We will
use a methodology of:
1. If we have 10 samples locally, we are golden.
2. We will look spatially 100km around the HUC12 centroid and summarize any
other groupids with at least 100 samples.
"""

import os

import click
import pandas as pd
from pyiem.database import get_sqlalchemy_conn
from pyiem.util import logger
from sqlalchemy import text
from tqdm import tqdm

LOG = logger()


def summarize(ofedf, groupid, is_local: bool) -> dict:
"""Do the summarization."""
popdf = ofedf[ofedf["groupid"] == groupid]
row0 = popdf.iloc[0]
return {
"groupid": groupid,
"is_local": is_local,
"n": len(popdf),
"slope_reclass": row0["slope_reclass"],
"kw_reclass": row0["kw_reclass"],
"tillage_code_2022": row0["tillage_code_2022"],
"genlanduse": row0["genlanduse"],
"isag": row0["isag"],
"runoff[mm/yr]": popdf["runoff[mm/yr]"].mean(),
"ofe_loss[t/a/yr]": popdf["ofe_loss[t/a/yr]"].mean(),
}


def process_huc12_results(huc12, oferesults, scenario: int):
"""Do the processing of a single HUC12."""
results = []
local_oferesults = oferesults[oferesults["huc12"] == huc12]
# Figure out which local groupids we have 10 samples for
local_groupids = local_oferesults.groupby("groupid").size()
local_groupids = local_groupids[local_groupids >= 10].index.values
LOG.info("Found %s local groupids with 10 samples", len(local_groupids))
for groupid in local_groupids:
results.append(summarize(local_oferesults, groupid, True))
groupids = oferesults.groupby("groupid").size()
for groupid in groupids.index:
if groupid in local_groupids:
continue
if groupids[groupid] < 100:
continue
results.append(summarize(oferesults, groupid, False))

resultdf = pd.DataFrame(results)
resultdf.to_csv(
f"/i/{scenario}/ofe/{huc12[:8]}/{huc12[8:]}/ofetool_{huc12}.csv",
index=False,
)


def do_huc12(pgconn, huc12, scenario: int):
"""Process a single HUC12."""
domaindf = pd.read_sql(
text("""
select huc_12 from huc12 where scenario = :scenario and st_distance(geom,
(select st_centroid(geom) from huc12 where huc_12 = :huc12 and
scenario = :scenario)) < 100_000
"""),
pgconn,
params={"huc12": huc12, "scenario": scenario},
)
LOG.info("Found %s neighboring HUC12s for %s", len(domaindf), huc12)

files = [
f"/i/{scenario}/ofe/{huc[:8]}/{huc[8:]}/oferesults_{huc}.csv"
for huc in domaindf["huc_12"]
]
oferesults = pd.concat(
(
pd.read_csv(fn, dtype={"huc12": str, "ofe": int})
for fn in files
if os.path.isfile(fn)
),
ignore_index=True,
)
# Only consider top of flowpath results
oferesults = oferesults[oferesults["ofe"] == 1]
process_huc12_results(huc12, oferesults, scenario)


@click.command()
@click.option("--scenario", default=0, type=int, help="Scenario to process")
def main(scenario: int):
"""Go Main Go."""
with get_sqlalchemy_conn("idep") as pgconn:
huc12df = pd.read_sql(
text("select huc_12 from huc12 where scenario = :scenario"),
pgconn,
params={"scenario": scenario},
)
progress = tqdm(huc12df["huc_12"].values)
for huc12 in progress:
progress.set_description(huc12)
do_huc12(pgconn, huc12, scenario)


if __name__ == "__main__":
main()
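
To illustrate the two-tier sampling rule from the new script's docstring (at least 10 local samples, else at least 100 samples pooled from HUC12s within 100km), here is a minimal pandas sketch against toy data; the huc12 and groupid column names match the script, while the values are invented:

import pandas as pd

# Toy oferesults frame: groupid "A" has 12 local samples (passes the
# >= 10 local rule), "B" has only 5 (fails), and "C" has 150 samples
# drawn from a neighboring HUC12 (passes the >= 100 regional rule).
oferesults = pd.DataFrame(
    {
        "huc12": ["070801050101"] * 17 + ["070801050102"] * 150,
        "groupid": ["A"] * 12 + ["B"] * 5 + ["C"] * 150,
    }
)
huc12 = "070801050101"

local = oferesults[oferesults["huc12"] == huc12]
local_counts = local.groupby("groupid").size()
local_groupids = local_counts[local_counts >= 10].index.values

regional_counts = oferesults.groupby("groupid").size()
regional_groupids = [
    gid
    for gid in regional_counts.index
    if gid not in local_groupids and regional_counts[gid] >= 100
]

print(list(local_groupids))  # ['A']
print(regional_groupids)     # ['C']

Per the click option above, the script itself would presumably be run as python summarize_groupid.py --scenario 0.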
20 changes: 13 additions & 7 deletions scripts/util/dump_ofe_results.py
@@ -1,13 +1,13 @@
"""Summarize the OFE files"""

import datetime
import glob
import os
import sys
from datetime import datetime

import click
import pandas as pd
from pyiem.util import get_dbconn, get_sqlalchemy_conn
from pyiem.database import get_dbconn, get_sqlalchemy_conn
from sqlalchemy import text
from tqdm import tqdm

@@ -22,8 +22,8 @@ def fpmagic(cursor, scenario, envfn, rows, huc12, fpath, mlrarsym):
df = read_env(envfn)
# Only want 2017 through 2022
df = df[
(df["date"] < datetime.datetime(2023, 1, 1))
& (df["date"] >= datetime.datetime(2017, 1, 1))
(df["date"] < datetime(2023, 1, 1))
& (df["date"] >= datetime(2017, 1, 1))
]
cursor.execute(
"SELECT real_length, bulk_slope, max_slope from flowpaths "
@@ -96,8 +96,8 @@ def do_huc12(cursor, scenario, huc12):
)
# Just 2017-2022
ofedf = ofedf[
(ofedf["date"] < datetime.datetime(2023, 1, 1))
& (ofedf["date"] >= datetime.datetime(2017, 1, 1))
(ofedf["date"] < datetime(2023, 1, 1))
& (ofedf["date"] >= datetime(2017, 1, 1))
]
# Figure out the crop string
with get_sqlalchemy_conn("idep") as conn:
@@ -109,7 +109,7 @@ def do_huc12(cursor, scenario, huc12):
landuse, management,
mukey as surgo,
kwfact, hydrogroup, fbndid,
o.real_length as length
o.real_length as length, groupid
from flowpath_ofes o JOIN flowpaths f on
(o.flowpath = f.fid)
JOIN gssurgo g on (o.gssurgo_id = g.id)
@@ -144,8 +144,12 @@ def do_huc12(cursor, scenario, huc12):
thisdelivery = (
myofe["sedleave"].sum() / YEARS / accum_length * 4.463
)
groupid: str = meta_ofe["groupid"].values[0]
res = {
"id": f"{os.path.basename(ofefn)[:-4]}_{ofe}",
"groupid": groupid,
"slope_reclass": groupid.split("_")[0],
"kw_reclass": groupid.split("_")[1],
"huc12": huc12,
"mlrarsym": huc12df.at[huc12, "mlrarsym"],
"fpath": fpath,
@@ -186,12 +190,14 @@ def do_huc12(cursor, scenario, huc12):
df.to_csv(
f"/i/{scenario}/ofe/{huc12[:8]}/{huc12[8:]}/oferesults_{huc12}.csv",
index=False,
float_format="%.4f",
)

df = pd.DataFrame(fprows)
df.to_csv(
f"/i/{scenario}/ofe/{huc12[:8]}/{huc12[8:]}/fpresults_{huc12}.csv",
index=False,
float_format="%.4f",
)


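The slope_reclass and kw_reclass fields added above are derived by splitting groupid on underscores, so the identifier is assumed to begin with <slope>_<kw>. A tiny sketch with an invented groupid value (the real format is not shown in this diff):

# "3_2" here is hypothetical; only the first two underscore-delimited
# tokens matter, matching groupid.split("_")[0] and [1] in the diff.
groupid = "3_2"
slope_reclass = groupid.split("_")[0]  # -> "3"
kw_reclass = groupid.split("_")[1]     # -> "2"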
