From 6ab9f5a058e04b2fb04b9f0c592b9dbe1704c00c Mon Sep 17 00:00:00 2001
From: akrherz <akrherz@iastate.edu>
Date: Fri, 13 Dec 2024 13:14:58 -0600
Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=A7=20Refactor=20R-Factor=20WIP=20for?=
 =?UTF-8?q?=20#303?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 scripts/ofetool/summarize_groupid.py |  37 +++++--
 src/pydep/io/wepp.py                 |  46 +--------
 src/pydep/rfactor.py                 | 144 +++++++++++++++++++++++++++
 tests/io/test_rfactor.py             |  17 ++++
 4 files changed, 191 insertions(+), 53 deletions(-)
 create mode 100644 src/pydep/rfactor.py
 create mode 100644 tests/io/test_rfactor.py

diff --git a/scripts/ofetool/summarize_groupid.py b/scripts/ofetool/summarize_groupid.py
index f1d8acb2..06d889d7 100644
--- a/scripts/ofetool/summarize_groupid.py
+++ b/scripts/ofetool/summarize_groupid.py
@@ -11,6 +11,7 @@
 """
 
 import os
+from typing import Optional
 
 import click
 import pandas as pd
@@ -22,9 +23,11 @@
 LOG = logger()
 
 
-def summarize(ofedf, groupid, is_local: bool) -> dict:
+def summarize(ofedf: pd.DataFrame, groupid, is_local: bool) -> dict:
     """Do the summarization."""
-    popdf = ofedf[ofedf["groupid"] == groupid]
+    popdf: pd.DataFrame = ofedf[ofedf["groupid"] == groupid]
+    rstats = popdf["runoff[mm/yr]"].describe()
+    lstats = popdf["ofe_loss[t/a/yr]"].describe()
     row0 = popdf.iloc[0]
     return {
         "groupid": groupid,
@@ -35,8 +38,20 @@ def summarize(ofedf, groupid, is_local: bool) -> dict:
         "tillage_code_2022": row0["tillage_code_2022"],
         "genlanduse": row0["genlanduse"],
         "isag": row0["isag"],
-        "runoff[mm/yr]": popdf["runoff[mm/yr]"].mean(),
-        "ofe_loss[t/a/yr]": popdf["ofe_loss[t/a/yr]"].mean(),
+        "runoff[mm/yr]": rstats["mean"],
+        "runoff[mm/yr]_min": rstats["min"],
+        "runoff[mm/yr]_max": rstats["max"],
+        "runoff[mm/yr]_std": rstats["std"],
+        "runoff[mm/yr]_25": rstats["25%"],
+        "runoff[mm/yr]_median": rstats["50%"],
+        "runoff[mm/yr]_75": rstats["75%"],
+        "ofe_loss[t/a/yr]": lstats["mean"],
+        "ofe_loss[t/a/yr]_min": lstats["min"],
+        "ofe_loss[t/a/yr]_max": lstats["max"],
+        "ofe_loss[t/a/yr]_std": lstats["std"],
+        "ofe_loss[t/a/yr]_25": lstats["25%"],
+        "ofe_loss[t/a/yr]_median": lstats["50%"],
+        "ofe_loss[t/a/yr]_75": lstats["75%"],
     }
 
 
@@ -52,8 +67,6 @@ def process_huc12_results(huc12, oferesults, scenario: int):
         results.append(summarize(local_oferesults, groupid, True))
     groupids = oferesults.groupby("groupid").size()
     for groupid in groupids.index:
-        if groupid in local_groupids:
-            continue
         if groupids[groupid] < 100:
             continue
         results.append(summarize(oferesults, groupid, False))
@@ -62,6 +75,7 @@ def process_huc12_results(huc12, oferesults, scenario: int):
     resultdf.to_csv(
         f"/i/{scenario}/ofe/{huc12[:8]}/{huc12[8:]}/ofetool_{huc12}.csv",
         index=False,
+        float_format="%.4f",
     )
 
 
@@ -97,7 +111,8 @@ def do_huc12(pgconn, huc12, scenario: int):
 
 @click.command()
 @click.option("--scenario", default=0, type=int, help="Scenario to process")
-def main(scenario: int):
+@click.option("--huc12", type=str, help="Run for a single HUC12")
+def main(scenario: int, huc12: Optional[str]):
     """Go Main Go."""
     with get_sqlalchemy_conn("idep") as pgconn:
         huc12df = pd.read_sql(
@@ -106,9 +121,11 @@ def main(scenario: int):
             params={"scenario": scenario},
         )
         progress = tqdm(huc12df["huc_12"].values)
-        for huc12 in progress:
-            progress.set_description(huc12)
-            do_huc12(pgconn, huc12, scenario)
+        for _huc12 in progress:
+            progress.set_description(_huc12)
+            if huc12 is not None and huc12 != _huc12:
+                continue
+            do_huc12(pgconn, _huc12, scenario)
 
 
 if __name__ == "__main__":
diff --git a/src/pydep/io/wepp.py b/src/pydep/io/wepp.py
index 4241dd06..1e3fc695 100644
--- a/src/pydep/io/wepp.py
+++ b/src/pydep/io/wepp.py
@@ -5,7 +5,8 @@
 
 import numpy as np
 import pandas as pd
-from scipy.interpolate import interp1d
+
+from pydep.rfactor import calc_rfactor
 
 YLD_CROPTYPE = re.compile(r"Crop Type #\s+(?P<num>\d+)\s+is (?P<name>[^\s]+)")
 YLD_DATA = re.compile(
@@ -24,47 +25,6 @@ def _date_from_year_jday(df):
     )
 
 
-def _rfactor(times, points, return_rfactor_metric=True):
-    """Compute the R-factor.
-
-    https://www.hydrol-earth-syst-sci.net/19/4113/2015/hess-19-4113-2015.pdf
-    It would appear that a strict implementation would need to have a six
-    hour dry period around events and require more then 12mm of precipitation.
-
-    Args:
-      times (list): List of decimal time values for a date.
-      points (list): list of accumulated precip values (mm).
-      return_rfactor_metric (bool, optional): Should this return a metric
-        (default) or english unit R value.
-
-    Returns:
-      rfactor (float): Units of MJ mm ha-1 h-1
-    """
-    # No precip!
-    if not times:
-        return 0
-    # interpolate dataset into 30 minute bins
-    func = interp1d(
-        times,
-        points,
-        kind="linear",
-        fill_value=(0, points[-1]),
-        bounds_error=False,
-    )
-    accum = func(np.arange(0, 24.01, 0.5))
-    rate_mmhr = (accum[1:] - accum[0:-1]) * 2.0
-    # sum of E x I
-    # I is the 30 minute peak intensity (mm h-1), capped at 3 in/hr
-    Imax = min([3.0 * 25.4, np.max(rate_mmhr)])
-    # E is sum of e_r (MJ ha-1 mm-1) * p_r (mm)
-    e_r = 0.29 * (1.0 - 0.72 * np.exp(-0.082 * rate_mmhr))
-    # rate * times
-    p_r = rate_mmhr / 2.0
-    # MJ ha-1 * mm h-1  or MJ inch a-1 h-1
-    unitconv = 1.0 if return_rfactor_metric else (1.0 / 25.4 / 2.47105)
-    return np.sum(e_r * p_r) * Imax * unitconv
-
-
 def intensity(times, points, compute_intensity_over) -> dict:
     """Returns dict of computed intensities over the given minute intervals."""
     entry = {}
@@ -155,7 +115,7 @@ def read_cli(
             "rfactor": (
                 np.nan
                 if not compute_rfactor
-                else _rfactor(
+                else calc_rfactor(
                     times,
                     points,
                     return_rfactor_metric=return_rfactor_metric,
diff --git a/src/pydep/rfactor.py b/src/pydep/rfactor.py
new file mode 100644
index 00000000..0c32812d
--- /dev/null
+++ b/src/pydep/rfactor.py
@@ -0,0 +1,144 @@
+"""pydep's R-factor calculations."""
+
+import numpy as np
+import pandas as pd
+from scipy.interpolate import interp1d
+
+
+def calc_rfactor(times, points, return_rfactor_metric=True):
+    """Compute the R-factor.
+
+    https://www.hydrol-earth-syst-sci.net/19/4113/2015/hess-19-4113-2015.pdf
+    It would appear that a strict implementation would need to have a six
+    hour dry period around events and require more then 12mm of precipitation.
+
+    Args:
+      times (list): List of decimal time values for a date.
+      points (list): list of accumulated precip values (mm).
+      return_rfactor_metric (bool, optional): Should this return a metric
+        (default) or english unit R value.
+
+    Returns:
+      rfactor (float): Units of MJ mm ha-1 h-1
+    """
+    # No precip!
+    if not times:
+        return 0
+    # interpolate dataset into 30 minute bins
+    func = interp1d(
+        times,
+        points,
+        kind="linear",
+        fill_value=(0, points[-1]),
+        bounds_error=False,
+    )
+    accum = func(np.arange(0, 24.01, 0.5))
+    rate_mmhr = (accum[1:] - accum[0:-1]) * 2.0
+    # sum of E x I
+    # I is the 30 minute peak intensity (mm h-1), capped at 3 in/hr
+    Imax = min([3.0 * 25.4, np.max(rate_mmhr)])
+    # E is sum of e_r (MJ ha-1 mm-1) * p_r (mm)
+    e_r = 0.29 * (1.0 - 0.72 * np.exp(-0.082 * rate_mmhr))
+    # rate * times
+    p_r = rate_mmhr / 2.0
+    # MJ ha-1 * mm h-1  or MJ inch a-1 h-1
+    unitconv = 1.0 if return_rfactor_metric else (1.0 / 25.4 / 2.47105)
+    return np.sum(e_r * p_r) * Imax * unitconv
+
+
+def _compute_accum(lines):
+    """Return rectified accum values."""
+    times = []
+    points = []
+    for line in lines:
+        (ts, accum) = line.strip().split()
+        times.append(float(ts))
+        points.append(float(accum))
+    return interp1d(
+        times,
+        points,
+        kind="linear",
+        fill_value=(0, points[-1]),
+        bounds_error=False,
+    )(np.arange(0.5, 24.01, 0.5))
+
+
+def _rfactor_window(accum: np.ndarray) -> list:
+    """Compute R-factors for the last day in the 10 day window."""
+    storm_start_stop = []
+    storm_start = -1
+    x = 12
+    while x < accum.shape[0]:
+        is_dry = (accum[x] - accum[x - 12]) < 1.27
+        if not is_dry and storm_start == -1:
+            storm_start = x
+            x += 12
+            continue
+        if is_dry and storm_start != -1:
+            storm_start_stop.append((storm_start, x))
+            storm_start = -1
+        x += 1
+    rfactors = []
+    for start, stop in storm_start_stop:
+        # For a storm to be considered, it must stop within the tenth day
+        if stop < (9 * 24 * 2):
+            continue
+        event = accum[start:stop] - accum[start]
+        rate_mmhr = (event[1:] - event[0:-1]) * 2.0
+        # sum of E x I
+        # I is the 30 minute peak intensity (mm h-1), capped at 3 in/hr
+        Imax = min([3.0 * 25.4, np.max(rate_mmhr)])
+        # E is sum of e_r (MJ ha-1 mm-1) * p_r (mm)
+        e_r = 0.29 * (1.0 - 0.72 * np.exp(-0.082 * rate_mmhr))
+        # rate * times
+        p_r = rate_mmhr / 2.0
+        # MJ ha-1 * mm h-1  or MJ inch a-1 h-1
+        rfactors.append(np.sum(e_r * p_r) * Imax)
+    return rfactors
+
+
+def compute_rfactor_from_cli(
+    filename: str,
+) -> pd.DataFrame:
+    """Compute **yearly** R-factor values from a WEPP breakpoint file.
+
+    Args:
+      filename (str): Filename to read
+
+    Returns:
+        pandas.DataFrame with yearly values
+    """
+    with open(filename, encoding="ascii") as fh:
+        lines = fh.readlines()
+    (*_unused, year1, years_simuluated) = lines[4].strip().split()
+    lnum = 15
+    resultdf = None
+    resultdf = pd.DataFrame(
+        {"rfactor": 0.0, "storm_count": 0},
+        index=pd.Index(
+            range(int(year1), int(year1) + int(years_simuluated)), name="year"
+        ),
+    )
+    # Life Choice, store 10 days of 30 minute accum
+    window_accum = np.zeros(10 * 24 * 2)
+    while lnum < len(lines):
+        (_da, _mo, year, breakpoints, *_bah) = lines[lnum].strip().split()
+        breakpoints = int(breakpoints)
+        if breakpoints == 0:
+            accum = np.zeros(24 * 2)
+        else:
+            accum = _compute_accum(lines[lnum + 1 : lnum + 1 + breakpoints])
+        # Chop off the oldest day and add the new day
+        window_accum = np.concatenate(
+            (
+                window_accum[48:] - window_accum[48],
+                accum + window_accum[-1] - window_accum[48],
+            )
+        )
+        rfactors = _rfactor_window(window_accum)
+        if rfactors:
+            resultdf.at[int(year), "rfactor"] += np.sum(rfactors)
+            resultdf.at[int(year), "storm_count"] += len(rfactors)
+        lnum += breakpoints + 1
+
+    return resultdf
diff --git a/tests/io/test_rfactor.py b/tests/io/test_rfactor.py
new file mode 100644
index 00000000..e75ce2c0
--- /dev/null
+++ b/tests/io/test_rfactor.py
@@ -0,0 +1,17 @@
+"""Test things related to R-factor calculations."""
+
+import os
+
+from pydep.rfactor import compute_rfactor_from_cli
+
+
+def get_path(name):
+    """helper"""
+    basedir = os.path.dirname(__file__)
+    return os.path.join(basedir, "..", "data", name)
+
+
+def test_rfactor():
+    """Walk before we run."""
+    resultdf = compute_rfactor_from_cli(get_path("cli.txt"))
+    assert resultdf.at[2007, "storm_count"] == 76