From 6ab9f5a058e04b2fb04b9f0c592b9dbe1704c00c Mon Sep 17 00:00:00 2001 From: akrherz Date: Fri, 13 Dec 2024 13:14:58 -0600 Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=A7=20Refactor=20R-Factor=20WIP=20for?= =?UTF-8?q?=20#303?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/ofetool/summarize_groupid.py | 37 +++++-- src/pydep/io/wepp.py | 46 +-------- src/pydep/rfactor.py | 144 +++++++++++++++++++++++++++ tests/io/test_rfactor.py | 17 ++++ 4 files changed, 191 insertions(+), 53 deletions(-) create mode 100644 src/pydep/rfactor.py create mode 100644 tests/io/test_rfactor.py diff --git a/scripts/ofetool/summarize_groupid.py b/scripts/ofetool/summarize_groupid.py index f1d8acb2..06d889d7 100644 --- a/scripts/ofetool/summarize_groupid.py +++ b/scripts/ofetool/summarize_groupid.py @@ -11,6 +11,7 @@ """ import os +from typing import Optional import click import pandas as pd @@ -22,9 +23,11 @@ LOG = logger() -def summarize(ofedf, groupid, is_local: bool) -> dict: +def summarize(ofedf: pd.DataFrame, groupid, is_local: bool) -> dict: """Do the summarization.""" - popdf = ofedf[ofedf["groupid"] == groupid] + popdf: pd.DataFrame = ofedf[ofedf["groupid"] == groupid] + rstats = popdf["runoff[mm/yr]"].describe() + lstats = popdf["ofe_loss[t/a/yr]"].describe() row0 = popdf.iloc[0] return { "groupid": groupid, @@ -35,8 +38,20 @@ def summarize(ofedf, groupid, is_local: bool) -> dict: "tillage_code_2022": row0["tillage_code_2022"], "genlanduse": row0["genlanduse"], "isag": row0["isag"], - "runoff[mm/yr]": popdf["runoff[mm/yr]"].mean(), - "ofe_loss[t/a/yr]": popdf["ofe_loss[t/a/yr]"].mean(), + "runoff[mm/yr]": rstats["mean"], + "runoff[mm/yr]_min": rstats["min"], + "runoff[mm/yr]_max": rstats["max"], + "runoff[mm/yr]_std": rstats["std"], + "runoff[mm/yr]_25": rstats["25%"], + "runoff[mm/yr]_median": rstats["50%"], + "runoff[mm/yr]_75": rstats["75%"], + "ofe_loss[t/a/yr]": lstats["mean"], + "ofe_loss[t/a/yr]_min": lstats["min"], + "ofe_loss[t/a/yr]_max": lstats["max"], + "ofe_loss[t/a/yr]_std": lstats["std"], + "ofe_loss[t/a/yr]_25": lstats["25%"], + "ofe_loss[t/a/yr]_median": lstats["50%"], + "ofe_loss[t/a/yr]_75": lstats["75%"], } @@ -52,8 +67,6 @@ def process_huc12_results(huc12, oferesults, scenario: int): results.append(summarize(local_oferesults, groupid, True)) groupids = oferesults.groupby("groupid").size() for groupid in groupids.index: - if groupid in local_groupids: - continue if groupids[groupid] < 100: continue results.append(summarize(oferesults, groupid, False)) @@ -62,6 +75,7 @@ def process_huc12_results(huc12, oferesults, scenario: int): resultdf.to_csv( f"/i/{scenario}/ofe/{huc12[:8]}/{huc12[8:]}/ofetool_{huc12}.csv", index=False, + float_format="%.4f", ) @@ -97,7 +111,8 @@ def do_huc12(pgconn, huc12, scenario: int): @click.command() @click.option("--scenario", default=0, type=int, help="Scenario to process") -def main(scenario: int): +@click.option("--huc12", type=str, help="Run for a single HUC12") +def main(scenario: int, huc12: Optional[str]): """Go Main Go.""" with get_sqlalchemy_conn("idep") as pgconn: huc12df = pd.read_sql( @@ -106,9 +121,11 @@ def main(scenario: int): params={"scenario": scenario}, ) progress = tqdm(huc12df["huc_12"].values) - for huc12 in progress: - progress.set_description(huc12) - do_huc12(pgconn, huc12, scenario) + for _huc12 in progress: + progress.set_description(_huc12) + if huc12 is not None and huc12 != _huc12: + continue + do_huc12(pgconn, _huc12, scenario) if __name__ == "__main__": diff --git a/src/pydep/io/wepp.py b/src/pydep/io/wepp.py index 4241dd06..1e3fc695 100644 --- a/src/pydep/io/wepp.py +++ b/src/pydep/io/wepp.py @@ -5,7 +5,8 @@ import numpy as np import pandas as pd -from scipy.interpolate import interp1d + +from pydep.rfactor import calc_rfactor YLD_CROPTYPE = re.compile(r"Crop Type #\s+(?P\d+)\s+is (?P[^\s]+)") YLD_DATA = re.compile( @@ -24,47 +25,6 @@ def _date_from_year_jday(df): ) -def _rfactor(times, points, return_rfactor_metric=True): - """Compute the R-factor. - - https://www.hydrol-earth-syst-sci.net/19/4113/2015/hess-19-4113-2015.pdf - It would appear that a strict implementation would need to have a six - hour dry period around events and require more then 12mm of precipitation. - - Args: - times (list): List of decimal time values for a date. - points (list): list of accumulated precip values (mm). - return_rfactor_metric (bool, optional): Should this return a metric - (default) or english unit R value. - - Returns: - rfactor (float): Units of MJ mm ha-1 h-1 - """ - # No precip! - if not times: - return 0 - # interpolate dataset into 30 minute bins - func = interp1d( - times, - points, - kind="linear", - fill_value=(0, points[-1]), - bounds_error=False, - ) - accum = func(np.arange(0, 24.01, 0.5)) - rate_mmhr = (accum[1:] - accum[0:-1]) * 2.0 - # sum of E x I - # I is the 30 minute peak intensity (mm h-1), capped at 3 in/hr - Imax = min([3.0 * 25.4, np.max(rate_mmhr)]) - # E is sum of e_r (MJ ha-1 mm-1) * p_r (mm) - e_r = 0.29 * (1.0 - 0.72 * np.exp(-0.082 * rate_mmhr)) - # rate * times - p_r = rate_mmhr / 2.0 - # MJ ha-1 * mm h-1 or MJ inch a-1 h-1 - unitconv = 1.0 if return_rfactor_metric else (1.0 / 25.4 / 2.47105) - return np.sum(e_r * p_r) * Imax * unitconv - - def intensity(times, points, compute_intensity_over) -> dict: """Returns dict of computed intensities over the given minute intervals.""" entry = {} @@ -155,7 +115,7 @@ def read_cli( "rfactor": ( np.nan if not compute_rfactor - else _rfactor( + else calc_rfactor( times, points, return_rfactor_metric=return_rfactor_metric, diff --git a/src/pydep/rfactor.py b/src/pydep/rfactor.py new file mode 100644 index 00000000..0c32812d --- /dev/null +++ b/src/pydep/rfactor.py @@ -0,0 +1,144 @@ +"""pydep's R-factor calculations.""" + +import numpy as np +import pandas as pd +from scipy.interpolate import interp1d + + +def calc_rfactor(times, points, return_rfactor_metric=True): + """Compute the R-factor. + + https://www.hydrol-earth-syst-sci.net/19/4113/2015/hess-19-4113-2015.pdf + It would appear that a strict implementation would need to have a six + hour dry period around events and require more then 12mm of precipitation. + + Args: + times (list): List of decimal time values for a date. + points (list): list of accumulated precip values (mm). + return_rfactor_metric (bool, optional): Should this return a metric + (default) or english unit R value. + + Returns: + rfactor (float): Units of MJ mm ha-1 h-1 + """ + # No precip! + if not times: + return 0 + # interpolate dataset into 30 minute bins + func = interp1d( + times, + points, + kind="linear", + fill_value=(0, points[-1]), + bounds_error=False, + ) + accum = func(np.arange(0, 24.01, 0.5)) + rate_mmhr = (accum[1:] - accum[0:-1]) * 2.0 + # sum of E x I + # I is the 30 minute peak intensity (mm h-1), capped at 3 in/hr + Imax = min([3.0 * 25.4, np.max(rate_mmhr)]) + # E is sum of e_r (MJ ha-1 mm-1) * p_r (mm) + e_r = 0.29 * (1.0 - 0.72 * np.exp(-0.082 * rate_mmhr)) + # rate * times + p_r = rate_mmhr / 2.0 + # MJ ha-1 * mm h-1 or MJ inch a-1 h-1 + unitconv = 1.0 if return_rfactor_metric else (1.0 / 25.4 / 2.47105) + return np.sum(e_r * p_r) * Imax * unitconv + + +def _compute_accum(lines): + """Return rectified accum values.""" + times = [] + points = [] + for line in lines: + (ts, accum) = line.strip().split() + times.append(float(ts)) + points.append(float(accum)) + return interp1d( + times, + points, + kind="linear", + fill_value=(0, points[-1]), + bounds_error=False, + )(np.arange(0.5, 24.01, 0.5)) + + +def _rfactor_window(accum: np.ndarray) -> list: + """Compute R-factors for the last day in the 10 day window.""" + storm_start_stop = [] + storm_start = -1 + x = 12 + while x < accum.shape[0]: + is_dry = (accum[x] - accum[x - 12]) < 1.27 + if not is_dry and storm_start == -1: + storm_start = x + x += 12 + continue + if is_dry and storm_start != -1: + storm_start_stop.append((storm_start, x)) + storm_start = -1 + x += 1 + rfactors = [] + for start, stop in storm_start_stop: + # For a storm to be considered, it must stop within the tenth day + if stop < (9 * 24 * 2): + continue + event = accum[start:stop] - accum[start] + rate_mmhr = (event[1:] - event[0:-1]) * 2.0 + # sum of E x I + # I is the 30 minute peak intensity (mm h-1), capped at 3 in/hr + Imax = min([3.0 * 25.4, np.max(rate_mmhr)]) + # E is sum of e_r (MJ ha-1 mm-1) * p_r (mm) + e_r = 0.29 * (1.0 - 0.72 * np.exp(-0.082 * rate_mmhr)) + # rate * times + p_r = rate_mmhr / 2.0 + # MJ ha-1 * mm h-1 or MJ inch a-1 h-1 + rfactors.append(np.sum(e_r * p_r) * Imax) + return rfactors + + +def compute_rfactor_from_cli( + filename: str, +) -> pd.DataFrame: + """Compute **yearly** R-factor values from a WEPP breakpoint file. + + Args: + filename (str): Filename to read + + Returns: + pandas.DataFrame with yearly values + """ + with open(filename, encoding="ascii") as fh: + lines = fh.readlines() + (*_unused, year1, years_simuluated) = lines[4].strip().split() + lnum = 15 + resultdf = None + resultdf = pd.DataFrame( + {"rfactor": 0.0, "storm_count": 0}, + index=pd.Index( + range(int(year1), int(year1) + int(years_simuluated)), name="year" + ), + ) + # Life Choice, store 10 days of 30 minute accum + window_accum = np.zeros(10 * 24 * 2) + while lnum < len(lines): + (_da, _mo, year, breakpoints, *_bah) = lines[lnum].strip().split() + breakpoints = int(breakpoints) + if breakpoints == 0: + accum = np.zeros(24 * 2) + else: + accum = _compute_accum(lines[lnum + 1 : lnum + 1 + breakpoints]) + # Chop off the oldest day and add the new day + window_accum = np.concatenate( + ( + window_accum[48:] - window_accum[48], + accum + window_accum[-1] - window_accum[48], + ) + ) + rfactors = _rfactor_window(window_accum) + if rfactors: + resultdf.at[int(year), "rfactor"] += np.sum(rfactors) + resultdf.at[int(year), "storm_count"] += len(rfactors) + lnum += breakpoints + 1 + + return resultdf diff --git a/tests/io/test_rfactor.py b/tests/io/test_rfactor.py new file mode 100644 index 00000000..e75ce2c0 --- /dev/null +++ b/tests/io/test_rfactor.py @@ -0,0 +1,17 @@ +"""Test things related to R-factor calculations.""" + +import os + +from pydep.rfactor import compute_rfactor_from_cli + + +def get_path(name): + """helper""" + basedir = os.path.dirname(__file__) + return os.path.join(basedir, "..", "data", name) + + +def test_rfactor(): + """Walk before we run.""" + resultdf = compute_rfactor_from_cli(get_path("cli.txt")) + assert resultdf.at[2007, "storm_count"] == 76