Merge pull request dailyerosion#114 from akrherz/dev220126

Omnibus

akrherz authored Feb 1, 2022
2 parents 93399f3 + 3531c90 commit 52e72a5

Showing 6 changed files with 59 additions and 31 deletions.
5 changes: 2 additions & 3 deletions scripts/RT/proctor_sweep.py
@@ -24,7 +24,7 @@
import subprocess
from multiprocessing import Pool

-from pyiem.util import get_dbconn, logger
+from pyiem.util import get_dbconnstr, logger
import pandas as pd
import requests
from tqdm import tqdm
@@ -135,7 +135,6 @@ def main(argv):
"""Go Main Go."""
parser = usage()
args = parser.parse_args(argv[1:])
-    pgconn = get_dbconn("idep")
df = read_sql(
"""
SELECT huc_12, fpath, scenario,
@@ -144,7 +143,7 @@
from flowpaths where scenario = %s
and huc_12 in %s
""",
-        pgconn,
+        get_dbconnstr("idep"),
params=(args.scenario, tuple(HUC12S)),
index_col=None,
)
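
Note: the recurring change in this commit swaps the live psycopg2 connection
from get_dbconn for the connection string returned by get_dbconnstr, so that
pandas opens and closes its own connection when reading. A minimal sketch of
the pattern, assuming get_dbconnstr("idep") returns a string that
pandas.read_sql accepts as its con argument; the fetch_flowpaths helper is
illustrative, not repository code:

    from pandas import read_sql
    from pyiem.util import get_dbconnstr


    def fetch_flowpaths(scenario, huc12s):
        """Read flowpath rows; pandas manages the connection lifecycle."""
        return read_sql(
            "SELECT huc_12, fpath, scenario from flowpaths "
            "WHERE scenario = %s and huc_12 in %s",
            get_dbconnstr("idep"),  # connection string, not a connection
            params=(scenario, tuple(huc12s)),
            index_col=None,
        )
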
16 changes: 6 additions & 10 deletions scripts/cligen/r_factor.py
@@ -1,7 +1,7 @@
"""R factor work."""

from pyiem.dep import read_cli
-from pyiem.util import get_dbconn
+from pyiem.util import get_dbconnstr
from pyiem.plot.use_agg import plt
from pyiem.plot import MapPlot
import cartopy.crs as ccrs
@@ -11,19 +11,16 @@
from tqdm import tqdm
from geopandas import read_postgis
import pandas as pd
-from pandas.io.sql import read_sql
+from pandas import read_sql


def plot():
"""Plot."""
df2 = pd.read_csv("/tmp/data.csv", dtype={"huc12": str}).set_index("huc12")
-    pgconn = get_dbconn("idep")
    df = read_postgis(
-        """
-        SELECT huc_12, ST_Transform(simple_geom, 4326) as geom
-        from huc12 WHERE scenario = 0
-        """,
-        pgconn,
+        "SELECT huc_12, ST_Transform(simple_geom, 4326) as geom from huc12 "
+        "WHERE scenario = 0",
+        get_dbconnstr("idep"),
geom_col="geom",
index_col="huc_12",
)
@@ -71,13 +68,12 @@ def plot():

def dump_data():
"""Go main Go."""
-    pgconn = get_dbconn("idep")
df = read_sql(
"""
SELECT huc_12, max(climate_file) as cli from flowpaths where
scenario = 0 GROUP by huc_12
""",
-        pgconn,
+        get_dbconnstr("idep"),
index_col="huc_12",
)
data = {
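
Note: the same swap works for geopandas, as the plot() change above relies on
read_postgis forwarding its con argument to pandas. A sketch of how the two
halves of r_factor.py fit together after this change; the join step is an
assumption, since the diff only shows the reads:

    import pandas as pd
    from geopandas import read_postgis
    from pyiem.util import get_dbconnstr

    huc12s = read_postgis(
        "SELECT huc_12, ST_Transform(simple_geom, 4326) as geom from huc12 "
        "WHERE scenario = 0",
        get_dbconnstr("idep"),  # string in place of get_dbconn("idep")
        geom_col="geom",
        index_col="huc_12",
    )
    # dump_data() writes /tmp/data.csv; join its metric onto the geometries
    metrics = pd.read_csv("/tmp/data.csv", dtype={"huc12": str}).set_index("huc12")
    joined = huc12s.join(metrics)
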
7 changes: 3 additions & 4 deletions scripts/import/check_huc12_zero_flowpaths.py
@@ -1,19 +1,18 @@
"""Report which HUC12s have 0 flowpaths."""
import sys

-from pyiem.util import get_dbconn
-from pandas.io.sql import read_sql
+from pyiem.util import get_dbconnstr
+from pandas import read_sql


def main(argv):
"""Go Main Go."""
scenario = int(argv[1])
huc12s = [s.strip() for s in open("myhucs.txt", encoding="utf8")]
-    pgconn = get_dbconn("idep")
df = read_sql(
"SELECT huc_12, count(*) from flowpaths where scenario = %s "
"GROUP by huc_12",
-        pgconn,
+        get_dbconnstr("idep"),
params=(scenario,),
index_col="huc_12",
)
12 changes: 6 additions & 6 deletions scripts/import/flowpath2prj.py
@@ -38,8 +38,8 @@
from math import atan2, degrees, pi

from tqdm import tqdm
-from pandas.io.sql import read_sql
-from pyiem.util import get_dbconn, logger
+from pandas import read_sql
+from pyiem.util import get_dbconn, get_dbconnstr, logger
from pyiem.dep import load_scenarios

LOG = logger()
@@ -356,7 +356,7 @@ def rewrite_flowpath(cursor, scenario, flowpath_id, df):
)


-def do_flowpath(pgconn, cursor, scenario, zone, metadata):
+def do_flowpath(cursor, scenario, zone, metadata):
"""Process a given flowpathid"""
# slope = compute_slope(fid)
# I need bad soilfiles so that the length can be computed
@@ -371,7 +371,7 @@ def do_flowpath(pgconn, cursor, scenario, zone, metadata):
WHERE flowpath = %s and length < 9999
ORDER by segid ASC
""",
-        pgconn,
+        get_dbconnstr("idep"),
params=(metadata["fid"],),
)
origsize = len(df.index)
@@ -577,7 +577,7 @@ def main(argv):
"SELECT ST_ymax(ST_Transform(geom, 4326)) as lat, fpath, fid, huc_12, "
"climate_file from flowpaths WHERE scenario = %s and fpath != 0 "
"ORDER by huc_12 ASC",
-        pgconn,
+        get_dbconnstr("idep"),
params=(get_flowpath_scenario(scenario),),
)
if os.path.isfile("myhucs.txt"):
@@ -596,7 +596,7 @@
zone = "IA_CENTRAL"
elif row["lat"] >= 40.5:
zone = "IA_SOUTH"
-        data = do_flowpath(pgconn, cursor, scenario, zone, row)
+        data = do_flowpath(cursor, scenario, zone, row)
if data is not None:
write_prj(data)
cursor.close()
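
Note: this file keeps get_dbconn for the writing cursor while reads move to
get_dbconnstr, which is why do_flowpath no longer threads pgconn through. A
sketch of the resulting split, assuming writes stay on one explicit
transaction; everything except the query is illustrative:

    from pandas import read_sql
    from pyiem.util import get_dbconn, get_dbconnstr

    pgconn = get_dbconn("idep")  # writes: explicit commit semantics
    cursor = pgconn.cursor()
    df = read_sql(  # reads: pandas manages its own connection
        "SELECT segid, length from flowpath_points "
        "WHERE flowpath = %s and length < 9999 ORDER by segid ASC",
        get_dbconnstr("idep"),
        params=(42,),  # illustrative flowpath id
    )
    # ... rewrite the flowpath through cursor, then ...
    cursor.close()
    pgconn.commit()
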
43 changes: 39 additions & 4 deletions scripts/import/flowpath_importer.py
@@ -26,6 +26,9 @@
PREFIX = "fp"
TRUNC_GRIDORDER_AT = 4
GENLU_CODES = {}
+PROCESSING_COUNTS = {
+    "flowpaths_deduped": 0,
+}


def get_flowpath(cursor, scenario, huc12, fpath):
@@ -111,6 +114,24 @@ def get_genlu_code(cursor, label):
return GENLU_CODES[label]


+def dedupe(df, lencolname):
+    """Deduplicate by checking the FBndID."""
+    # Optimization: a field with a single point is likely the dup to drop
+    fields = df["FBndID"].value_counts().sort_values(ascending=False)
+    # Find any fields with a count of 1
+    fields2 = fields[fields == 1]
+    if not fields2.empty and len(fields.index) == 2:
+        PROCESSING_COUNTS["flowpaths_deduped"] += 1
+        return df[df["FBndID"] != fields2.index[0]]
+    # Could have a perfect duplicate?
+    if fields.min() == fields.max():
+        PROCESSING_COUNTS["flowpaths_deduped"] += 1
+        return df[df["FBndID"] != fields.index[0]]
+    # high field wins
+    PROCESSING_COUNTS["flowpaths_deduped"] += 1
+    return df[df["FBndID"] == fields.index[0]]


def process_flowpath(cursor, scenario, huc12, db_fid, df):
"""Do one flowpath please."""
lencolname = f"{PREFIX}Len{huc12}"
@@ -119,6 +140,11 @@ def process_flowpath(cursor, scenario, huc12, db_fid, df):
# Sort along the length column, which orders the points from top
# to bottom
df = df.sort_values(lencolname, ascending=True)
+    # remove duplicate points due to a bkgelder sampling issue whereby some
+    # points exist in two fields
+    if df[lencolname].duplicated().any():
+        df = dedupe(df, lencolname)

# Remove any previous data for this flowpath
cursor.execute(
"DELETE from flowpath_points WHERE flowpath = %s", (db_fid,)
@@ -142,7 +168,10 @@ def process_flowpath(cursor, scenario, huc12, db_fid, df):
elev_change += dy
dx = abs(row2[lencolname] - row[lencolname])
if dx == 0:
-            raise Exception(f"dx is zero at segid: {segid} {row} {row2}")
+            # We have a duplicate point; abort, as this should not be possible
+            print(f"ABORT duplicate point {segid} {row} {row2}")
+            print(df[["OBJECTID", "FBndID", lencolname]])
+            sys.exit()
x_change += dx
gridorder = row[gordcolname]
if gridorder > TRUNC_GRIDORDER_AT or pd.isnull(gridorder):
@@ -215,17 +244,19 @@ def process(cursor, scenario, huc12df):
"""
# Hack compute the huc12 by finding the fp field name
huc12 = None
+    fpcol = None
for col in huc12df.columns:
if col.startswith(PREFIX):
-            huc12 = col[len(PREFIX) :]
+            fpcol = col
+            huc12 = col[len(PREFIX) :].replace("_tif", "")
break
-    if huc12 is None:
+    if huc12 is None or len(huc12) != 12:
raise Exception(f"Could not find huc12 from {huc12df.columns}")

delete_previous(cursor, scenario, huc12)
# the inbound dataframe has lots of data, one row per flowpath point
# We group the dataframe by the column which uses a PREFIX and the huc8
-    for flowpath_num, df in huc12df.groupby(f"{PREFIX}{huc12}"):
+    for flowpath_num, df in huc12df.groupby(fpcol):
# These are upstream errors I should ignore
if flowpath_num == 0 or len(df.index) < 2:
continue
@@ -276,6 +307,10 @@ def main(argv):
cursor.close()
pgconn.commit()

+    print("Processing accounting:")
+    for key, val in PROCESSING_COUNTS.items():
+        print(f" {key}: {val}")


if __name__ == "__main__":
main(sys.argv)
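
Note: dedupe() is the subtle addition here; process() also now derives the
groupby column (fpcol) directly and strips a "_tif" suffix before validating
that the parsed HUC12 is 12 characters long. A worked sketch of dedupe()'s
first branch on toy data; the field IDs and HUC12 column name are
illustrative:

    import pandas as pd

    # Two fields claim points on one flowpath; F2 contributes a single
    # stray point whose length value duplicates one of F1's.
    df = pd.DataFrame(
        {
            "FBndID": ["F1", "F1", "F1", "F2"],
            "fpLen070801050901": [0.0, 10.0, 20.0, 20.0],
        }
    )
    assert df["fpLen070801050901"].duplicated().any()  # triggers dedupe()
    fields = df["FBndID"].value_counts().sort_values(ascending=False)
    fields2 = fields[fields == 1]  # F2 -> 1, the likely dup to drop
    # First branch applies (two fields, one with a single point): drop the
    # stray F2 row and keep all of F1's points intact.
    assert not fields2.empty and len(fields.index) == 2
    deduped = df[df["FBndID"] != fields2.index[0]]
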
7 changes: 3 additions & 4 deletions scripts/util/yearly_report.py
@@ -3,8 +3,8 @@
import datetime

import matplotlib.pyplot as plt
-from pandas.io.sql import read_sql
-from pyiem.util import get_dbconn
+from pandas import read_sql
+from pyiem.util import get_dbconnstr
from pyiem.reference import state_names


@@ -13,7 +13,6 @@ def main(argv):
scenario = int(argv[1])
state = argv[2]
print(f"This report covers the inclusive years 2008-2021 for {state}")
-    pgconn = get_dbconn("idep")

df = read_sql(
"""
@@ -35,7 +34,7 @@
round((avg(detachment) * 4.463)::numeric, 2) as detachment_ta
from agg GROUP by yr ORDER by yr
""",
-        pgconn,
+        get_dbconnstr("idep"),
params=(state, scenario),
index_col="yr",
)
