From a82d2683ad2e961812500b37b5a928cde1c746cb Mon Sep 17 00:00:00 2001
From: marcadella <mayeul.marcadella@unibe.ch>
Date: Thu, 24 Oct 2024 14:04:41 +0200
Subject: [PATCH] Revert "Removed rsofun mentions"

This reverts commit 8b19960d4525eaaf63ca990b0fca0fb8a1882782.
---
 .gitignore                                    |   1 -
 analysis/00_batch_convert_LSM_data.R          |   1 +
 ...ers.R => 02_batch_format_rsofun_drivers.R} |   0
 analysis/03_screen_data.R                     |  77 -------
 analysis/03_screen_rsofun_data.R              | 202 ++++++++++++++++++
 .../{b_06_p-model_run.R => 06_p-model_run.R}  |   0
 analysis/README.md                            |   9 +-
 vignettes/03_data_generation.Rmd              |   9 +-
 8 files changed, 209 insertions(+), 90 deletions(-)
 rename analysis/{b_02_batch_format_rsofun_drivers.R => 02_batch_format_rsofun_drivers.R} (100%)
 delete mode 100644 analysis/03_screen_data.R
 create mode 100644 analysis/03_screen_rsofun_data.R
 rename analysis/{b_06_p-model_run.R => 06_p-model_run.R} (100%)

diff --git a/.gitignore b/.gitignore
index ea490a3..5113bc5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,4 +9,3 @@ data-raw/cloud_cover/
 data-raw/modis/
 data/tmp/
 docs/
-.idea/
\ No newline at end of file
diff --git a/analysis/00_batch_convert_LSM_data.R b/analysis/00_batch_convert_LSM_data.R
index edb7e18..76504f0 100644
--- a/analysis/00_batch_convert_LSM_data.R
+++ b/analysis/00_batch_convert_LSM_data.R
@@ -8,6 +8,7 @@ library(FluxDataKit)
 library(FluxnetLSM)
 library(dplyr)
 library(ingestr)
+library(rsofun)
 
 input_path <- "/data_2/FluxDataKit/FDK_inputs/"
 output_path <- "/data_2/FluxDataKit/v3.4/"
diff --git a/analysis/b_02_batch_format_rsofun_drivers.R b/analysis/02_batch_format_rsofun_drivers.R
similarity index 100%
rename from analysis/b_02_batch_format_rsofun_drivers.R
rename to analysis/02_batch_format_rsofun_drivers.R
diff --git a/analysis/03_screen_data.R b/analysis/03_screen_data.R
deleted file mode 100644
index 648cfd1..0000000
--- a/analysis/03_screen_data.R
+++ /dev/null
@@ -1,77 +0,0 @@
-# Determine longest good-quality sequence by site
-library(tidyverse)
-library(FluxDataKit)
-
-path <- "/data_2/FluxDataKit/v3.4"
-
-sites <- FluxDataKit::fdk_site_info |>
-  filter(!(sitename %in% c("MX-Tes", "US-KS3")))
-
-# # site subset------------------
-# # xxx debug
-# # chose representative sites for LES book
-# use_sites <- c(
-#   # "FI-Hyy", # Boreal Forests/Taiga
-#   # "US-SRM", # Deserts & Xeric Shrublands
-#   # "FR-Pue", # Mediterranean Forests, Woodlands & Scrub
-#   # "DE-Hai", # Temperate Broadleaf & Mixed Forests
-#   "IT-Ro1"
-#   # "US-Tw1", # Temperate Grasslands, Savannas & Shrublands
-#   # "AU-How", # Tropical & Subtropical Grasslands, Savannas & Shrubland
-#   # "BR-Sa3", # Tropical
-#   # "ZM-Mon", # Tropical deciduous forest (xeric woodland)
-#   # "US-ICh"  # Tundra
-# )
-# sites <- sites |>
-#   filter(sitename %in% use_sites)
-# #----------------------------
-
-# determine longest sequence of good-quality data for each site
-list_seq <- lapply(sites$sitename, function(site){
-  message(sprintf("Analysing %s ----", site))
-
-  # get file name path
-  filn <- list.files(
-    file.path(path, "fluxnet"),
-    pattern = paste0("FLX_", site, ".*_FULLSET_DD.*.csv"),
-    recursive = TRUE
-  )
-
-  df <- read.csv(file.path(file.path(path, "fluxnet"), filn))
-
-  df_seq <- suppressMessages(
-    suppressWarnings(
-      try(fdk_get_sequence(
-        df,
-        site = site,
-        qc_threshold = 0.25,
-        leng_threshold = 90,
-        do_plot = TRUE,
-        out_path = file.path(path, "plots")
-      )
-      )
-    )
-  )
-
-  if(inherits(df_seq, "try-error")){
-    message("!!! plotting failed !!!")
-    return(NULL)
-  }
-
-  return(df_seq)
-})
-
-
-fdk_site_fullyearsequence <- bind_rows(list_seq)
-
-# write CSV file
-save(fdk_site_fullyearsequence,
-     file = here::here("data/fdk_site_fullyearsequence.rda"),
-     compress = "xz"
-     )
-
-# write CSV file for upload to Zenodo
-readr::write_csv(
-  fdk_site_fullyearsequence,
-  file = "/data_2/FluxDataKit/v3.4/fdk_site_fullyearsequence.csv"
-)
diff --git a/analysis/03_screen_rsofun_data.R b/analysis/03_screen_rsofun_data.R
new file mode 100644
index 0000000..8090598
--- /dev/null
+++ b/analysis/03_screen_rsofun_data.R
@@ -0,0 +1,202 @@
+# Determine longest good-quality sequence by site
+library(tidyverse)
+library(FluxDataKit)
+
+path <- "/data_2/FluxDataKit/v3.4"
+
+failed_sites <- readRDS(here::here("data/failed_sites.rds"))
+
+sites <- FluxDataKit::fdk_site_info |>
+  filter(!(sitename %in% c("MX-Tes", "US-KS3")))
+
+# # site subset------------------
+# # xxx debug
+# # chose representative sites for LES book
+# use_sites <- c(
+#   # "FI-Hyy", # Boreal Forests/Taiga
+#   # "US-SRM", # Deserts & Xeric Shrublands
+#   # "FR-Pue", # Mediterranean Forests, Woodlands & Scrub
+#   # "DE-Hai", # Temperate Broadleaf & Mixed Forests
+#   "IT-Ro1"
+#   # "US-Tw1", # Temperate Grasslands, Savannas & Shrublands
+#   # "AU-How", # Tropical & Subtropical Grasslands, Savannas & Shrubland
+#   # "BR-Sa3", # Tropical
+#   # "ZM-Mon", # Tropical deciduous forest (xeric woodland)
+#   # "US-ICh"  # Tundra
+# )
+# sites <- sites |>
+#   filter(sitename %in% use_sites)
+# #----------------------------
+
+# determine longest sequence of good-quality data for each site
+list_seq <- lapply(sites$sitename, function(site) {
+  # Returns the sequence table for one site, or NULL when the site has to be
+  # skipped; NULL entries are dropped by bind_rows() further down.
+  message(sprintf("Analysing %s ----", site))
+
+  # Locate the site's daily FULLSET file. The extension is matched literally
+  # and anchored, so names merely containing "csv" are not picked up.
+  flux_dir <- file.path(path, "fluxnet")
+  filn <- list.files(
+    flux_dir,
+    pattern = paste0("FLX_", site, ".*_FULLSET_DD.*\\.csv$"),
+    recursive = TRUE
+  )
+
+  # read.csv() needs exactly one path: skip the site rather than abort the
+  # whole batch when the file is missing or the match is ambiguous.
+  if (length(filn) != 1) {
+    message(sprintf("!!! %d files for %s !!!", length(filn), site))
+    return(NULL)
+  }
+  df <- read.csv(file.path(flux_dir, filn))
+
+  tryCatch(
+    suppressMessages(suppressWarnings(fdk_get_sequence(
+      df, site = site, qc_threshold = 0.25, leng_threshold = 90,
+      do_plot = TRUE, out_path = file.path(path, "plots")
+    ))),
+    error = function(e) {
+      message("!!! plotting failed !!! ", conditionMessage(e))
+      NULL
+    }
+  )
+})
+
+
+fdk_site_fullyearsequence <- bind_rows(list_seq)
+
+# write RDA file
+save(fdk_site_fullyearsequence,
+     file = here::here("data/fdk_site_fullyearsequence.rda"),
+     compress = "xz"
+     )
+
+# write CSV file for upload to Zenodo
+readr::write_csv(
+  fdk_site_fullyearsequence,
+  file = "/data_2/FluxDataKit/v3.4/fdk_site_fullyearsequence.csv"
+)
+
+
+# df <- readRDS("~/data/FluxDataKit/v3/rsofun_driver_data_v3.rds")
+#
+# # screen for missing gpp data
+# screening <- df |>
+#   group_by(sitename) |>
+#   unnest(forcing) |>
+#   summarize(
+#     keep = ifelse(all(is.na(gpp)), FALSE, TRUE)
+#   )
+#
+# df <- left_join(df, screening)
+# df <- df |>
+#   filter(
+#     keep
+#   )
+#
+# # screen manually, drop funky sites
+#
+# screen <- readODS::read_ods("data/rsofun_sites.ods") |>
+#   filter(
+#     drop
+#   )
+#
+# df <- df |>
+#   filter(
+#     !(tolower(sitename) %in% tolower(screen$sitename))
+#   ) |>
+#   select(
+#     -keep
+#   )
+#
+# # screen manually, drop funky years
+# screen <- readODS::read_ods("data/rsofun_sites.ods") |>
+#   filter(
+#     is.na(drop)
+#   )
+#
+# data_fix_years <- df |>
+#   filter(tolower(sitename) %in% tolower(screen$sitename)) |>
+#   group_by(sitename) |>
+#   unnest(forcing) |>
+#   left_join(screen) |>
+#   mutate(
+#     year = as.numeric(format(date, "%Y"))
+#   ) |>
+#   filter(
+#     (year >= start & year <= end)
+#   ) |>
+#   select(
+#     -params_siml,
+#     -site_info,
+#     -year,
+#     -end,
+#     -start,
+#     -drop,
+#     -notes
+#   ) |>
+#   nest() |>
+#   rename(
+#     forcing = data
+#   ) |>
+#   ungroup()
+#
+# df1 <- df |>
+#   filter(tolower(sitename) %in% tolower(screen$sitename)) |>
+#   select(
+#     -forcing
+#   ) |>
+#   left_join(data_fix_years)
+#
+# df2 <- df |>
+#   filter(!(tolower(sitename) %in% tolower(screen$sitename)))
+#
+# data <- bind_rows(df1, df2)
+#
+# # save data
+# saveRDS(data, "data/rsofun_driver_data_clean.rds", compress = "xz")
+#
+# data |>
+#   group_by(sitename) |>
+#   do({
+#
+#     tmp <- .$forcing[[1]] |>
+#       tidyr::pivot_longer(
+#         col = !contains("date"),
+#         names_to = "measurement",
+#         values_to = "value"
+#       )
+#
+#     sitename <- .$sitename[1]
+#
+#     l <- seq(as.Date("1990/1/1"), as.Date("2023/1/1"), "years")
+#
+#     p <- ggplot(data = tmp) +
+#       geom_line(
+#         aes(
+#           date,
+#           value
+#         ),
+#         colour = "red"
+#       ) +
+#       geom_vline(xintercept = l) +
+#       labs(
+#         title = sitename
+#       ) +
+#       theme_bw() +
+#       theme(panel.grid.minor = element_line()
+#       ) +
+#       facet_grid(
+#         measurement ~ .,
+#         scales = "free"
+#         )
+#
+#     ggsave(
+#       paste0("manuscript/rsofun_input/",sitename,".png"),
+#       width = 12,
+#       height = 14,
+#       dpi = 175
+#     )
+#
+#   })
diff --git a/analysis/b_06_p-model_run.R b/analysis/06_p-model_run.R
similarity index 100%
rename from analysis/b_06_p-model_run.R
rename to analysis/06_p-model_run.R
diff --git a/analysis/README.md b/analysis/README.md
index d8ea94c..ce6dc5d 100644
--- a/analysis/README.md
+++ b/analysis/README.md
@@ -9,11 +9,8 @@ Scripts need to be run in chronological order
 - 00_batch_convert_LSM_data.R - converts FLUXNET data (standard CSV files) to LSM data (NetCDF)
  - the output is land surface model (LSM) compatible data in a netcdf format 
 - 01_visualize_fdk_data.R - Plots all converted data for visual checks (not mandatory)
-- 03_screen_data.R - converts LSM data to FLUXNET compatible CSVs
+- 03_batch_convert_to_CSV_data.R - converts LSM data to FLUXNET compatible CSVs
  - this data is downsampled to a daily time step
  - the data sticks to FLUXNET formatting
-- 04_create_zenodo_upload.R
-
-Optional scripts:
-- b_02_batch_format_rsofun_drivers.R
-- b_06_p-model_run.R
+- 02_batch_format_rsofun_drivers.R - converts data to `rsofun` model inputs
+ - conversion to ensure compatibility with the `rsofun` package for modelling
diff --git a/vignettes/03_data_generation.Rmd b/vignettes/03_data_generation.Rmd
index 2730d2a..0733532 100644
--- a/vignettes/03_data_generation.Rmd
+++ b/vignettes/03_data_generation.Rmd
@@ -163,16 +163,13 @@ library(rsofun)
 
 # processing of the half hourly data to
 # p-model input drivers for rsofun
-rsofun_driver <- rsofun::fdk_format_drivers(
+rsofun_data <- fdk_format_drivers(
   site_info = FluxDataKit::fdk_site_info |>
     filter(sitename == "FR-Fon"),
   path = paste0(tempdir(),"/"),
   verbose = TRUE
 )
 
-# Please make sure the parameter names correspond to your version of rsofun.
-# See rsofun documentation
-
 # optimized parameters from previous work
 params_modl <- list(
   kphio           = 0.09423773,
@@ -184,7 +181,7 @@ params_modl <- list(
 
 # run the model for these parameters
 output <- rsofun::runread_pmodel_f(
-  rsofun_driver,
+  rsofun_data,
   par = params_modl
 )
 
@@ -197,7 +194,7 @@ print(head(model_data))
 
 ```{r eval = evaluate}
 
-validation_data <- rsofun_driver |>
+validation_data <- rsofun_data |>
   filter(sitename == "FR-Fon") |>
   tidyr::unnest(forcing)