diff --git a/analysis/04_batch_format_rsofun_drivers.R b/analysis/02_batch_format_rsofun_drivers.R similarity index 96% rename from analysis/04_batch_format_rsofun_drivers.R rename to analysis/02_batch_format_rsofun_drivers.R index ee9312e..bb141ac 100644 --- a/analysis/04_batch_format_rsofun_drivers.R +++ b/analysis/02_batch_format_rsofun_drivers.R @@ -5,6 +5,7 @@ options(dplyr.summarise.inform = FALSE) library(dplyr) library(tidyr) library(ggplot2) +library(ingestr) library(rsofun) lapply(list.files("R/","*.R", full.names = TRUE), source) @@ -12,7 +13,8 @@ input_path <- "/data/scratch/beta-v4/fluxnet/" # read in sites to process sites <- FluxDataKit::fdk_site_info -#site <- c("FR-Pue", "CH-Lae", "CH-Dav") + +#site <- c("FR-Pue", "CH-Lae") # subset sites # sites <- sites |> diff --git a/analysis/05_screen_rsofun_data.R b/analysis/03_screen_rsofun_data.R similarity index 62% rename from analysis/05_screen_rsofun_data.R rename to analysis/03_screen_rsofun_data.R index 7b290c1..155d703 100644 --- a/analysis/05_screen_rsofun_data.R +++ b/analysis/03_screen_rsofun_data.R @@ -60,7 +60,8 @@ data_fix_years <- df |> nest() |> rename( forcing = data - ) + ) |> + ungroup() df1 <- df |> filter(tolower(sitename) %in% tolower(screen$sitename)) |> @@ -76,3 +77,47 @@ data <- bind_rows(df1, df2) # save data saveRDS(data, "data/rsofun_driver_data_clean.rds", compress = "xz") + +data |> + group_by(sitename) |> + do({ + + tmp <- .$forcing[[1]] |> + tidyr::pivot_longer( + col = !contains("date"), + names_to = "measurement", + values_to = "value" + ) + + sitename <- .$sitename[1] + + l <- seq(as.Date("1990/1/1"), as.Date("2023/1/1"), "years") + + p <- ggplot(data = tmp) + + geom_line( + aes( + date, + value + ), + colour = "red" + ) + + geom_vline(xintercept = l) + + labs( + title = sitename + ) + + theme_bw() + + theme(panel.grid.minor = element_line() + ) + + facet_grid( + measurement ~ ., + scales = "free" + ) + + ggsave( + paste0("manuscript/rsofun_input/",sitename,".png"), + width = 12, + height = 14, + dpi = 175 + ) + + }) diff --git a/analysis/04_create_zenodo_upload.R b/analysis/04_create_zenodo_upload.R new file mode 100644 index 0000000..7444573 --- /dev/null +++ b/analysis/04_create_zenodo_upload.R @@ -0,0 +1,100 @@ +# This script does the final renaming of all the files +# to a consistent format, as well as zipping them up +# for uploading to Zenodo +# +# This script uses bash system calls +# as well as the "rename" function which +# is not installed by default. Run on linux +# or a bash compatible system. +# +# Compressed data should be uploaded to +# the Zenodo repository: +# https://zenodo.org/record/7258291 + +input_path <- "/data/scratch/beta-v4/" +tmp_path <- "/data/scratch/upload"z + +#---- purge old data ----- + +# remove temporary path +system(sprintf("rm -rf %s", tmp_path)) + +# recreate temporary path +dir.create(tmp_path) + +#---- copy new data over ---- +system( + sprintf( + "cp -R %s/lsm %s/lsm", + input_path, + tmp_path + ) +) + +system( + sprintf( + "cp -R %s/fluxnet %s/fluxnet", + input_path, + tmp_path + ) +) + +#---- rename all files in place ---- + +# rename LSM data + +system( + sprintf( + "rename 's/FLUXNET2015/FLUXDATAKIT/g' %s/lsm/*.nc", + tmp_path + ) +) + +system( + sprintf( + "rename 's/OxFlux/FLUXDATAKIT/g' %s/lsm/*.nc", + tmp_path + ) +) + +system( + sprintf( + "rename 's/LaThuile/FLUXDATAKIT/g' %s/lsm/*.nc", + tmp_path + ) +) + +# rename FLUXNET data + +system( + sprintf( + "rename 's/PLUMBER/FLUXDATAKIT/g' %s/fluxnet/*.csv", + tmp_path + ) +) + +#---- zip up all data ----- + +# compress LSM data +system( + sprintf( + " + cd %s/lsm; + tar -czvf %s/FLUXDATAKIT_LSM.tar.gz *.nc + ", + tmp_path, + tmp_path + ) +) + +# compress FLUXNET data +system( + sprintf( + " + cd %s/fluxnet; + tar -czvf %s/FLUXDATAKIT_FLUXNET.tar.gz *.csv + ", + tmp_path, + tmp_path + ) +) diff --git a/analysis/06_p-model_run.R b/analysis/06_p-model_run.R index b4f96b4..7098fc1 100644 --- a/analysis/06_p-model_run.R +++ b/analysis/06_p-model_run.R @@ -28,7 +28,7 @@ for(i in driver_data$sitename){ output <- try(rsofun::runread_pmodel_f( df, par = params_modl, - makecheck = TRUE + makecheck = FALSE )) print(output) @@ -68,7 +68,7 @@ for(i in driver_data$sitename){ ggsave( file.path( - "./manuscript/figures/", + "./manuscript/rsofun_output/", paste0(df$sitename,".png") ), width = 12, diff --git a/analysis/scraps/process_pmodel_data.R b/analysis/scraps/process_pmodel_data.R deleted file mode 100644 index abcbfd2..0000000 --- a/analysis/scraps/process_pmodel_data.R +++ /dev/null @@ -1,141 +0,0 @@ -#' Run the p-model and return summary stats -#' -#' This routine calculates summary statistics -#' from p-model output when provided with -#' an RDS file containing a single or multiple instance -#' of (combined) p-model driver files (ingestr created) -#' -#' @param file an RDS file containing driver data -#' -#' @return summary statistics for modelling purposes - -process_pmodel_data <- function(file){ - - # if input file is a dataframe assume - # rsofun driver file / no classes are assigned - if(is.character(file)){ - # read in file - df <- try(readRDS(file)) - } else { - df <- file - } - - # check succesfull read - if(inherits(df, "try-error")){ - return(NULL) - } - - # set model parameters - params_modl <- list( - kphio = 0.09423773, - soilm_par_a = 0.33349283, - soilm_par_b = 1.45602286, - tau_acclim_tempstress = 10, - par_shape_tempstress = 0.0 - ) - - ## run P-model for the read file - df_output <- try( - suppressWarnings( - suppressMessages( - runread_pmodel_f( - df, - par = params_modl, - makecheck = TRUE, - parallel = FALSE - ) - ) - ) - ) - - # check on success of the run - if(inherits(df, "try-error")){ - return(NULL) - } - - # get basic meta-data (lat lon) - # drop data first - siteinfo <- df_output %>% - dplyr::select(-data) %>% - unnest(cols = c(sitename, siteinfo)) - - # Calculate summary statistics ---- - - # get mean annual AET separately - evapotranspiration <- df_output %>% - unnest(data) %>% - mutate(year = lubridate::year(date)) %>% - group_by(sitename, year) %>% - summarise(transp = sum(transp, na.rm = TRUE)) %>% - ungroup() %>% - group_by(sitename) %>% - summarise(aet = mean(transp, na.rm = TRUE)) - - # photosynthesis parameters - photosynthesis_parameters <- df_output %>% - - # calculate mean over daily AET/PET - mutate(data = purrr::map(data, ~mutate(., alpha = transp / pet))) %>% - - # gpp-weighted mean of vcmax25, jmax25, gs_accl, mean daily AET/PET - mutate( - data = purrr::map(data, ~mutate(., - vcmax25_wgt = vcmax25 * gpp, - jmax25_wgt = jmax25 * gpp, - gs_accl_wgt = gs_accl * gpp))) %>% - mutate( - data = purrr::map(data, ~summarise(., - gpp_sum = sum(gpp), - vcmax25_wgt_sum = sum(vcmax25_wgt), - jmax25_wgt_sum = sum(jmax25_wgt), - gs_accl_wgt_sum = sum(gs_accl_wgt), - alpha = mean(alpha)))) %>% - mutate(data = purrr::map(data, ~mutate(., - vcmax25 = vcmax25_wgt_sum / gpp_sum, - jmax25 = jmax25_wgt_sum / gpp_sum, - gs_accl = gs_accl_wgt_sum / gpp_sum))) %>% - unnest(data) %>% - dplyr::select(sitename, alpha, vcmax25, jmax25, gs_accl) - - - # get climate indices from ddf_watch - climate_indices <- df %>% - mutate( - mat = purrr::map_dbl(forcing, - ~calc_climate_index_mat(.)), - matgs = purrr::map_dbl(forcing, - ~calc_climate_index_matgs(., temp_base = 5.0)), - tmonthmin = purrr::map_dbl(forcing, - ~calc_climate_index_tmonthmin(.)), - tmonthmax = purrr::map_dbl(forcing, - ~calc_climate_index_tmonthmax(.)), - ndaysgs = purrr::map_dbl(forcing, - ~calc_climate_index_ndaysgs(., temp_base = 5.0)), - mai = purrr::map_dbl(forcing, - ~calc_climate_index_mai(.)), - maigs = purrr::map_dbl(forcing, - ~calc_climate_index_maigs(., temp_base = 5.0)), - map = purrr::map_dbl(forcing, - ~calc_climate_index_map(.)), - pmonthmin = purrr::map_dbl(forcing, - ~calc_climate_index_pmonthmin(.)), - mapgs = purrr::map_dbl(forcing, - ~calc_climate_index_mapgs(., temp_base = 5.0)), - mavgs = purrr::map_dbl(forcing, - ~calc_climate_index_mavgs(., temp_base = 5.0)), - mav = purrr::map_dbl(forcing, - ~calc_climate_index_mav(.))) %>% - dplyr::select(-forcing, -siteinfo, -params_siml, -df_soiltexture) - - # bind everything for output - output <- climate_indices %>% - right_join(photosynthesis_parameters, by = "sitename") %>% - left_join(evapotranspiration, by = "sitename") %>% - mutate(ai = map / aet) - - # bind meta-data to summary stats - output <- left_join(siteinfo, output, by = "sitename") - - # return output (tibble/dataframe) - return(output) -} diff --git a/analysis/scraps/qa_qc.R b/analysis/scraps/qa_qc.R deleted file mode 100644 index 957ab7d..0000000 --- a/analysis/scraps/qa_qc.R +++ /dev/null @@ -1,78 +0,0 @@ -# quality control on the compiled data - -library(tidyverse) -#library(ggforce) -library(patchwork) - -# read data -sites <- readRDS("data/flux_data_kit_site-info.rds") -df <- readRDS("data/p_model_drivers/site_based_drivers.rds") -missing_sites <- sites[which(!c(sites$sitename %in% df$sitename)),] - -#----- flatten file ----- -df <- df %>% - filter( - !is.null(forcing) - ) %>% - select( - sitename, - forcing - ) %>% - unnest(cols = c(forcing)) - -# calculate site years -site_years <- df %>% - group_by(sitename) %>% - summarize( - time_diff = max(date) - min(date) # time difference in days - ) %>% - ungroup() %>% - summarize( # summarize time in years - time = round(as.numeric(sum(time_diff)/365)) - ) - -# message("daily site years:") -# message(site_years) -# -# df <- readRDS("~/Dropbox/tmp/site_based_drivers_HH.rds") -# meta_data <- readRDS("data/flux_data_kit_site-info.rds") -# -# df <- left_join(df, meta_data) -# -# # calculate site years -# site_years <- df %>% -# group_by(sitename) %>% -# summarize( -# time_diff = max(date) - min(date) # time difference in days -# ) %>% -# ungroup() %>% -# summarize( # summarize time in years -# time = round(as.numeric(sum(time_diff)/365)) -# ) -# -# message("HH site years:") -# message(site_years) - -#---- plot all GPP time series ---- - -pdf("~/Desktop/series.pdf", 7, 5) -for (i in unique(df$sitename) ) { - - ss <- df %>% - filter(sitename == i) - - print( - ggplot(ss) + - geom_line( - aes( - date, - gpp - ) - ) + - labs( - title = paste(ss$sitename[1], ss$igbp_land_use[1]), - subtitle = ss$product[1] - ) - ) -} -dev.off() diff --git a/analysis/scraps/qa_qc_functions.R b/analysis/scraps/qa_qc_functions.R deleted file mode 100644 index a384d11..0000000 --- a/analysis/scraps/qa_qc_functions.R +++ /dev/null @@ -1,113 +0,0 @@ -Pre <- function(mydata, site){ - my_file <- mydata %>% - select(starts_with("date"), - gpp, - temp, - prec, - ppfd, - sitename) %>% - filter(sitename == site ) - #replace -9999 with NAs - my_file <- my_file %>% - na_if(-9999) - #have the daily data - my_daily_file$date - my_daily<- my_file %>% - group_by(date) %>% - summarise(gpp = mean(gpp, na.rm = TRUE), - temp = mean(temp, na.rm = TRUE), - prec = mean(prec, na.rm = TRUE), - ppdf = mean(ppfd, na.rm = TRUE)) - return(my_daily) -} - -add_outliers <- function(mydata){ - t_lm <- lm(gpp ~ - temp+ - prec+ - ppdf, - data = mydata) - res <- t_lm$residuals - outres <- boxplot.stats(res)$out - #some gpp is not exist, what should be done? i use the na.omit... - n1_mydaily <- na.omit(mydata) %>% - mutate(res = res) %>% - mutate(outline = if_else(res %in% outres,"out", "in")) - # > n1_mydaily <- my_daily %>% mutate(res = res) %>% - # + mutate(outline = if_else(res %in% outres,"out", "in")) - # Error: Problem with `mutate()` column `res`. - # i `res = res`. - # i `res` must be size 2555 or 1, not 2546. - return(n1_mydaily) - } - -add_drift <- function(mydata){ - drift_dat <- mydata %>% - mutate(year_dec = year(date)+ (lubridate::yday(date) - 1)/365) - #create the daily data for using - lm_d <- lm(gpp ~ temp + - prec+ - ppdf+ - year_dec, - data = drift_dat) - s <- summary(lm_d) - drift_dat$tfordrift <- s$coefficients["year_dec",3]#?add the t value - return(drift_dat) -} - -add_spurious <- function(mydata){ - c <- mydata%>% select(gpp) - m <- c$gpp - c1 <- c %>% table() %>% as.data.frame() - #update: add the extra column here for the freq - c_new <- sapply(c$gpp,function(x){m <- c1[c1$.== x, "Freq"]}) - b <- rep(NA,length(c_new)) - for (i in 1:length(c_new)){ - b[i] <- c_new[[i]] - } - mydata <- mydata %>% mutate(rep = b) - return(mydata) -} - -add_step_change <- function(mydata){ - library(strucchange) - test <- mydata %>% mutate(month_year = month(date), year = year(date)) - test <- test %>% mutate(month_dec = year+(month_year-1)/12) - test <- test %>% group_by(month_dec) %>% summarise(gpp = mean(gpp), - prec = mean(prec), - ppdf = mean(ppdf), - temp = mean(temp)) - #here have the monthly data - mlmt <- lm(gpp ~ prec+ppdf+temp, data = test) - nrow(test) - sctest(gpp ~ prec+ppdf+temp, data = test, type = "Chow", point =77)#why 5~77 - test <- test %>% mutate(tp = NA) - for (i in 5:(nrow(test)-7)){#i have some problems with how many points can be done for the sctest - tsc <- sctest(gpp ~ prec+ppdf+temp, data = test, type = "Chow",point = i) - test$tp[i] <- tsc$p.value - } - test <- test %>% mutate(SCpoint = if_else(tp < 0.05,"SC","NON")) %>% - mutate() - a <- 1 - test$SCvalue <- NA - for(j in 1:nrow(test)){ - if(is.na(test$SCpoint[j])){ - test$SCvalue[j] <- a - } else if(test$SCpoint[j] == "SC") { - a <- a+1 - } else { - a <- a - } - - test$SCvalue[j] <- a - } - return(test) -} - -GrowingSeason <- function(mydata){ - get <- quantile(mydata$gpp, probs = c(0.05,0.95),na.rm = TRUE) - datanew <- dat1 %>% mutate(value = (dat1$gpp-get[1])/(get[2]-get[1])) - datanew2 <-datanew %>% - mutate(is_growing_season = if_else(value < 0.05,"F","T")) - return(datanew2) -} diff --git a/analysis/scraps/rscript_format_site_data.R b/analysis/scraps/rscript_format_site_data.R deleted file mode 100755 index de7854f..0000000 --- a/analysis/scraps/rscript_format_site_data.R +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env Rscript -args <- commandArgs(trailingOnly = TRUE) -freq <- args[1] -freq <- "daily" - -# load libraries and -# scripts -library(tidyverse) -library(ingestr) -source("R/collect_drivers_rsofun.R") -source("R/format_site_drivers.R") -source("R/prepare_setup_sofun.R") - -# read sites data frame -df_sites <- readRDS("data/flux_data_kit_site-info.rds") - -data <- df_sites[1:2,] %>% - rowwise() %>% - do({ - - ss <- as.data.frame(.) - - # process data - df <- try( - #suppressWarnings( - format_drivers_site( - ss, - verbose = FALSE, - product = .$product[1], - freq = freq - ) - #) - ) - - if(inherits(df, "try-error")){ - df <- data.frame( - sitename = ss$sitename, - forcing = NA, - params_siml = NA, - site_info = NA, - params_soil = NA - ) - } else { - df - } - }) - -# remove empty locations -# some Ameriflux sites are not -# available -data <- data %>% - filter(!is.null(forcing)) - -if (freq == "hh"){ - - data <- data %>% - unnest() - - saveRDS( - data, - "data/site_based_drivers_HH.rds", - compress = "xz") - - } else { - - saveRDS( - data, - "data/p_model_drivers/site_based_drivers.rds", - compress = "xz") -}