Skip to content

Commit

Permalink
added NEE and wind speed to drivers, interpolate temperature before L…
Browse files Browse the repository at this point in the history
…W gapfilling to avoid remaining gaps
  • Loading branch information
stineb committed Sep 12, 2024
1 parent e827ede commit e71ceab
Show file tree
Hide file tree
Showing 10 changed files with 174 additions and 40 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ export(fdk_smooth_ts)
export(fill_netrad)
export(interpolate2daily_CO2_F_MDS)
export(interpolate2daily_PA_F)
export(interpolate2daily_TA_F_MDS)
export(interpolate2daily_WS_F)
export(interpolate2daily_fpar)
export(site_exceptions)
5 changes: 5 additions & 0 deletions R/fdk_downsample_fluxnet.R
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,11 @@ fdk_downsample_fluxnet <- function(
dplyr::filter(value > 0) |>
dplyr::pull(name)

# Air temperature: interpolate linearly, if gap <30 days
if ("TA_F_MDS" %in% missing){
df <- interpolate2daily_TA_F_MDS(df)
}

# Shortwave radiation: impute with KNN
if ("SW_IN_F_MDS" %in% missing){
df <- fdk_impute_knn(
Expand Down
14 changes: 12 additions & 2 deletions R/fdk_format_drivers.R
Original file line number Diff line number Diff line change
Expand Up @@ -103,20 +103,27 @@ fdk_format_drivers <- function(
# maximum daily temperature (deg C)
tmax = TMAX_F_MDS,

# wind speed (m/s)
vwind = WS_F,

# fraction of absorbed photosynthetically active radiation
fapar = FPAR,

# atmospheric CO2 concentration in ppmv
co2 = CO2_F_MDS,

# used as target data for rsofun, not forcing
# Variables below are used as target data for rsofun, not forcing
# gross primary production
gpp = GPP_NT_VUT_REF,
gpp_qc = NEE_VUT_REF_QC,

# energy balance-corrected latent heat flux (~ evapotranspiration)
le = LE_CORR,
le_qc = LE_F_MDS_QC
le_qc = LE_F_MDS_QC,

# net ecosystem exchange
nee = NEE_VUT_REF,
nee_qc = NEE_VUT_REF_QC
)

# fill missing net radiation data
Expand Down Expand Up @@ -198,11 +205,14 @@ fdk_format_drivers <- function(
rain,
tmin,
tmax,
vwind,
fapar,
co2,
ccov,
gpp,
gpp_qc,
nee,
nee_qc,
le,
le_qc
) |>
Expand Down
77 changes: 77 additions & 0 deletions R/helper_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -445,3 +445,80 @@ interpolate2daily_WS_F <- function(df){
return(df)
}


#' Interpolate missing wind speed data
#'
#' Interpolate missing air temperature
#'
#' @param df a data frame containing missing values for TA_F_MDS
#'
#' @return a gap filled data frame
#' @export

interpolate2daily_TA_F_MDS <- function(df){

# determine fraction of missing values
df <- df |>
dplyr::mutate(
TA_F_MDS = ifelse(is.nan(TA_F_MDS), NA, TA_F_MDS)
)
frac_missing <- sum(is.na(df$TA_F_MDS))/nrow(df)

# if less than a quarter is missing, fill it, with maximum gaps to be filled:
# 30 days
if (frac_missing < 0.25){

df <- df |>
dplyr::mutate(
TA_F_MDS = zoo::na.approx(TA_F_MDS, na.rm = FALSE, maxgap = 30)
)

# fill remaining with mean seasonal cycle
meandf <- df |>
dplyr::mutate(doy = lubridate::yday(TIMESTAMP)) |>
dplyr::group_by(doy) |>
dplyr::summarise(TA_F_MDS_meandoy = mean(TA_F_MDS, na.rm = TRUE))

df <- df |>
dplyr::mutate(doy = lubridate::yday(TIMESTAMP)) |>
dplyr::left_join(
meandf,
by = "doy"
) |>
dplyr::mutate(TA_F_MDS = ifelse(is.na(TA_F_MDS), TA_F_MDS_meandoy, TA_F_MDS)) |>
dplyr::select(-TA_F_MDS_meandoy, -doy)

# still missing?
frac_missing <- sum(is.na(df$TA_F_MDS))/nrow(df)
if (frac_missing > 0){

# pad then interpolate
len <- nrow(df)
df <- dplyr::bind_rows(
dplyr::slice(meandf, 1:365),
df,
dplyr::slice(meandf, 1:365)
) |>
dplyr::mutate(
TA_F_MDS = zoo::na.approx(TA_F_MDS, na.rm = FALSE, maxgap = 30)
) |>
dplyr::slice(366:(365+len))

# still missing?
frac_missing <- sum(is.na(df$TA_F_MDS))/nrow(df)
if (frac_missing > 0){
# fill by mean
df$TA_F_MDS[which(is.na(df$TA_F_MDS))] <- mean(df$TA_F_MDS, na.rm = TRUE)
}

}

} else {

message("Fraction of missing TA_F_MDS data too large (>0.25). Not interpolating.")

}

return(df)
}

57 changes: 29 additions & 28 deletions analysis/00_batch_convert_LSM_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,48 +11,49 @@ library(ingestr)
library(rsofun)

input_path <- "/data_2/FluxDataKit/FDK_inputs/"
output_path <- "/data_2/FluxDataKit/v3.3/"
output_path <- "/data_2/FluxDataKit/v3.4/"

sites <- FluxDataKit::fdk_site_info |>
mutate(
data_path = file.path(input_path, "flux_data/")
)

# # site subset------------------
# # xxx debug
# # # missing patm
# # use_sites <- c(
# # "BE-Maa", "CH-Aws", "CH-Cha", "CH-Dav", "CH-Fru", "CH-Oe2", "CZ-Lnz", "CZ-wet",
# # "DE-Akm", "DE-Geb", "DE-Gri", "DE-Hzd", "DE-Kli", "DE-Obe", "DE-Tha", "FI-Hyy",
# # "FI-Ken", "FI-Sii", "FR-FBn", "FR-Lam", "GF-Guy", "GL-Dsk", "IT-Lav", "IT-MBo",
# # "IT-Tor", "RU-Fyo"
# # )
# site subset------------------
# xxx debug
# # missing patm
# use_sites <- c(
# "CH-Oe2"
# # "FI-Hyy", # Boreal Forests/Taiga
# # "US-SRM", # Deserts & Xeric Shrublands
# # "FR-Pue", # Mediterranean Forests, Woodlands & Scrub
# # "DE-Hai", # Temperate Broadleaf & Mixed Forests
# # "DE-Gri",
# # "DE-Tha"
# # "US-Tw1", # Temperate Grasslands, Savannas & Shrublands
# # "AU-How", # Tropical & Subtropical Grasslands, Savannas & Shrubland
# # "BR-Sa3", # Tropical
# # "ZM-Mon", # Tropical deciduous forest (xeric woodland)
# # "US-ICh" # Tundra
# "BE-Maa", "CH-Aws", "CH-Cha", "CH-Dav", "CH-Fru", "CH-Oe2", "CZ-Lnz", "CZ-wet",
# "DE-Akm", "DE-Geb", "DE-Gri", "DE-Hzd", "DE-Kli", "DE-Obe", "DE-Tha", "FI-Hyy",
# "FI-Ken", "FI-Sii", "FR-FBn", "FR-Lam", "GF-Guy", "GL-Dsk", "IT-Lav", "IT-MBo",
# "IT-Tor", "RU-Fyo"
# )
# # use_sites <- readRDS(here::here("data/failed_sites.rds"))
# sites <- sites |>
# filter(sitename %in% use_sites)
# # ----------------------------
use_sites <- c(
"AR-TF1"
# "FI-Hyy", # Boreal Forests/Taiga
# "US-SRM", # Deserts & Xeric Shrublands
# "FR-Pue", # Mediterranean Forests, Woodlands & Scrub
# "DE-Hai", # Temperate Broadleaf & Mixed Forests
# "DE-Gri",
# "DE-Tha"
# "US-Tw1", # Temperate Grasslands, Savannas & Shrublands
# "AU-How", # Tropical & Subtropical Grasslands, Savannas & Shrubland
# "BR-Sa3", # Tropical
# "ZM-Mon", # Tropical deciduous forest (xeric woodland)
# "US-ICh" # Tundra
)

# use_sites <- readRDS(here::here("data/failed_sites.rds"))
sites <- sites |>
filter(sitename %in% use_sites)
# ----------------------------

#---- create a new release ----
fdk_release(
df = sites,
input_path = input_path,
output_path = output_path,
overwrite_lsm = FALSE,
overwrite_fluxnet = TRUE
overwrite_fluxnet = FALSE
)

#---- create matching plots ----
Expand All @@ -69,7 +70,7 @@ failed_sites <- lapply(sites$sitename, function(site){
fluxnet_format = TRUE,
path = file.path(output_path, "lsm"),
out_path = file.path(output_path, "fluxnet"),
overwrite = FALSE
overwrite = TRUE
)
)
)
Expand Down
40 changes: 40 additions & 0 deletions analysis/01_visualize_fdk_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,43 @@ failed_sites <- lapply(sites$sitename, function(site){

})

# determine missing values in daily data
output_path <- "/data_2/FluxDataKit/v3.4/"
sites <- readr::read_csv(paste0(output_path, "fdk_site_info.csv"))

# read all daily data for all sites
read_onesite <- function(site, path){
filename <- list.files(path = path,
pattern = paste0(site, "_PLUMBER_FULLSET_DD"),
full.names = TRUE
)
out <- read_csv(filename) |>
mutate(sitename = site)
return(out)
}

ddf <- purrr::map_dfr(
sites$sitename,
~read_onesite(., paste0(output_path, "/fluxnet/"))
)

df_missing <- ddf |>
dplyr::summarise(
dplyr::across(
where(is.numeric),
~sum(is.na(.))
)) %>%
tidyr::pivot_longer(everything())

print(df_missing)

gg <- ddf %>%
visdat::vis_miss(warn_large_data = FALSE)

ggsave(paste0(output_path, "plots", "/missing_daily.pdf"), plot = gg)

# sites with missing LW_IN_F_MDS
ddf %>%
dplyr::filter(is.na(LW_IN_F_MDS)) %>%
dplyr::select(sitename) %>%
unique()
4 changes: 2 additions & 2 deletions analysis/02_batch_format_rsofun_drivers.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ library(ingestr)
library(rsofun)
# lapply(list.files("R/","*.R", full.names = TRUE), source)

input_path <- "/data_2/FluxDataKit/v3.3/"
input_path <- "/data_2/FluxDataKit/v3.4/"
failed_sites <- readRDS(here::here("data/failed_sites.rds"))

# read in sites to process
Expand Down Expand Up @@ -72,7 +72,7 @@ driver_data <- dplyr::bind_rows(driver_data)
# apply compression to minimize space
saveRDS(
driver_data,
"/data_2/FluxDataKit/v3.3/rsofun_driver_data_v3.3.rds",
"/data_2/FluxDataKit/v3.4/rsofun_driver_data_v3.4.rds",
compress = "xz"
)

4 changes: 2 additions & 2 deletions analysis/03_screen_rsofun_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
library(tidyverse)
library(FluxDataKit)

path <- "/data_2/FluxDataKit/v3.3"
path <- "/data_2/FluxDataKit/v3.4"

failed_sites <- readRDS(here::here("data/failed_sites.rds"))

Expand Down Expand Up @@ -75,7 +75,7 @@ save(fdk_site_fullyearsequence,
# write CSV file for upload to Zenodo
readr::write_csv(
fdk_site_fullyearsequence,
file = "/data_2/FluxDataKit/v3.3/fdk_site_fullyearsequence.csv"
file = "/data_2/FluxDataKit/v3.4/fdk_site_fullyearsequence.csv"
)


Expand Down
10 changes: 5 additions & 5 deletions analysis/04_create_zenodo_upload.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
# the Zenodo repository:
# https://zenodo.org/record/7258291

input_path <- "/data_2/FluxDataKit/v3.3/"
tmp_path <- "/data_2/FluxDataKit/v3.3/zenodo_upload/"
drivers_filnam <- "rsofun_driver_data_v3.3.rds"
siteinfo_filnam <- "/data_2/FluxDataKit/v3.3/fdk_site_info.csv"
fullyearseq_filnam <- "/data_2/FluxDataKit/v3.3/fdk_site_fullyearsequence.csv"
input_path <- "/data_2/FluxDataKit/v3.4/"
tmp_path <- "/data_2/FluxDataKit/v3.4/zenodo_upload/"
drivers_filnam <- "rsofun_driver_data_v3.4.rds"
siteinfo_filnam <- "/data_2/FluxDataKit/v3.4/fdk_site_info.csv"
fullyearseq_filnam <- "/data_2/FluxDataKit/v3.4/fdk_site_fullyearsequence.csv"

#---- purge old data -----

Expand Down
2 changes: 1 addition & 1 deletion data-raw/02_compile_final_site_list.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ library(raster)
library(stringr)
# library(MODISTools)

output_path <- "/data_2/FluxDataKit/v3.3"
output_path <- "/data_2/FluxDataKit/v3.4"

# read site data RDS files
# append site types (ICOS, PLUMBER etc)
Expand Down

0 comments on commit e71ceab

Please sign in to comment.