diff --git a/analysis/04_batch_format_rsofun_drivers.R b/analysis/02_batch_format_rsofun_drivers.R
similarity index 96%
rename from analysis/04_batch_format_rsofun_drivers.R
rename to analysis/02_batch_format_rsofun_drivers.R
index ee9312e..bb141ac 100644
--- a/analysis/04_batch_format_rsofun_drivers.R
+++ b/analysis/02_batch_format_rsofun_drivers.R
@@ -5,6 +5,7 @@ options(dplyr.summarise.inform = FALSE)
 library(dplyr)
 library(tidyr)
 library(ggplot2)
+library(ingestr)
 library(rsofun)
 lapply(list.files("R/","*.R", full.names = TRUE), source)
 
@@ -12,7 +13,8 @@ input_path <- "/data/scratch/beta-v4/fluxnet/"
 
 # read in sites to process
 sites <- FluxDataKit::fdk_site_info
-#site <- c("FR-Pue", "CH-Lae", "CH-Dav")
+
+#site <- c("FR-Pue", "CH-Lae")
 
 # subset sites
 # sites <- sites |>
diff --git a/analysis/05_screen_rsofun_data.R b/analysis/03_screen_rsofun_data.R
similarity index 62%
rename from analysis/05_screen_rsofun_data.R
rename to analysis/03_screen_rsofun_data.R
index 7b290c1..155d703 100644
--- a/analysis/05_screen_rsofun_data.R
+++ b/analysis/03_screen_rsofun_data.R
@@ -60,7 +60,8 @@ data_fix_years <- df |>
   nest() |>
   rename(
     forcing = data
-  )
+  ) |>
+  ungroup()
 
 df1 <- df |>
   filter(tolower(sitename) %in% tolower(screen$sitename)) |>
@@ -76,3 +77,47 @@ data <- bind_rows(df1, df2)
 
 # save data
 saveRDS(data, "data/rsofun_driver_data_clean.rds", compress = "xz")
+
+data |>
+  group_by(sitename) |>
+  do({
+
+    tmp <- .$forcing[[1]] |>
+      tidyr::pivot_longer(
+        col = !contains("date"),
+        names_to = "measurement",
+        values_to = "value"
+      )
+
+    sitename <- .$sitename[1]
+
+    l <- seq(as.Date("1990/1/1"), as.Date("2023/1/1"), "years")
+
+    p <- ggplot(data = tmp) +
+      geom_line(
+        aes(
+          date,
+          value
+        ),
+        colour = "red"
+      ) +
+      geom_vline(xintercept = l) +
+      labs(
+        title = sitename
+      ) +
+      theme_bw() +
+      theme(panel.grid.minor = element_line()
+      ) +
+      facet_grid(
+        measurement ~ .,
+        scales = "free"
+        )
+
+    ggsave(
+      paste0("manuscript/rsofun_input/",sitename,".png"),
+      width = 12,
+      height = 14,
+      dpi = 175
+    )
+
+  })
diff --git a/analysis/04_create_zenodo_upload.R b/analysis/04_create_zenodo_upload.R
new file mode 100644
index 0000000..7444573
--- /dev/null
+++ b/analysis/04_create_zenodo_upload.R
@@ -0,0 +1,100 @@
+# This script does the final renaming of all the files
+# to a consistent format, as well as zipping them up
+# for uploading to Zenodo
+#
+# This script uses bash system calls
+# as well as the "rename" function which
+# is not installed by default. Run on linux
+# or a bash compatible system.
+#
+# Compressed data should be uploaded to
+# the Zenodo repository:
+# https://zenodo.org/record/7258291
+
+input_path <- "/data/scratch/beta-v4/"
+tmp_path <- "/data/scratch/upload"z
+
+#---- purge old data -----
+
+# remove temporary path
+system(sprintf("rm -rf %s", tmp_path))
+
+# recreate temporary path
+dir.create(tmp_path)
+
+#---- copy new data over ----
+system(
+  sprintf(
+    "cp -R %s/lsm %s/lsm",
+  input_path,
+  tmp_path
+  )
+)
+
+system(
+  sprintf(
+    "cp -R %s/fluxnet %s/fluxnet",
+    input_path,
+    tmp_path
+  )
+)
+
+#---- rename all files in place ----
+
+# rename LSM data
+
+system(
+  sprintf(
+    "rename 's/FLUXNET2015/FLUXDATAKIT/g' %s/lsm/*.nc",
+    tmp_path
+  )
+)
+
+system(
+  sprintf(
+    "rename 's/OxFlux/FLUXDATAKIT/g' %s/lsm/*.nc",
+    tmp_path
+  )
+)
+
+system(
+  sprintf(
+    "rename 's/LaThuile/FLUXDATAKIT/g' %s/lsm/*.nc",
+    tmp_path
+  )
+)
+
+# rename FLUXNET data
+
+system(
+  sprintf(
+    "rename 's/PLUMBER/FLUXDATAKIT/g' %s/fluxnet/*.csv",
+    tmp_path
+  )
+)
+
+#---- zip up all data -----
+
+# compress LSM data
+system(
+  sprintf(
+  "
+  cd %s/lsm;
+  tar -czvf %s/FLUXDATAKIT_LSM.tar.gz *.nc
+  ",
+  tmp_path,
+  tmp_path
+  )
+)
+
+# compress FLUXNET data
+system(
+  sprintf(
+    "
+  cd %s/fluxnet;
+  tar -czvf %s/FLUXDATAKIT_FLUXNET.tar.gz *.csv
+  ",
+    tmp_path,
+    tmp_path
+  )
+)
diff --git a/analysis/06_p-model_run.R b/analysis/06_p-model_run.R
index b4f96b4..7098fc1 100644
--- a/analysis/06_p-model_run.R
+++ b/analysis/06_p-model_run.R
@@ -28,7 +28,7 @@ for(i in driver_data$sitename){
       output <- try(rsofun::runread_pmodel_f(
         df,
         par = params_modl,
-        makecheck = TRUE
+        makecheck = FALSE
       ))
 
       print(output)
@@ -68,7 +68,7 @@ for(i in driver_data$sitename){
 
       ggsave(
         file.path(
-          "./manuscript/figures/",
+          "./manuscript/rsofun_output/",
           paste0(df$sitename,".png")
         ),
         width = 12,
diff --git a/analysis/scraps/process_pmodel_data.R b/analysis/scraps/process_pmodel_data.R
deleted file mode 100644
index abcbfd2..0000000
--- a/analysis/scraps/process_pmodel_data.R
+++ /dev/null
@@ -1,141 +0,0 @@
-#' Run the p-model and return summary stats
-#'
-#' This routine calculates summary statistics
-#' from p-model output when provided with
-#' an RDS file containing a single or multiple instance
-#' of (combined) p-model driver files (ingestr created)
-#'
-#' @param file an RDS file containing driver data
-#'
-#' @return summary statistics for modelling purposes
-
-process_pmodel_data <- function(file){
-
-  # if input file is a dataframe assume
-  # rsofun driver file / no classes are assigned
-  if(is.character(file)){
-    # read in file
-    df <- try(readRDS(file))
-  } else {
-    df <- file
-  }
-
-  # check succesfull read
-  if(inherits(df, "try-error")){
-    return(NULL)
-  }
-
-  # set model parameters
-  params_modl <- list(
-    kphio           = 0.09423773,
-    soilm_par_a     = 0.33349283,
-    soilm_par_b     = 1.45602286,
-    tau_acclim_tempstress = 10,
-    par_shape_tempstress  = 0.0
-  )
-
-  ## run P-model for the read file
-  df_output <- try(
-    suppressWarnings(
-      suppressMessages(
-        runread_pmodel_f(
-          df,
-          par = params_modl,
-          makecheck = TRUE,
-          parallel = FALSE
-        )
-      )
-    )
-  )
-
-  # check on success of the run
-  if(inherits(df, "try-error")){
-    return(NULL)
-  }
-
-  # get basic meta-data (lat lon)
-  # drop data first
-  siteinfo <- df_output %>%
-    dplyr::select(-data) %>%
-    unnest(cols = c(sitename, siteinfo))
-
-  # Calculate summary statistics ----
-
-  # get mean annual AET separately
-  evapotranspiration <- df_output %>%
-    unnest(data) %>%
-    mutate(year = lubridate::year(date)) %>%
-    group_by(sitename, year) %>%
-    summarise(transp = sum(transp, na.rm = TRUE)) %>%
-    ungroup() %>%
-    group_by(sitename) %>%
-    summarise(aet = mean(transp, na.rm = TRUE))
-
-  # photosynthesis parameters
-  photosynthesis_parameters <- df_output %>%
-
-    # calculate mean over daily AET/PET
-    mutate(data = purrr::map(data, ~mutate(., alpha = transp / pet))) %>%
-
-    # gpp-weighted mean of vcmax25, jmax25, gs_accl, mean daily AET/PET
-    mutate(
-      data = purrr::map(data, ~mutate(.,
-      vcmax25_wgt = vcmax25 * gpp,
-      jmax25_wgt = jmax25 * gpp,
-      gs_accl_wgt = gs_accl * gpp))) %>%
-    mutate(
-      data = purrr::map(data, ~summarise(.,
-        gpp_sum = sum(gpp),
-        vcmax25_wgt_sum = sum(vcmax25_wgt),
-        jmax25_wgt_sum = sum(jmax25_wgt),
-        gs_accl_wgt_sum = sum(gs_accl_wgt),
-        alpha = mean(alpha)))) %>%
-    mutate(data = purrr::map(data, ~mutate(.,
-        vcmax25 = vcmax25_wgt_sum / gpp_sum,
-        jmax25 = jmax25_wgt_sum / gpp_sum,
-        gs_accl = gs_accl_wgt_sum / gpp_sum))) %>%
-    unnest(data) %>%
-    dplyr::select(sitename, alpha, vcmax25, jmax25, gs_accl)
-
-
-  # get climate indices from ddf_watch
-  climate_indices <- df %>%
-    mutate(
-      mat = purrr::map_dbl(forcing,
-                           ~calc_climate_index_mat(.)),
-      matgs = purrr::map_dbl(forcing,
-                             ~calc_climate_index_matgs(., temp_base = 5.0)),
-      tmonthmin = purrr::map_dbl(forcing,
-                                 ~calc_climate_index_tmonthmin(.)),
-      tmonthmax = purrr::map_dbl(forcing,
-                                 ~calc_climate_index_tmonthmax(.)),
-      ndaysgs = purrr::map_dbl(forcing,
-                               ~calc_climate_index_ndaysgs(., temp_base = 5.0)),
-      mai = purrr::map_dbl(forcing,
-                           ~calc_climate_index_mai(.)),
-      maigs = purrr::map_dbl(forcing,
-                             ~calc_climate_index_maigs(., temp_base = 5.0)),
-      map = purrr::map_dbl(forcing,
-                           ~calc_climate_index_map(.)),
-      pmonthmin = purrr::map_dbl(forcing,
-                                 ~calc_climate_index_pmonthmin(.)),
-      mapgs = purrr::map_dbl(forcing,
-                             ~calc_climate_index_mapgs(., temp_base = 5.0)),
-      mavgs = purrr::map_dbl(forcing,
-                             ~calc_climate_index_mavgs(., temp_base = 5.0)),
-      mav = purrr::map_dbl(forcing,
-                           ~calc_climate_index_mav(.))) %>%
-    dplyr::select(-forcing, -siteinfo, -params_siml, -df_soiltexture)
-
-  # bind everything for output
-  output <- climate_indices %>%
-    right_join(photosynthesis_parameters, by = "sitename") %>%
-    left_join(evapotranspiration, by = "sitename") %>%
-    mutate(ai = map / aet)
-
-  # bind meta-data to summary stats
-  output <- left_join(siteinfo, output, by = "sitename")
-
-  # return output (tibble/dataframe)
-  return(output)
-}
diff --git a/analysis/scraps/qa_qc.R b/analysis/scraps/qa_qc.R
deleted file mode 100644
index 957ab7d..0000000
--- a/analysis/scraps/qa_qc.R
+++ /dev/null
@@ -1,78 +0,0 @@
-# quality control on the compiled data
-
-library(tidyverse)
-#library(ggforce)
-library(patchwork)
-
-# read data
-sites <- readRDS("data/flux_data_kit_site-info.rds")
-df <- readRDS("data/p_model_drivers/site_based_drivers.rds")
-missing_sites <- sites[which(!c(sites$sitename %in% df$sitename)),]
-
-#----- flatten file -----
-df <- df %>%
-  filter(
-    !is.null(forcing)
-  ) %>%
-  select(
-    sitename,
-    forcing
-  ) %>%
-  unnest(cols = c(forcing))
-
-# calculate site years
-site_years <- df %>%
-  group_by(sitename) %>%
-  summarize(
-    time_diff = max(date) - min(date) # time difference in days
-  ) %>%
-  ungroup() %>%
-  summarize( # summarize time in years
-    time = round(as.numeric(sum(time_diff)/365))
-  )
-
-# message("daily site years:")
-# message(site_years)
-#
-# df <- readRDS("~/Dropbox/tmp/site_based_drivers_HH.rds")
-# meta_data <- readRDS("data/flux_data_kit_site-info.rds")
-#
-# df <- left_join(df, meta_data)
-#
-# # calculate site years
-# site_years <- df %>%
-#   group_by(sitename) %>%
-#   summarize(
-#     time_diff = max(date) - min(date) # time difference in days
-#   ) %>%
-#   ungroup() %>%
-#   summarize( # summarize time in years
-#     time = round(as.numeric(sum(time_diff)/365))
-#   )
-#
-# message("HH site years:")
-# message(site_years)
-
-#---- plot all GPP time series ----
-
-pdf("~/Desktop/series.pdf", 7, 5)
-for (i in unique(df$sitename) ) {
-
-  ss <- df %>%
-    filter(sitename == i)
-
-  print(
-    ggplot(ss) +
-      geom_line(
-        aes(
-          date,
-          gpp
-        )
-      ) +
-      labs(
-        title = paste(ss$sitename[1], ss$igbp_land_use[1]),
-        subtitle = ss$product[1]
-      )
-  )
-}
-dev.off()
diff --git a/analysis/scraps/qa_qc_functions.R b/analysis/scraps/qa_qc_functions.R
deleted file mode 100644
index a384d11..0000000
--- a/analysis/scraps/qa_qc_functions.R
+++ /dev/null
@@ -1,113 +0,0 @@
-Pre <- function(mydata, site){
-  my_file <- mydata %>%
-    select(starts_with("date"),
-           gpp,
-           temp,
-           prec,
-           ppfd,
-           sitename) %>%
-    filter(sitename == site )
-  #replace -9999 with NAs
-  my_file <- my_file %>%
-    na_if(-9999)
-  #have the daily data
-  my_daily_file$date
-  my_daily<- my_file %>%
-    group_by(date) %>%
-    summarise(gpp = mean(gpp, na.rm = TRUE),
-              temp = mean(temp, na.rm = TRUE),
-              prec = mean(prec, na.rm = TRUE),
-              ppdf = mean(ppfd, na.rm = TRUE))
-  return(my_daily)
-}
-
-add_outliers <- function(mydata){
-  t_lm <- lm(gpp ~
-               temp+
-               prec+
-               ppdf,
-             data = mydata)
-  res <- t_lm$residuals
-  outres <- boxplot.stats(res)$out
-  #some gpp is not exist, what should be done? i use the na.omit...
-  n1_mydaily <- na.omit(mydata) %>%
-    mutate(res = res) %>%
-    mutate(outline = if_else(res %in% outres,"out", "in"))
-  # > n1_mydaily <- my_daily %>% mutate(res = res) %>%
-  #   +   mutate(outline = if_else(res %in% outres,"out", "in"))
-  # Error: Problem with `mutate()` column `res`.
-  # i `res = res`.
-  # i `res` must be size 2555 or 1, not 2546.
-  return(n1_mydaily)
-  }
-
-add_drift <- function(mydata){
-  drift_dat <- mydata %>%
-    mutate(year_dec = year(date)+ (lubridate::yday(date) - 1)/365)
-  #create the daily data for using
-  lm_d <- lm(gpp ~ temp +
-               prec+
-               ppdf+
-               year_dec,
-             data = drift_dat)
-  s <- summary(lm_d)
-  drift_dat$tfordrift <- s$coefficients["year_dec",3]#?add the t value
-  return(drift_dat)
-}
-
-add_spurious <- function(mydata){
-  c <- mydata%>% select(gpp)
-  m <- c$gpp
-  c1 <- c %>% table() %>% as.data.frame()
-  #update: add the extra column here for the freq
-  c_new <- sapply(c$gpp,function(x){m <- c1[c1$.== x, "Freq"]})
-  b <- rep(NA,length(c_new))
-  for (i in 1:length(c_new)){
-    b[i] <- c_new[[i]]
-  }
-  mydata <- mydata %>% mutate(rep = b)
-  return(mydata)
-}
-
-add_step_change <- function(mydata){
-  library(strucchange)
-  test <- mydata %>% mutate(month_year = month(date), year = year(date))
-  test <- test %>% mutate(month_dec = year+(month_year-1)/12)
-  test <- test %>% group_by(month_dec) %>% summarise(gpp = mean(gpp),
-                                                     prec = mean(prec),
-                                                     ppdf = mean(ppdf),
-                                                     temp = mean(temp))
-  #here have the  monthly data
-  mlmt <- lm(gpp ~ prec+ppdf+temp, data = test)
-  nrow(test)
-  sctest(gpp ~ prec+ppdf+temp, data = test, type = "Chow", point =77)#why 5~77
-  test <- test %>% mutate(tp = NA)
-  for (i in 5:(nrow(test)-7)){#i have some problems with how many points can be done for the sctest
-    tsc <- sctest(gpp ~ prec+ppdf+temp, data = test, type = "Chow",point = i)
-    test$tp[i] <- tsc$p.value
-  }
-  test <- test %>% mutate(SCpoint = if_else(tp < 0.05,"SC","NON")) %>%
-    mutate()
-  a <- 1
-  test$SCvalue <- NA
-  for(j in 1:nrow(test)){
-    if(is.na(test$SCpoint[j])){
-      test$SCvalue[j] <- a
-    } else if(test$SCpoint[j] == "SC") {
-      a <- a+1
-    } else {
-      a <- a
-    }
-
-    test$SCvalue[j] <- a
-  }
-  return(test)
-}
-
-GrowingSeason <- function(mydata){
-  get <-  quantile(mydata$gpp, probs = c(0.05,0.95),na.rm = TRUE)
-  datanew <- dat1 %>% mutate(value = (dat1$gpp-get[1])/(get[2]-get[1]))
-  datanew2 <-datanew %>%
-    mutate(is_growing_season = if_else(value < 0.05,"F","T"))
-  return(datanew2)
-}
diff --git a/analysis/scraps/rscript_format_site_data.R b/analysis/scraps/rscript_format_site_data.R
deleted file mode 100755
index de7854f..0000000
--- a/analysis/scraps/rscript_format_site_data.R
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/usr/bin/env Rscript
-args <- commandArgs(trailingOnly = TRUE)
-freq <- args[1]
-freq <- "daily"
-
-# load libraries and
-# scripts
-library(tidyverse)
-library(ingestr)
-source("R/collect_drivers_rsofun.R")
-source("R/format_site_drivers.R")
-source("R/prepare_setup_sofun.R")
-
-# read sites data frame
-df_sites <- readRDS("data/flux_data_kit_site-info.rds")
-
-data <- df_sites[1:2,] %>%
-  rowwise() %>%
-  do({
-
-    ss <- as.data.frame(.)
-
-    # process data
-    df <- try(
-      #suppressWarnings(
-      format_drivers_site(
-        ss,
-        verbose = FALSE,
-        product = .$product[1],
-        freq = freq
-      )
-      #)
-    )
-
-    if(inherits(df, "try-error")){
-      df <- data.frame(
-          sitename = ss$sitename,
-          forcing = NA,
-          params_siml = NA,
-          site_info = NA,
-          params_soil = NA
-        )
-    } else {
-      df
-    }
-  })
-
-# remove empty locations
-# some Ameriflux sites are not
-# available
-data <- data %>%
- filter(!is.null(forcing))
-
-if (freq == "hh"){
-
-  data <- data %>%
-    unnest()
-
-  saveRDS(
-    data,
-    "data/site_based_drivers_HH.rds",
-    compress = "xz")
-
-  } else {
-
-  saveRDS(
-    data,
-    "data/p_model_drivers/site_based_drivers.rds",
-    compress = "xz")
-}