Skip to content

Commit

Permalink
fix #146 for Triennial and AFSC.Slope data
Browse files Browse the repository at this point in the history
- only apply the filters if the initial bio_pull exists
- add info print statements about what is being removed
- fix the data list for Triennial and AFSC.Slope to not remove the age data pull if there are no ages if otoliths were collected

The function was failing in the situations where no otoliths were collected from a species across all survey years.  However, if there was otolith data no ages the function worked but did not report the age data frame.  Even if there were no ages, one may want to know that there were otoliths collected so this has been fixed
- move the majority of the special if statements for the Triennial and AFSC.Slope to a single section where possible
  • Loading branch information
chantelwetzel-noaa committed Aug 29, 2024
1 parent 669f6a2 commit b9e92f3
Showing 1 changed file with 75 additions and 85 deletions.
160 changes: 75 additions & 85 deletions R/pull_bio.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,70 +136,77 @@ pull_bio <- function(
)
}

# Filter out non-standard samples
# Some early entries are NA for standard sample indicators. These should be retained.
standard_lengths <- bio_pull[, "standard_survey_length_or_width_indicator"] %in% c(NA, "NA", "Standard Survey Length or Width")
if (length(standard_lengths) != dim(bio_pull)[1]) {
if (verbose) {
n <- dim(bio_pull)[1] - length(standard_lengths)
cli::cli_alert_info(
"There were {n} lengths removed because the were not within the standard sampling protocal."
)
# This check is needed to proceed on for species where there were no age from
# the AFSC.Slope and Triennial survey since lengths are checked later in the
# length_fact data table.
if (!is.null(dim(bio_pull))) {
# Filter out non-standard samples
# Some early entries are NA for standard sample indicators. These should be retained.
standard_lengths <- bio_pull[, "standard_survey_length_or_width_indicator"] %in% c(NA, "NA", "Standard Survey Length or Width")
if (length(standard_lengths) != dim(bio_pull)[1]) {
if (verbose) {
n <- dim(bio_pull)[1] - length(standard_lengths)
cli::cli_alert_info(
"There were {n} lengths removed because the were not within the standard sampling protocal."
)
}
bio_pull <- bio_pull[standard_lengths, ]
}
bio_pull <- bio_pull[standard_lengths, ]
}

# Remove non-standard ages
nonstandard_age <- which(bio_pull[, "standard_survey_age_indicator"] == "Not Standard Survey Age")
if (length(nonstandard_age) > 0) {
if (verbose) {
n <- length(nonstandard_age)
cli::cli_alert_info(
"There were {n} ages removed because the were not within the standard sampling protocal."
)
# Remove non-standard ages
nonstandard_age <- which(bio_pull[, "standard_survey_age_indicator"] == "Not Standard Survey Age")
if (length(nonstandard_age) > 0) {
if (verbose) {
n <- length(nonstandard_age)
cli::cli_alert_info(
"There were {n} ages removed because the were not within the standard sampling protocal."
)
}
bio_pull[nonstandard_age, "age_years"] <- NA
}
bio_pull[nonstandard_age, "age_years"] <- NA
}

# Remove non-standard weights
nonstandard_wgt <- which(bio_pull[, "standard_survey_weight_indicator"] == "Not Standard Survey Weight")
if (length(nonstandard_wgt) > 0) {
if (verbose) {
n <- length(nonstandard_wgt)
cli::cli_alert_info(
"There were {n} weights removed because there were not collected at a standard survey location."
)
# Remove non-standard weights
nonstandard_wgt <- which(bio_pull[, "standard_survey_weight_indicator"] == "Not Standard Survey Weight")
if (length(nonstandard_wgt) > 0) {
if (verbose) {
n <- length(nonstandard_wgt)
cli::cli_alert_info(
"There were {n} weights removed because there were not collected at a standard survey location."
)
}
bio_pull[nonstandard_wgt, "weight_kg"] <- NA
}
bio_pull[nonstandard_wgt, "weight_kg"] <- NA
}

# Remove water hauls
water_hauls <- is.na(bio_pull[, "operation_dim$legacy_performance_code"])
if (sum(water_hauls) > 0) {
if (survey =="Triennial" & verbose == TRUE) {
n <- sum(water_hauls)
cli::cli_alert_info(
"There were {n} tows removed because they were determined to be water hauls (net not on the bottom)."
)
fill_in <- is.na(bio_pull[, "operation_dim$legacy_performance_code"])
if (sum(fill_in) > 0) {
bio_pull[fill_in, "operation_dim$legacy_performance_code"] <- -999
}
bio_pull[water_hauls, "operation_dim$legacy_performance_code"] <- -999
}
good_tows <- bio_pull[, "operation_dim$legacy_performance_code"] != 8
if (length(good_tows) != dim(bio_pull)[1]) {
if (verbose) {
n <- dim(bio_pull)[1] - length(good_tows)
cli::cli_alert_info(
"There were {n} tows removed because they were deemed bad tows."
)
# A value of 8 in the Triennial data indicates a water haul
good_tows <- bio_pull[, "operation_dim$legacy_performance_code"] != 8
if (sum(good_tows) != dim(bio_pull)[1]) {
if (verbose) {
n <- dim(bio_pull)[1] - sum(good_tows)
cli::cli_alert_info(
"There were {n} tows removed because they were determined to be water hauls (e.g., net not on the bottom)."
)
}
bio_pull <- bio_pull[good_tows, ]
}
bio_pull <- bio_pull[good_tows, ]
}
find <- colnames(bio_pull) == "ageing_laboratory_dim$laboratory"
colnames(bio_pull)[find] <- "ageing_lab"

find <- colnames(bio_pull) == "ageing_laboratory_dim$laboratory"
colnames(bio_pull)[find] <- "ageing_lab"
# Remove the extra columns now that they are not needed
bio_pull <- bio_pull[, vars_short]
# These two columns are added for functionality since the older data
# pulls were weight and age and the subsequent functions have not been
# revised.
bio_pull$weight <- bio_pull$weight_kg
bio_pull$age <- bio_pull$age_years
bio_pull$date <- chron::chron(format(as.POSIXlt(bio_pull$datetime_utc_iso, format = "%Y-%m-%dT%H:%M:%S"), "%Y-%m-%d"), format = "y-m-d", out.format = "YYYY-m-d")
bio_pull$trawl_id <- as.character(bio_pull$trawl_id)
}

# Remove the extra columns now that they are not needed
bio_pull <- bio_pull[, vars_short]
bio <- bio_pull

if (survey %in% c("Triennial", "AFSC.Slope")) {
url_text <- get_url(
Expand All @@ -209,43 +216,29 @@ pull_bio <- function(
years = years,
vars_long = vars_long
)

len_pull <- try(get_json(url = url_text))

# Remove water hauls
if (is.data.frame(len_pull)) {
fill_in <- is.na(len_pull[, "operation_dim$legacy_performance_code"])
if (sum(fill_in) > 0) {
len_pull[fill_in, "operation_dim$legacy_performance_code"] <- -999
}
# Remove water hauls
good_tows <- len_pull[, "operation_dim$legacy_performance_code"] != 8
if (length(good_tows) != dim(len_pull)[1]) {
if (sum(good_tows) != dim(len_pull)[1]) {
if (verbose) {
n <- dim(len_pull)[1] - length(good_tows)
n <- dim(len_pull)[1] - sum(good_tows)
cli::cli_alert_info(
"There were {n} tows removed because they were deemed bad tows."
"There were {n} tows removed because they were determined to be water hauls (e.g., net not on the bottom)."
)
}
len_pull <- len_pull[good_tows, ]
}
len_pull <- len_pull[good_tows, ]

len_pull$weight_kg <- NA
len_pull$weight_kg <- len_pull$weight <- NA
len_pull$date <- chron::chron(format(as.POSIXlt(len_pull$datetime_utc_iso, format = "%Y-%m-%dT%H:%M:%S"), "%Y-%m-%d"), format = "y-m-d", out.format = "YYYY-m-d")
len_pull$trawl_id <- as.character(len_pull$trawl_id)
}
}

if (nrow(bio_pull) > 0) {
bio_pull$date <- chron::chron(format(as.POSIXlt(bio_pull$datetime_utc_iso, format = "%Y-%m-%dT%H:%M:%S"), "%Y-%m-%d"), format = "y-m-d", out.format = "YYYY-m-d")
bio_pull$trawl_id <- as.character(bio_pull$trawl_id)
bio <- bio_pull
}

if (survey %in% c("Triennial", "AFSC.Slope")) {
if (!is.null(bio_pull) & sum(is.na(bio_pull$age_years)) != length(bio_pull$age_years)) {
age_data <- bio_pull
} else {
age_data <- NULL
len_pull <- len_pull[, colnames(len_pull) != "operation_dim$legacy_performance_code"]
}

bio <- list()
Expand All @@ -254,8 +247,8 @@ pull_bio <- function(
} else {
bio$length_data <- "no_lengths_available"
}
if (!is.null(age_data)) {
bio$age_data <- age_data
if (is.data.frame(bio_pull)) {
bio$age_data <- bio_pull
} else {
bio$age_data <- "no_ages_available"
}
Expand All @@ -265,20 +258,17 @@ pull_bio <- function(
}

if (convert) {
bio$age <- bio$age_years
bio$weight <- bio$weight_kg
firstup <- function(x) {
substr(x, 1, 1) <- toupper(substr(x, 1, 1))
x
}
if (survey %in% c("Triennial", "AFSC.Slope")) {
bio[[1]][, "weight"] <- bio[[1]][, "weight_kg"]
colnames(bio[[1]]) <- firstup(colnames(bio[[1]]))
if (!is.null(nrow(bio[["age_data"]]))) {
colnames(bio[["length_data"]]) <- firstup(colnames(bio[["length_data"]]))
}

if (!is.null(nrow(bio[[2]]))) {
bio[[2]][, "age"] <- bio[[2]][, "age_years"]
bio[[2]][, "weight"] <- bio[[2]][, "weight_kg"]
colnames(bio[[2]]) <- firstup(colnames(bio[[2]]))
if (!is.null(nrow(bio[["age_data"]]))) {
colnames(bio[["age_data"]]) <- firstup(colnames(bio[["age_data"]]))
}
} else {
colnames(bio) <- firstup(colnames(bio))
Expand Down

0 comments on commit b9e92f3

Please sign in to comment.