Skip to content

Commit

Permalink
radiator 1.3.0 2024-02-22
Browse files Browse the repository at this point in the history
* Bug fix using coverage and DArT data
  • Loading branch information
thierrygosselin committed Feb 23, 2024
1 parent c92cc45 commit a0c5345
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 32 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: radiator
Type: Package
Title: RADseq Data Exploration, Manipulation and Visualization using R
Version: 1.2.9
Date: 2024-01-25
Version: 1.3.0
Date: 2024-02-22
Encoding: UTF-8
Authors@R: c(
person("Thierry", "Gosselin", email = "[email protected]", role = c("aut", "cre")),
Expand Down Expand Up @@ -56,7 +56,7 @@ License: GPL-3
LazyLoad: yes
VignetteBuilder:
knitr
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
ByteCompile: TRUE
URL: https://thierrygosselin.github.io/radiator/
BugReports: https://github.com/thierrygosselin/radiator/issues
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# radiator 1.3.0 2024-02-22

* Bug fix using coverage and DArT data


# radiator 1.2.9 2024-01-25

* Bug fix stemming from genalex files and genind conversion
Expand Down
4 changes: 3 additions & 1 deletion R/filter_coverage.R
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ filter_coverage <- function(
verbose = verbose)

# Verify that coverage information is present in the data...
depth.info <- check_coverage(gds = data, stacks.haplo.check = TRUE, dart.check = TRUE)
depth.info <- check_coverage(gds = data, genotypes.metadata.check = TRUE, stacks.haplo.check = TRUE, dart.check = TRUE)
if (is.null(depth.info)) {
message("\n\nCoverate information is not available for this dataset, returning GDS...")
return(data)
Expand All @@ -204,6 +204,8 @@ filter_coverage <- function(
individuals = FALSE,
markers = TRUE,
missing = FALSE,
coverage = TRUE,
# allele.coverage = TRUE,
allele.coverage = FALSE,
mac = FALSE,
heterozygosity = FALSE,
Expand Down
76 changes: 53 additions & 23 deletions R/gds.R
Original file line number Diff line number Diff line change
Expand Up @@ -967,9 +967,9 @@ check_coverage <- function(gds, genotypes.metadata.check = FALSE, stacks.haplo.c
# stacks haplotype vcf have the info fields for depth in the VCF header
# but they do not have the info with genotypes...
# it's laziness from stacks...
data.source <- extract_data_source(gds)

if (stacks.haplo.check) {
data.source <- extract_data_source(gds)
biallelic <- radiator::detect_biallelic_markers(data = gds)
biallelic <- gdsfmt::read.gdsn(gdsfmt::index.gdsn(
node = gds, path = "radiator/biallelic", silent = TRUE))
Expand All @@ -989,7 +989,10 @@ check_coverage <- function(gds, genotypes.metadata.check = FALSE, stacks.haplo.c
markers.meta.select = c("AVG_COUNT_REF", "AVG_COUNT_SNP"),
whitelist = TRUE
)
if (!is.null(got.coverage)) got.coverage <- c("AVG_COUNT_REF", "AVG_COUNT_SNP")
if (!is.null(got.coverage)) {
got.coverage <- c("AVG_COUNT_REF", "AVG_COUNT_SNP")
return(got.coverage)
}
}#End DART 1row and 2 rows
}

Expand All @@ -1006,6 +1009,7 @@ check_coverage <- function(gds, genotypes.metadata.check = FALSE, stacks.haplo.c
# this part might generate an error if you actually have genotypes metadata...
# need to run tests...
got.coverage <- geno.index
return(got.coverage)
}
geno.index <- NULL
}
Expand All @@ -1018,13 +1022,9 @@ check_coverage <- function(gds, genotypes.metadata.check = FALSE, stacks.haplo.c
check = "none", verbose = FALSE)$ID

if (length(have) > 0) {
# if (!exhaustive) {
# want <- c("DP", "CATG")
# } else {
want <- c("DP", "AD", "CATG")
# }

got.coverage <- purrr::keep(.x = have, .p = have %in% want)
return(got.coverage)
} else {
got.coverage <- NULL
}
Expand Down Expand Up @@ -2461,7 +2461,7 @@ generate_stats <- function(
genotypes.metadata.check = TRUE,
dart.check = TRUE
)
if (!"DP" %in% got.coverage) coverage <- FALSE
if (!"DP" %in% got.coverage && !"READ_DEPTH" %in% got.coverage) coverage <- FALSE
if (!"AD" %in% got.coverage) allele.coverage <- FALSE
if (!exhaustive) allele.coverage <- FALSE
got.coverage <- NULL
Expand Down Expand Up @@ -2496,28 +2496,52 @@ generate_stats <- function(
replacement = c("COVERAGE_TOTAL", "COVERAGE_MEAN", "COVERAGE_MEDIAN", "COVERAGE_IQR"),
vectorize_all = FALSE
)
data.source <- radiator::extract_data_source(gds = gds)

if ("dart" %in% data.source) {
dart.data <- radiator::extract_genotypes_metadata(
gds = gds,
genotypes.meta.select = c("M_SEQ", "ID_SEQ", "READ_DEPTH"),
whitelist = TRUE
) %>%
radiator::rad_wide(
x = .,
formula = "ID_SEQ ~ M_SEQ",
values_from = "READ_DEPTH"
) %>%
dplyr::select(-ID_SEQ)
colnames(dart.data) <- NULL
dart.data <- as.matrix(dart.data)
} else {
dart.data <- NULL
}

if (markers) {
dp_f_m <- function(gds, coverage.stats) {

dp_f_m <- function(gds, coverage.stats, dart.data) {
# Using switch instead was not optimal for additional options in the func...
if (coverage.stats == "sum") rad_cov_stats <- function(x) round(sum(x, na.rm = TRUE))
if (coverage.stats == "mean") rad_cov_stats <- function(x) round(mean(x, na.rm = TRUE))
if (coverage.stats == "median") rad_cov_stats <- function(x) round(stats::median(x, na.rm = TRUE))
# if (coverage.stats == "iqr") rad_cov_stats <- function(x) round(abs(diff(stats::quantile(x, probs = c(0.25, 0.75), na.rm = TRUE))))
if (coverage.stats == "iqr") rad_cov_stats <- function(x) round(matrixStats::iqr(x, na.rm = TRUE)) # faster

x <- SeqArray::seqApply(
gdsfile = gds,
var.name = "annotation/format/DP",
FUN = rad_cov_stats,
as.is = "integer",
margin = "by.variant",
parallel = TRUE
)
if (!is.null(dart.data)) {
x <- as.integer(apply(X = dart.data, MARGIN = 2, FUN = rad_cov_stats))
dart.data <- NULL
} else {
x <- SeqArray::seqApply(
gdsfile = gds,
var.name = "annotation/format/DP",
FUN = rad_cov_stats,
as.is = "integer",
margin = "by.variant",
parallel = TRUE
)
}
return(x)
}

dp.m <- purrr::map_dfc(.x = coverage.stats.l, .f = dp_f_m, gds = gds)
dp.m <- purrr::map_dfc(.x = coverage.stats.l, .f = dp_f_m, gds = gds, dart.data = dart.data)
}

if (individuals) {
Expand All @@ -2527,10 +2551,16 @@ generate_stats <- function(
# Note to myself: the huge speed gain by using other packages robustbse, Rfast, etc.
# is not worth the unreliability of the results check your testing files...

dp.temp <- SeqArray::seqGetData(
gdsfile = gds,
var.name = "annotation/format/DP"
)
if ("dart" %in% data.source) {
dp.temp <- dart.data
dart.data <- NULL
} else {
dp.temp <- SeqArray::seqGetData(
gdsfile = gds,
var.name = "annotation/format/DP"
)
}


dp_f_i <- function(coverage.stats, x) {
if ("sum" %in% coverage.stats) x <- rowSums(x, na.rm = TRUE)
Expand Down
25 changes: 22 additions & 3 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -977,9 +977,28 @@ strip_rad <- function(
) {
objs <- utils::object.size(x)

# STRATA ----------
strata.n <- intersect(colnames(x), c("STRATA", "POP_ID"))
# Check if ID_SEQ, STRATA_SEQ and M_SEQ present...
if (rlang::has_name(x, "ID_SEQ") && rlang::has_name(x, "INDIVIDUALS")) {
x %<>% dplyr::select(-ID_SEQ)
}

if (rlang::has_name(x, "STRATA_SEQ")) {
strata.n <- intersect(colnames(x), c("STRATA", "POP_ID"))
if (length(strata.n) > 0 && strata.n %in% c("STRATA", "POP_ID")) {
x %<>% dplyr::select(-STRATA_SEQ)
}
}

if (rlang::has_name(x, "M_SEQ")) {
markers.n <- intersect(colnames(x), c("VARIANT_ID", "CHROM", "LOCUS", "POS", "MARKERS"))
if (length(markers.n) > 0) {
x %<>% dplyr::select(-M_SEQ)
}
}
strata.n <- markers.n <- NULL


# STRATA ----------
if (rlang::has_name(x, "POP_ID")) {
strata <- radiator::generate_strata(data = x, pop.id = TRUE) %>%
dplyr::mutate(
Expand Down Expand Up @@ -1009,7 +1028,7 @@ strip_rad <- function(
pos = env.arg,
envir = env.arg
)
cm <- keep.strata <- pop.id <- strata.n <- strata <- NULL
cm <- keep.strata <- pop.id <- strata <- NULL

# MARKERS ---------
x %<>%
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ state and is being actively
developed.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active)
[![minimal R
version](https://img.shields.io/badge/R%3E%3D-NA-6666ff.svg)](https://cran.r-project.org/)
[![packageversion](https://img.shields.io/badge/Package%20version-1.2.9-orange.svg)](commits/master)
[![Last-changedate](https://img.shields.io/badge/last%20change-2024--01--25-brightgreen.svg)](/commits/master)
[![packageversion](https://img.shields.io/badge/Package%20version-1.3.0-orange.svg)](commits/master)
[![Last-changedate](https://img.shields.io/badge/last%20change-2024--02--22-brightgreen.svg)](/commits/master)
[![R-CMD-check](https://github.com/thierrygosselin/radiator/workflows/R-CMD-check/badge.svg)](https://github.com/thierrygosselin/radiator/actions)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3687060.svg)](https://doi.org/10.5281/zenodo.3687060)
<!-- badges: end -->
Expand Down

0 comments on commit a0c5345

Please sign in to comment.