diff --git a/DESCRIPTION b/DESCRIPTION
index dc58a68..499de46 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
Package: numbat
Title: Haplotype-Aware CNV Analysis from scRNA-Seq
URL: https://github.com/kharchenkolab/numbat/, https://kharchenkolab.github.io/numbat/
-Version: 1.3.1
+Version: 1.3.2
Authors@R: c(person("Teng","Gao", email="tgao@g.harvard.edu", role=c("cre", "aut")), person("Ruslan", "Soldatov", email="soldatr@mskcc.org", role="aut"), person("Hirak", "Sarkar", email="hirak_sarkar@hms.harvard.edu", role="aut"), person("Evan", "Biederstedt", email="evan.biederstedt@gmail.com", role="aut"), person("Peter", "Kharchenko", email = "peter_kharchenko@hms.harvard.edu", role = "aut"))
Description: A computational method that infers copy number variations (CNVs) in cancer scRNA-seq data and reconstructs the tumor phylogeny. 'numbat' integrates signals from gene expression, allelic ratio, and population haplotype structures to accurately infer allele-specific CNVs in single cells and reconstruct their lineage relationship. 'numbat' can be used to: 1. detect allele-specific copy number variations from single-cells; 2. differentiate tumor versus normal cells in the tumor microenvironment; 3. infer the clonal architecture and evolutionary history of profiled tumors. 'numbat' does not require tumor/normal-paired DNA or genotype data, but operates solely on the donor scRNA-data data (for example, 10x Cell Ranger output). Additional examples and documentations are available at . For details on the method please see Gao et al. Nature Biotechnology (2022) .
License: MIT + file LICENSE
diff --git a/R/diagnostics.R b/R/diagnostics.R
index 1150edc..6f38a2e 100644
--- a/R/diagnostics.R
+++ b/R/diagnostics.R
@@ -80,8 +80,15 @@ check_allele_df = function(df) {
log_error(msg)
stop(msg)
}
+
+ # check chrom prefix
+ if (any(str_detect(df$CHROM[1], '^chr'))) {
+ df = df %>% mutate(CHROM = str_remove(CHROM, 'chr'))
+ }
- df = df %>% mutate(CHROM = factor(CHROM, 1:22))
+ df = df %>%
+ filter(CHROM %in% 1:22) %>%
+ mutate(CHROM = factor(CHROM, 1:22))
return(df)
@@ -143,11 +150,25 @@ check_exp_ref = function(lambdas_ref) {
if (!is.matrix(lambdas_ref)) {
lambdas_ref = as.matrix(lambdas_ref) %>% magrittr::set_colnames('ref')
}
+
+ # check if all entries in the reference profile are integers
+ if (all(lambdas_ref == as.integer(lambdas_ref))) {
+ msg = "The reference expression matrix 'lambdas_ref' should be normalized gene expression magnitudes. Please use aggregate_counts() function to prepare the reference profile from raw counts."
+ log_error(msg)
+ stop(msg)
+ } else if (any(duplicated(rownames(lambdas_ref)))) {
+ msg = "Please remove duplicated genes in reference profile"
+ log_error(msg)
+ stop(msg)
+ }
+
return(lambdas_ref)
}
+
+
#' check inter-individual contamination
#' @param bulk dataframe Pseudobulk profile
#' @return NULL
diff --git a/R/main.R b/R/main.R
index b380bac..b35cb7b 100644
--- a/R/main.R
+++ b/R/main.R
@@ -836,9 +836,8 @@ run_group_hmms = function(
bad = sapply(results, inherits, what = "try-error")
if (any(bad)) {
- log_error(glue('job {paste(which(bad), collapse = ",")} failed'))
log_error(results[bad][[1]])
- message(results[bad][[1]])
+ stop(results[bad][[1]])
}
bulks = results %>% bind_rows() %>%
diff --git a/R/utils.R b/R/utils.R
index 040c49c..893d122 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -924,17 +924,24 @@ phi_hat_roll = function(Y_obs, lambda_ref, d_obs, mu, sig, h) {
#' @return vector of alphabetical postfixes
#' @keywords internal
generate_postfix <- function(n) {
- alphabet <- letters
- postfixes <- sapply(n, function(i) {
- postfix <- character(0)
- while (i > 0) {
- remainder <- (i - 1) %% 26
- i <- (i - 1) %/% 26
- postfix <- c(alphabet[remainder + 1], postfix)
+
+ if (any(is.na(n))) {
+ stop("Segment number cannot contain NA")
}
- paste(postfix, collapse = "")
- })
- return(postfixes)
+
+ alphabet <- letters
+
+ postfixes <- sapply(n, function(i) {
+ postfix <- character(0)
+ while (i > 0) {
+ remainder <- (i - 1) %% 26
+ i <- (i - 1) %/% 26
+ postfix <- c(alphabet[remainder + 1], postfix)
+ }
+ paste(postfix, collapse = "")
+ })
+
+ return(postfixes)
}
#' Annotate copy number segments after HMM decoding
@@ -997,7 +1004,8 @@ annot_haplo_segs = function(bulk) {
#' @return dataframe Pseudobulk profile
#' @keywords internal
smooth_segs = function(bulk, min_genes = 10) {
- bulk %>% group_by(seg) %>%
+
+ bulk = bulk %>% group_by(seg) %>%
mutate(
cnv_state = ifelse(n_genes <= min_genes, NA, cnv_state)
) %>%
@@ -1006,6 +1014,17 @@ smooth_segs = function(bulk, min_genes = 10) {
mutate(cnv_state = zoo::na.locf(cnv_state, fromLast = FALSE, na.rm=FALSE)) %>%
mutate(cnv_state = zoo::na.locf(cnv_state, fromLast = TRUE, na.rm=FALSE)) %>%
ungroup()
+
+ chrom_na = bulk %>% group_by(CHROM) %>% summarise(all_na = all(is.na(cnv_state)))
+
+ if (any(chrom_na$all_na)) {
+ chroms_na = paste0(chrom_na$CHROM[chrom_na$all_na], collapse = ',')
+ msg = glue("No segments containing more than {min_genes} genes for CHROM {chroms_na}.")
+ log_error(msg)
+ stop(msg)
+ }
+
+ return(bulk)
}
#' Annotate a consensus segments on a pseudobulk dataframe
@@ -1101,7 +1120,7 @@ find_common_diploid = function(
}
# define balanced regions in each sample
- bulks = mclapply(
+ results = mclapply(
bulks %>% split(.$sample),
mc.cores = ncores,
function(bulk) {
@@ -1123,8 +1142,16 @@ find_common_diploid = function(
smooth_segs(min_genes = min_genes) %>%
annot_segs(var = 'cnv_state')
- }) %>%
- bind_rows()
+ })
+
+ bad = sapply(results, inherits, what = "try-error")
+
+ if (any(bad)) {
+ log_error(results[bad][[1]])
+ stop(results[bad][[1]])
+ } else {
+ bulks = results %>% bind_rows()
+ }
# always exclude clonal LOH regions if any
if (any(bulks$loh)) {
diff --git a/man/likelihood_allele.Rd b/man/likelihood_allele.Rd
new file mode 100644
index 0000000..93ff3e3
--- /dev/null
+++ b/man/likelihood_allele.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/hmm.R
+\name{likelihood_allele}
+\alias{likelihood_allele}
+\title{Only compute total log likelihood from an allele HMM}
+\usage{
+likelihood_allele(hmm)
+}
+\arguments{
+\item{hmm}{HMM object; expect variables x (allele depth), d (total depth),
+logPi (log transition prob matrix), delta (prior for each state),
+alpha (alpha for each state), beta (beta for each state),
+states (states), p_s (phase switch probs)}
+}
+\description{
+Only compute total log likelihood from an allele HMM
+}
+\keyword{internal}