diff --git a/DESCRIPTION b/DESCRIPTION index dc58a68..499de46 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: numbat Title: Haplotype-Aware CNV Analysis from scRNA-Seq URL: https://github.com/kharchenkolab/numbat/, https://kharchenkolab.github.io/numbat/ -Version: 1.3.1 +Version: 1.3.2 Authors@R: c(person("Teng","Gao", email="tgao@g.harvard.edu", role=c("cre", "aut")), person("Ruslan", "Soldatov", email="soldatr@mskcc.org", role="aut"), person("Hirak", "Sarkar", email="hirak_sarkar@hms.harvard.edu", role="aut"), person("Evan", "Biederstedt", email="evan.biederstedt@gmail.com", role="aut"), person("Peter", "Kharchenko", email = "peter_kharchenko@hms.harvard.edu", role = "aut")) Description: A computational method that infers copy number variations (CNVs) in cancer scRNA-seq data and reconstructs the tumor phylogeny. 'numbat' integrates signals from gene expression, allelic ratio, and population haplotype structures to accurately infer allele-specific CNVs in single cells and reconstruct their lineage relationship. 'numbat' can be used to: 1. detect allele-specific copy number variations from single-cells; 2. differentiate tumor versus normal cells in the tumor microenvironment; 3. infer the clonal architecture and evolutionary history of profiled tumors. 'numbat' does not require tumor/normal-paired DNA or genotype data, but operates solely on the donor scRNA-data data (for example, 10x Cell Ranger output). Additional examples and documentations are available at . For details on the method please see Gao et al. Nature Biotechnology (2022) . License: MIT + file LICENSE diff --git a/R/diagnostics.R b/R/diagnostics.R index 1150edc..6f38a2e 100644 --- a/R/diagnostics.R +++ b/R/diagnostics.R @@ -80,8 +80,15 @@ check_allele_df = function(df) { log_error(msg) stop(msg) } + + # check chrom prefix + if (any(str_detect(df$CHROM[1], '^chr'))) { + df = df %>% mutate(CHROM = str_remove(CHROM, 'chr')) + } - df = df %>% mutate(CHROM = factor(CHROM, 1:22)) + df = df %>% + filter(CHROM %in% 1:22) %>% + mutate(CHROM = factor(CHROM, 1:22)) return(df) @@ -143,11 +150,25 @@ check_exp_ref = function(lambdas_ref) { if (!is.matrix(lambdas_ref)) { lambdas_ref = as.matrix(lambdas_ref) %>% magrittr::set_colnames('ref') } + + # check if all entries in the reference profile are integers + if (all(lambdas_ref == as.integer(lambdas_ref))) { + msg = "The reference expression matrix 'lambdas_ref' should be normalized gene expression magnitudes. Please use aggregate_counts() function to prepare the reference profile from raw counts." + log_error(msg) + stop(msg) + } else if (any(duplicated(rownames(lambdas_ref)))) { + msg = "Please remove duplicated genes in reference profile" + log_error(msg) + stop(msg) + } + return(lambdas_ref) } + + #' check inter-individual contamination #' @param bulk dataframe Pseudobulk profile #' @return NULL diff --git a/R/main.R b/R/main.R index b380bac..b35cb7b 100644 --- a/R/main.R +++ b/R/main.R @@ -836,9 +836,8 @@ run_group_hmms = function( bad = sapply(results, inherits, what = "try-error") if (any(bad)) { - log_error(glue('job {paste(which(bad), collapse = ",")} failed')) log_error(results[bad][[1]]) - message(results[bad][[1]]) + stop(results[bad][[1]]) } bulks = results %>% bind_rows() %>% diff --git a/R/utils.R b/R/utils.R index 040c49c..893d122 100644 --- a/R/utils.R +++ b/R/utils.R @@ -924,17 +924,24 @@ phi_hat_roll = function(Y_obs, lambda_ref, d_obs, mu, sig, h) { #' @return vector of alphabetical postfixes #' @keywords internal generate_postfix <- function(n) { - alphabet <- letters - postfixes <- sapply(n, function(i) { - postfix <- character(0) - while (i > 0) { - remainder <- (i - 1) %% 26 - i <- (i - 1) %/% 26 - postfix <- c(alphabet[remainder + 1], postfix) + + if (any(is.na(n))) { + stop("Segment number cannot contain NA") } - paste(postfix, collapse = "") - }) - return(postfixes) + + alphabet <- letters + + postfixes <- sapply(n, function(i) { + postfix <- character(0) + while (i > 0) { + remainder <- (i - 1) %% 26 + i <- (i - 1) %/% 26 + postfix <- c(alphabet[remainder + 1], postfix) + } + paste(postfix, collapse = "") + }) + + return(postfixes) } #' Annotate copy number segments after HMM decoding @@ -997,7 +1004,8 @@ annot_haplo_segs = function(bulk) { #' @return dataframe Pseudobulk profile #' @keywords internal smooth_segs = function(bulk, min_genes = 10) { - bulk %>% group_by(seg) %>% + + bulk = bulk %>% group_by(seg) %>% mutate( cnv_state = ifelse(n_genes <= min_genes, NA, cnv_state) ) %>% @@ -1006,6 +1014,17 @@ smooth_segs = function(bulk, min_genes = 10) { mutate(cnv_state = zoo::na.locf(cnv_state, fromLast = FALSE, na.rm=FALSE)) %>% mutate(cnv_state = zoo::na.locf(cnv_state, fromLast = TRUE, na.rm=FALSE)) %>% ungroup() + + chrom_na = bulk %>% group_by(CHROM) %>% summarise(all_na = all(is.na(cnv_state))) + + if (any(chrom_na$all_na)) { + chroms_na = paste0(chrom_na$CHROM[chrom_na$all_na], collapse = ',') + msg = glue("No segments containing more than {min_genes} genes for CHROM {chroms_na}.") + log_error(msg) + stop(msg) + } + + return(bulk) } #' Annotate a consensus segments on a pseudobulk dataframe @@ -1101,7 +1120,7 @@ find_common_diploid = function( } # define balanced regions in each sample - bulks = mclapply( + results = mclapply( bulks %>% split(.$sample), mc.cores = ncores, function(bulk) { @@ -1123,8 +1142,16 @@ find_common_diploid = function( smooth_segs(min_genes = min_genes) %>% annot_segs(var = 'cnv_state') - }) %>% - bind_rows() + }) + + bad = sapply(results, inherits, what = "try-error") + + if (any(bad)) { + log_error(results[bad][[1]]) + stop(results[bad][[1]]) + } else { + bulks = results %>% bind_rows() + } # always exclude clonal LOH regions if any if (any(bulks$loh)) { diff --git a/man/likelihood_allele.Rd b/man/likelihood_allele.Rd new file mode 100644 index 0000000..93ff3e3 --- /dev/null +++ b/man/likelihood_allele.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/hmm.R +\name{likelihood_allele} +\alias{likelihood_allele} +\title{Only compute total log likelihood from an allele HMM} +\usage{ +likelihood_allele(hmm) +} +\arguments{ +\item{hmm}{HMM object; expect variables x (allele depth), d (total depth), +logPi (log transition prob matrix), delta (prior for each state), +alpha (alpha for each state), beta (beta for each state), +states (states), p_s (phase switch probs)} +} +\description{ +Only compute total log likelihood from an allele HMM +} +\keyword{internal}