Skip to content

Commit

Permalink
Merge branch 'devel' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
teng-gao committed Jun 5, 2023
2 parents a367fa5 + 6989a3e commit 579b2f9
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 18 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: numbat
Title: Haplotype-Aware CNV Analysis from scRNA-Seq
URL: https://github.com/kharchenkolab/numbat/, https://kharchenkolab.github.io/numbat/
Version: 1.3.1
Version: 1.3.2
Authors@R: c(person("Teng","Gao", email="[email protected]", role=c("cre", "aut")), person("Ruslan", "Soldatov", email="[email protected]", role="aut"), person("Hirak", "Sarkar", email="[email protected]", role="aut"), person("Evan", "Biederstedt", email="[email protected]", role="aut"), person("Peter", "Kharchenko", email = "[email protected]", role = "aut"))
Description: A computational method that infers copy number variations (CNVs) in cancer scRNA-seq data and reconstructs the tumor phylogeny. 'numbat' integrates signals from gene expression, allelic ratio, and population haplotype structures to accurately infer allele-specific CNVs in single cells and reconstruct their lineage relationship. 'numbat' can be used to: 1. detect allele-specific copy number variations from single-cells; 2. differentiate tumor versus normal cells in the tumor microenvironment; 3. infer the clonal architecture and evolutionary history of profiled tumors. 'numbat' does not require tumor/normal-paired DNA or genotype data, but operates solely on the donor scRNA-data data (for example, 10x Cell Ranger output). Additional examples and documentations are available at <https://kharchenkolab.github.io/numbat/>. For details on the method please see Gao et al. Nature Biotechnology (2022) <doi:10.1038/s41587-022-01468-y>.
License: MIT + file LICENSE
Expand Down
23 changes: 22 additions & 1 deletion R/diagnostics.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,15 @@ check_allele_df = function(df) {
log_error(msg)
stop(msg)
}

# check chrom prefix
if (any(str_detect(df$CHROM[1], '^chr'))) {
df = df %>% mutate(CHROM = str_remove(CHROM, 'chr'))
}

df = df %>% mutate(CHROM = factor(CHROM, 1:22))
df = df %>%
filter(CHROM %in% 1:22) %>%
mutate(CHROM = factor(CHROM, 1:22))

return(df)

Expand Down Expand Up @@ -143,11 +150,25 @@ check_exp_ref = function(lambdas_ref) {
if (!is.matrix(lambdas_ref)) {
lambdas_ref = as.matrix(lambdas_ref) %>% magrittr::set_colnames('ref')
}

# check if all entries in the reference profile are integers
if (all(lambdas_ref == as.integer(lambdas_ref))) {
msg = "The reference expression matrix 'lambdas_ref' should be normalized gene expression magnitudes. Please use aggregate_counts() function to prepare the reference profile from raw counts."
log_error(msg)
stop(msg)
} else if (any(duplicated(rownames(lambdas_ref)))) {
msg = "Please remove duplicated genes in reference profile"
log_error(msg)
stop(msg)
}


return(lambdas_ref)

}



#' check inter-individual contamination
#' @param bulk dataframe Pseudobulk profile
#' @return NULL
Expand Down
3 changes: 1 addition & 2 deletions R/main.R
Original file line number Diff line number Diff line change
Expand Up @@ -836,9 +836,8 @@ run_group_hmms = function(
bad = sapply(results, inherits, what = "try-error")

if (any(bad)) {
log_error(glue('job {paste(which(bad), collapse = ",")} failed'))
log_error(results[bad][[1]])
message(results[bad][[1]])
stop(results[bad][[1]])
}

bulks = results %>% bind_rows() %>%
Expand Down
55 changes: 41 additions & 14 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -924,17 +924,24 @@ phi_hat_roll = function(Y_obs, lambda_ref, d_obs, mu, sig, h) {
#' @return vector of alphabetical postfixes
#' @keywords internal
generate_postfix <- function(n) {
alphabet <- letters
postfixes <- sapply(n, function(i) {
postfix <- character(0)
while (i > 0) {
remainder <- (i - 1) %% 26
i <- (i - 1) %/% 26
postfix <- c(alphabet[remainder + 1], postfix)

if (any(is.na(n))) {
stop("Segment number cannot contain NA")
}
paste(postfix, collapse = "")
})
return(postfixes)

alphabet <- letters

postfixes <- sapply(n, function(i) {
postfix <- character(0)
while (i > 0) {
remainder <- (i - 1) %% 26
i <- (i - 1) %/% 26
postfix <- c(alphabet[remainder + 1], postfix)
}
paste(postfix, collapse = "")
})

return(postfixes)
}

#' Annotate copy number segments after HMM decoding
Expand Down Expand Up @@ -997,7 +1004,8 @@ annot_haplo_segs = function(bulk) {
#' @return dataframe Pseudobulk profile
#' @keywords internal
smooth_segs = function(bulk, min_genes = 10) {
bulk %>% group_by(seg) %>%

bulk = bulk %>% group_by(seg) %>%
mutate(
cnv_state = ifelse(n_genes <= min_genes, NA, cnv_state)
) %>%
Expand All @@ -1006,6 +1014,17 @@ smooth_segs = function(bulk, min_genes = 10) {
mutate(cnv_state = zoo::na.locf(cnv_state, fromLast = FALSE, na.rm=FALSE)) %>%
mutate(cnv_state = zoo::na.locf(cnv_state, fromLast = TRUE, na.rm=FALSE)) %>%
ungroup()

chrom_na = bulk %>% group_by(CHROM) %>% summarise(all_na = all(is.na(cnv_state)))

if (any(chrom_na$all_na)) {
chroms_na = paste0(chrom_na$CHROM[chrom_na$all_na], collapse = ',')
msg = glue("No segments containing more than {min_genes} genes for CHROM {chroms_na}.")
log_error(msg)
stop(msg)
}

return(bulk)
}

#' Annotate a consensus segments on a pseudobulk dataframe
Expand Down Expand Up @@ -1101,7 +1120,7 @@ find_common_diploid = function(
}

# define balanced regions in each sample
bulks = mclapply(
results = mclapply(
bulks %>% split(.$sample),
mc.cores = ncores,
function(bulk) {
Expand All @@ -1123,8 +1142,16 @@ find_common_diploid = function(
smooth_segs(min_genes = min_genes) %>%
annot_segs(var = 'cnv_state')

}) %>%
bind_rows()
})

bad = sapply(results, inherits, what = "try-error")

if (any(bad)) {
log_error(results[bad][[1]])
stop(results[bad][[1]])
} else {
bulks = results %>% bind_rows()
}

# always exclude clonal LOH regions if any
if (any(bulks$loh)) {
Expand Down
18 changes: 18 additions & 0 deletions man/likelihood_allele.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 579b2f9

Please sign in to comment.