diff --git a/clusters/data/data.json b/clusters/data/data.json index 786f5a8..fd3d721 100644 --- a/clusters/data/data.json +++ b/clusters/data/data.json @@ -2,6 +2,14 @@ "joint_mouse": [ + { + + "file": "all_mm10_genes.txt", + "path": "selin.jessa/from_hydra/atlas/data/references/all_mm10_genes.txt", + "description": "Names of all genes in the annotation", + "contents": "Plain text (.txt) file with one gene name on each line of the file" + + }, { "file": "ID_20190715_dendrogram_order.Rda", diff --git a/clusters/functions.R b/clusters/functions.R index 6f43c39..3b5ccbc 100644 --- a/clusters/functions.R +++ b/clusters/functions.R @@ -796,16 +796,20 @@ noTicks <- function() { #' Determine if a background colour is dark enough to warrant white text #' -#' @param hex_color String, colour in hex colour format e.g. #000000 +#' @param hex_color String, colour in hex colour format e.g. "#000000" #' #' @return TRUE if the colour is dark enough (arbitrary) dark <- function(hex_color) { + # Store the first, third, and fifth character in the string (after "#") red <- substr(hex_color, 2, 2) green <- substr(hex_color, 4, 4) blue <- substr(hex_color, 6, 6) - dark_nums <- c(0:8) + # Output that a colour is "dark" if any of these 3 characters is an + # integer between 0 and 8 inclusively. (Higher hexadecimal numbers + # indicate less color e.g. #FFFFFF is white.) + dark_nums <- c(0:8) if ((red %in% dark_nums && blue %in% dark_nums) || (red %in% dark_nums && green %in% dark_nums) || (green %in% dark_nums && blue %in% dark_nums)) { @@ -820,6 +824,7 @@ dark <- function(hex_color) { } #' Add ticks below a bar plot to categorize x axis into less granular categories +#' (Adapted from Selin's code) #' #' @param df Dataframe, containing the data to use #' [...] @@ -878,18 +883,46 @@ add_class_ticks <- function(df, classes, height, sep, start, label_x_pos, palett #' Default: FALSE (i.e. check against list of dataset genes, not annotation) #' #' @return A list of inputs that do not match the list of accepted genes - check_genes <- function(user_genes, - n = 20, + n = NULL, annotation = FALSE) { if (!is.null(n)) { user_genes <- head(user_genes, n) } - if (!(all(user_genes %in% genes_mouse))) { - return(user_genes[!(user_genes %in% genes_mouse)]) + if(annotation){ + check_against <- genes_anno + } else { + check_against <- genes_mouse + } + + if (!(all(user_genes %in% check_against))) { + return(user_genes[!(user_genes %in% check_against)]) } else { return(NULL) } } + +makePheatmapAnno <- function(palette, column) { + + palette <- palette[unique(names(palette))] + + anno_row <- data.frame(cluster = names(palette)) + names(anno_row) <- column + rownames(anno_row) <- anno_row[[1]] + side_colors <- list(cluster = palette) + names(side_colors) <- column + + return(list(anno_row = anno_row, + side_colors = side_colors)) + +} + +# Code from https://stackoverflow.com/questions/61874876/get-size-of-plot-in-pixels-in-r +get_plot_dims <- function(heat_map) +{ + plot_height <- sum(sapply(heat_map$gtable$heights, grid::convertHeight, "in")) + plot_width <- sum(sapply(heat_map$gtable$widths, grid::convertWidth, "in")) + return(list(height = plot_height, width = plot_width)) +} diff --git a/clusters/global.R b/clusters/global.R index cde3698..1588e94 100644 --- a/clusters/global.R +++ b/clusters/global.R @@ -10,7 +10,6 @@ library(glue) library(stringr) library(ggplot2) library(ggrepel) -#library(DT) library(purrr) library(readr) library(shinyWidgets) @@ -34,12 +33,28 @@ pons_palette_joint <- readRDS("data/joint_pons/joint_pons.palette_ID_20190715_ # Joint mouse colour palette load("data/joint_mouse/joint_mouse.palette_ID_20190715.Rda") +# General cell type palette +general_palette <- c("Progenitors/cyc." = "#ffaf49", + "Oligodendrocytes" = "#b7dd5f", + "Astrocytes" = "#00a385", + "Ependymal" = "#8ee5cf", + "Neurons" = "#840200", + "Non-neuroect." = "gray40", + "Other" = "gray60") + # Vector specifying the order of clusters in the dendrogram load("data/joint_mouse/ID_20190715_dendrogram_order.Rda") -# Load names of genes detected in mouse to provide choices in input +# Load names of genes detected in mouse - genes for which there is data in atlas genes_mouse <- data.table::fread("data/joint_mouse/joint_mouse.gene_names.tsv", data.table = FALSE)$genes +# Load all genes in mouse annotation - to validate input from users & provide as choices +# Some of these genes may not have corresponding data in the atlas - +# i.e. genes_mouse (above) is a subset of genes_anno +genes_anno <- data.table::fread("data/all_mm10_genes.txt", header = FALSE, data.table=FALSE) +names(genes_anno) <- "Genes" +genes_anno <- genes_anno[['Genes']] + # ---- Shiny settings ---- # Enable bookmarking diff --git a/clusters/server.R b/clusters/server.R index 7415a37..bcda5e7 100644 --- a/clusters/server.R +++ b/clusters/server.R @@ -14,6 +14,9 @@ ggplot2::theme_set(theme_min()) server <- function(input, output, session) { + updateSelectizeInput(session, inputId = "gene", choices = genes_anno, + server = TRUE) + # Capture all input from this tab as a list in case we want to add # more options in the future input_new <- eventReactive(input$update, { @@ -30,7 +33,10 @@ server <- function(input, output, session) { tsv = scan(input$genelist$datapath, what = "string", sep = "\t", encoding = "UTF-8", fileEncoding = "UTF-8-BOM"), - validate("\n\n\nInvalid file; Please upload a .csv or .tsv file") + txt = scan(input$genelist$datapath, + what = "string", sep = "\t", + encoding = "UTF-8", fileEncoding = "UTF-8-BOM"), + validate("\n\n\nInvalid file; Please upload a .txt, .csv, or .tsv file") ) }) @@ -51,7 +57,8 @@ server <- function(input, output, session) { "ft_palette" = input$feature_palette, "vln_points" = input$vln_points, "plotly_ribbon" = input$plotly_ribbon, - "mean_exp" = input$mean_exp + "mean_exp" = input$mean_exp, + "heatmap_cells" = input$heatmap_cells ) # Get the columns for the appropriate type of dim red @@ -108,13 +115,26 @@ server <- function(input, output, session) { need(length(input_new()$gene) > 0, "\n\n\nPlease enter a gene.") ) - # Check first 20 inputs against the dataset genes - error_genes <- check_genes(input_new()$gene, 20) + # Check first 20 gene inputs against the dataset & annotations + not_data_genes <- check_genes(input_new()$gene, 20, annotation = FALSE) + not_anno_genes <- check_genes(input_new()$gene, 20, annotation = TRUE) + + # Store genes that are within the annotation but not in dataset + if (setequal(not_data_genes, not_anno_genes)){ + anno_genes <- NULL + } else { + anno_genes <- setdiff(not_data_genes, not_anno_genes) + } + validate( - need(is.null(error_genes), - glue("\n\n\nThe input gene \"{error_genes}\" does not exist in the dataset.")) + need(is.null(anno_genes), + glue("\n\n\nThe input gene \"{anno_genes}\" is in the gene annotation but was not detected in this dataset.")) ) + validate( + need(is.null(not_anno_genes), + glue("\n\n\nThe input gene \"{not_anno_genes}\" was not found in the gene annotation.")) + ) # Only display mean if more than one gene is given AND the user requested it valid_mean <- FALSE @@ -247,6 +267,9 @@ server <- function(input, output, session) { # .after = last_col()) # } + # Create palette for expression level + orange_pal <- function(x) rgb(colorRamp(c("#ffe4cc", "#ffb54d"))(x), maxColorValue = 255) + # Produce a data table reactable(table, rownames = FALSE, @@ -256,8 +279,13 @@ server <- function(input, output, session) { showSortable = TRUE, fullWidth = FALSE, showPageSizeOptions = TRUE, pageSizeOptions = c(10, 20, 40), defaultPageSize = 10, - defaultColDef = colDef(minWidth = 80), - # Override colDef manually for the first few rows + # Formatting for gene columns - color based on expression level + defaultColDef = colDef(minWidth = 80, + style = function(value) { + color <- orange_pal(value) + list(background = color) + }), + # Override colDef manually for the first few rows (not genes) columns = list( Cluster = colDef(minWidth = 110, style = function(index){ @@ -276,10 +304,10 @@ server <- function(input, output, session) { # headerStyle = # list(position = "sticky", left = 0, background = "#fff", zIndex = 1) ), - Sample = colDef(minWidth = 125), - "Cell type" = colDef(minWidth = 200), - "Cell class" = colDef(minWidth = 150), - "Number of cells" = colDef(minWidth = 100) + Sample = colDef(minWidth = 125, style = list(background = "#FFFFFF")), + "Cell type" = colDef(minWidth = 200, style = list(background = "#FFFFFF")), + "Cell class" = colDef(minWidth = 150, style = list(background = "#FFFFFF")), + "Number of cells" = colDef(minWidth = 100, style = list(background = "#FFFFFF")) ) ) }) @@ -336,17 +364,32 @@ server <- function(input, output, session) { need(length(input_new()$gene) > 0, "\n\n\nPlease enter a gene.") ) - # Check user-selected input against the dataset genes - error_genes <- check_genes(input$pick_timecourse, 1) + # Check the selected gene against the dataset & annotations + not_data_genes <- check_genes(input$pick_timecourse, 1, annotation = FALSE) + not_anno_genes <- check_genes(input$pick_timecourse, 1, annotation = TRUE) + + # Store genes that are within the annotation but not in dataset + if (setequal(not_data_genes, not_anno_genes)){ + anno_genes <- NULL + } else { + anno_genes <- setdiff(not_data_genes, not_anno_genes) + } + + validate( + need(is.null(anno_genes), + glue("\n\n\nThe input gene \"{anno_genes}\" is in the gene annotation but was not detected in this dataset.")) + ) + validate( - need(is.null(error_genes), - glue("\n\n\nThe input gene \"{error_genes}\" does not exist in the dataset.")) + need(is.null(not_anno_genes), + glue("\n\n\nThe input gene \"{not_anno_genes}\" was not found in the gene annotation.")) ) + # Check if expression is all zero in the brain region + # TODO: fix this to make it work all_zero <- ribbon_plot(gene = input$pick_timecourse, region = input_new()$region)$zero - # Display message to the user instead of plot if 0 expression throughout region validate( need(all_zero == FALSE, "This gene has no detected expression in the selected brain region.") ) @@ -384,17 +427,32 @@ server <- function(input, output, session) { need(length(input_new()$gene) > 0, "\n\n\nPlease enter a gene.") ) - # Check first input against the dataset genes - error_genes <- check_genes(input$pick_timecourse, 1) + # Check the selected gene against the dataset & annotations + not_data_genes <- check_genes(input$pick_timecourse, 1, annotation = FALSE) + not_anno_genes <- check_genes(input$pick_timecourse, 1, annotation = TRUE) + + # Store genes that are within the annotation but not in dataset + if (setequal(not_data_genes, not_anno_genes)){ + anno_genes <- NULL + } else { + anno_genes <- setdiff(not_data_genes, not_anno_genes) + } + validate( - need(is.null(error_genes), - glue("\n\n\nThe input gene \"{error_genes}\" does not exist in the dataset.")) + need(is.null(anno_genes), + glue("\n\n\nThe input gene \"{anno_genes}\" is in the gene annotation but was not detected in this dataset.")) ) + validate( + need(is.null(not_anno_genes), + glue("\n\n\nThe input gene \"{not_anno_genes}\" was not found in the gene annotation.")) + ) + + # Check if expression is all zero in the brain region + # TODO: fix this to make it work all_zero = ribbon_plot(gene = input$pick_timecourse, region = input_new()$region)$zero - # Display message to the user if there is 0 expression throughout region validate( need(all_zero == FALSE, "This gene has no detected expression in the selected brain region.") ) @@ -437,11 +495,25 @@ server <- function(input, output, session) { need(length(input_new()$gene) > 0, "\n\n\nPlease enter a gene.") ) - # Check ALL inputs against the dataset genes - error_genes <- check_genes(input_new()$gene) + # Check ALL gene inputs against the dataset & annotations + not_data_genes <- check_genes(input_new()$gene, annotation = FALSE) + not_anno_genes <- check_genes(input_new()$gene, annotation = TRUE) + + # Store genes that are within the annotation but not in dataset + if (setequal(not_data_genes, not_anno_genes)){ + anno_genes <- NULL + } else { + anno_genes <- setdiff(not_data_genes, not_anno_genes) + } + validate( - need(is.null(error_genes), - glue("\n\n\nThe input gene \"{error_genes}\" does not exist in the dataset.")) + need(is.null(anno_genes), + glue("\n\n\nThe input gene \"{anno_genes}\" is in the gene annotation but was not detected in this dataset.")) + ) + + validate( + need(is.null(not_anno_genes), + glue("\n\n\nThe input gene \"{not_anno_genes}\" was not found in the gene annotation.")) ) # Load the Cell barcode, 2D coordinates, and selected clustering solution @@ -721,24 +793,32 @@ server <- function(input, output, session) { if(input_new()$mean_exp){ # Check ALL inputs against the dataset genes - error_genes <- check_genes(input_new()$gene) + num_genes <- NULL } else{ # Check only first input against the dataset genes - error_genes <- check_genes(input_new()$gene, 1) + num_genes <- 1 + } + + # Check gene inputs against the dataset & annotations + not_data_genes <- check_genes(input_new()$gene, num_genes, annotation = FALSE) + not_anno_genes <- check_genes(input_new()$gene, num_genes, annotation = TRUE) + + # Store genes that are within the annotation but not in dataset + if (setequal(not_data_genes, not_anno_genes)){ + anno_genes <- NULL + } else { + anno_genes <- setdiff(not_data_genes, not_anno_genes) } validate( - need(is.null(error_genes), - glue("\n\n\nThe input gene \"{error_genes}\" does not exist in the dataset.")) + need(is.null(anno_genes), + glue("\n\n\nThe input gene \"{anno_genes}\" is in the gene annotation but was not detected in this dataset.")) ) - palette_tick_plot <- c("Progenitors/cyc." = "#ffaf49", - "Oligodendrocytes" = "#b7dd5f", - "Astrocytes" = "#00a385", - "Ependymal" = "#8ee5cf", - "Neurons" = "#840200", - "Non-neuroect." = "gray40", - "Other" = "gray60") + validate( + need(is.null(not_anno_genes), + glue("\n\n\nThe input gene \"{not_anno_genes}\" was not found in the gene annotation.")) + ) if (input_new()$mean_exp){ df <- bubble_prep(gene = input_new()$gene, @@ -782,7 +862,7 @@ server <- function(input, output, session) { ylab(y_axis_text) ticks <- ggplot() + add_class_ticks(df, unique(df$Cell_class), - palette = palette_tick_plot, + palette = general_palette, start = -5, sep = 5, height = 30, label_x_pos = -16, fontsize = 3) + # Make sure to expand to the same value that's in p1 expand_limits(x = -18) + @@ -799,5 +879,130 @@ server <- function(input, output, session) { plot_grid(p1, ticks, ncol = 1, align = "v") }) + #### ---- Cell types clustered by expression tab content ---- + + # Initial values, will show up briefly while plot is loading, so set + # large height value so the plot that shows briefly is out of view + heatmap_dims <- reactiveValues(height = "100in", width = "12in") + + output$heatmap <- renderPlot({ + + # Check whether a gene was provided or not + validate( + need(length(input_new()$gene) > 0, "\n\n\n Please enter a gene.") + ) + + # Check if number of genes > 1 - need at least 2 genes to cluster + validate( + need(length(input_new()$gene) > 1, "\n\n\n Please enter more than one gene. Heatmap clustering requires at least two genes.") + ) + + # Check ALL gene inputs against the dataset & annotations + not_data_genes <- check_genes(input_new()$gene, annotation = FALSE) + not_anno_genes <- check_genes(input_new()$gene, annotation = TRUE) + + # Store genes that are within the annotation but not in dataset + if (setequal(not_data_genes, not_anno_genes)){ + anno_genes <- NULL + } else { + anno_genes <- setdiff(not_data_genes, not_anno_genes) + } + + validate( + need(is.null(anno_genes), + glue("\n\n\n The input gene \"{anno_genes}\" is in the gene annotation but was not detected in this dataset.")) + ) + + validate( + need(is.null(not_anno_genes), + glue("\n\n\n The input gene \"{not_anno_genes}\" was not found in the gene annotation.")) + ) + + # Check that at least one cell type has been chosen by the user + validate( + need(length(input_new()$heatmap_cells) > 0, + glue("\n\n\n Please select at least one cell type.")) + ) + + # Get gene expression values for input genes + df <- bubble_prep(gene = input_new()$gene) + + df <- df %>% + # Rename cell classes to more general names + mutate(Cell_class = case_when( + grepl("RGC", Cell_class) | grepl("-P$", Cluster) ~ "Progenitors/cyc.", + grepl("Olig", Cell_class) ~ "Oligodendrocytes", + grepl("Epen", Cell_class) ~ "Ependymal", + grepl("Astr", Cell_class) ~ "Astrocytes", + grepl("[Nn]euron", Cell_class) ~ "Neurons", + grepl("Non-neuro|Immune", Cell_class) ~ "Non-neuroect.", + TRUE ~ "Other" + )) + + # Store this version of the df for later reference + df_for_cellclass <- df + + # Store mean expression for each cluster + df <- df %>% + group_by(Gene, Cluster, Cell_class) %>% + summarize(Expression = mean(Expression)) + + df <- df %>% filter(Cell_class %in% input_new()$heatmap_cells) + + # Select only certain clusters that can be categorized + # (from Selin's code, to be confirmed) + # df <- df %>% filter(grepl("ASTR|EPEN|OL|OPC|EXN", Cluster) & !grepl("^B", Cluster)) + # df <- df %>% filter(!grepl("^B", Cluster)) + + # Pivot the cluster rows to columns + df <- df %>% + select(Gene, Cluster, Expression) %>% + mutate(Expression = as.numeric(Expression)) %>% + tidyr::pivot_wider(names_from = "Cluster", values_from = "Expression") + + # Set NA values in df to 0 (from Selin's code) + df[is.na(df)] <- 0 + + # Flip dataframe over to match the order of user input (bubble_prep does reverse) + df <- df[nrow(df):1,] + + # Convert dataframe values to matrix, set rownames to genes for labeling purposes + mat <- df[,-1] %>% + data.matrix() + rownames(mat) <- df$Gene + + # Set up values for heatmap annotation (cell class bar at top) + hm_anno <- makePheatmapAnno(general_palette, "Cell_class") + hm_anno$anno_row <- left_join(hm_anno$anno_row, + unique(select(df_for_cellclass, Cluster, Cell_class)), by = "Cell_class") + rownames(hm_anno$anno_row) <- hm_anno$anno_row$Cluster + hm_anno$anno_row$Cluster <- NULL # Prevent individual clusters from showing in plot + + # Plot heatmap, store dimensions for dynamically plotting full width + hm <- mat %>% + apply(1, scales::rescale) %>% + t() %>% + pheatmap::pheatmap(border_color = NA, + color = colorRampPalette(c("blue", "white", "red"))(100), + scale = "none", + cluster_rows = TRUE, + cluster_cols = TRUE, + cellwidth = 13, + cellheight = 13, + fontsize = 13, + annotation_col = hm_anno$anno_row, + annotation_colors = hm_anno$side_colors, + na_col = "#e5e5e5") + + heatmap_dims$width <- glue("{get_plot_dims(hm)$width}in") + heatmap_dims$height <- glue("{get_plot_dims(hm)$height}in") + hm + }) + + # Plot the heatmap using dynamic width and height values stored above + output$heatmapUI <- renderUI({ + plotOutput("heatmap", width = heatmap_dims$width, height = heatmap_dims$height) + }) + } diff --git a/clusters/ui.R b/clusters/ui.R index cc612bb..b67f6b4 100644 --- a/clusters/ui.R +++ b/clusters/ui.R @@ -2,7 +2,7 @@ source("../www/ui_functions.R") ui <- function(request){ - + bootstrapPage( # Custom styling @@ -23,15 +23,15 @@ ui <- function(request){ conditionalPanel(condition = '!input.upload', # Gene input field, shared across tabs - selectInput("gene", "Gene", choices = genes_mouse, + selectizeInput(inputId = "gene", label = "Gene", choices = NULL, multiple = TRUE)), conditionalPanel(condition = 'input.upload', # Gene list input with a file, shared across tabs - fileInput(inputId = "genelist", label = "Gene list", + fileInput(inputId = "genelist", label = "Gene list (.txt, .csv, or .tsv)", buttonLabel = "Browse...", multiple = FALSE, - accept = c(".csv", ".tsv"), + accept = c(".txt", ".csv", ".tsv"), placeholder = "No file selected")), materialSwitch("upload", "Use gene list from file", @@ -40,7 +40,7 @@ ui <- function(request){ value = FALSE, right = TRUE), - # Input for dendrogram tab and expression table tab + # Input for dendrogram tab, expression table, and ranked clusters tab conditionalPanel(condition = "(input.tabs == 'dendrogram' || input.tabs == 'exp_table' || input.tabs == 'rank_exp') && (input.gene.length > 1 || input.upload)", materialSwitch("mean_exp", "Display mean expression over the selected genes", @@ -64,9 +64,9 @@ ui <- function(request){ ), - # Input for all tabs other than dendrogram, ranked plot, & table + # Input for all tabs other than dendrogram, expression table, ranked clusters, and heatmap conditionalPanel(condition = "input.tabs != 'dendrogram' && input.tabs != 'exp_table' - && input.tabs != 'rank_exp'", + && input.tabs != 'rank_exp' && input.tabs != 'heatmap'", # Specify the visible label as well as the internal # strings used to refer to each region, matching @@ -76,6 +76,22 @@ ui <- function(request){ "Pons" = "joint_pons")) ), + # Input for heatmap tab + conditionalPanel(condition = "input.tabs == 'heatmap'", + checkboxGroupInput("heatmap_cells", label = "Cell type(s)", + choices = list("Progenitors/cycling" = "Progenitors/cyc.", + "Oligodendrocytes", + "Ependymal", + "Astrocytes", + "Neurons", + "Non-neuroectoderm" = "Non-neuroect.", + "Other"), + selected = c("Oligodendrocytes", + "Ependymal", + "Astrocytes", + "Neurons")) + ), + # Input for tabs on joint analysis by region or by sample conditionalPanel(condition = "input.tabs == 'joint' || input.tabs == 'sample'", @@ -420,6 +436,33 @@ ui <- function(request){ value = "rank_exp" ), + #### ---- Cell types clustered by expression tab output ---- + + tabPanel("Cell types clustered by expression", + + tags$br(), + tags$b("This plot is a heatmap clustering input genes and cell types together based on their mean expression within clusters."), + tags$br(), + tags$br(), + p("• The heatmap's hierarchical clustering method requires at least two genes as input. An error message will display if only one gene is provided"), + + p("• At least one cell type must be selected: an error message will display if none are checked off"), + + p("• The coloured bar above the heatmap provides a categorization of cell clusters by general cell type"), + + p("• The tree to the left of the heatmap indicates the clustering of genes, and the tree above the heatmap indicates the clustering of cell types"), + + # Enable horizontal scrolling for a very wide plot, but no vertical scroll + div(style = "width: 1500px; overflow-x: visible; overflow-y: visible;", + fluidRow( + uiOutput("heatmapUI") %>% ws + ) + ), + + # Specify the value to use when checking if this tab is selected + value = "heatmap" + ), + id = "tabs" ))), diff --git a/www/minimal.css b/www/minimal.css index 056292b..2a9d7a0 100644 --- a/www/minimal.css +++ b/www/minimal.css @@ -120,3 +120,25 @@ a { display: block; } +/* SCROLLBARS - code from https://www.digitalocean.com/community/tutorials/css-scrollbars */ + +/* Works on Firefox */ +* { + scrollbar-width: thin; + scrollbar-color: grey white; +} + +/* Works on Chrome, Edge, and Safari */ +*::-webkit-scrollbar { + width: 12px; +} + +*::-webkit-scrollbar-track { + background: white; +} + +*::-webkit-scrollbar-thumb { + background-color: grey; + border-radius: 20px; + border: 3px solid white; +} \ No newline at end of file