diff --git a/NAMESPACE b/NAMESPACE index c23fd5a..0e30d25 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -46,6 +46,7 @@ export(gintervals.shift_left) export(gintervals.shift_right) export(gintervals.zoom_in) export(gintervals.zoom_out) +export(group_to_hclust) export(gset_genome) export(has_cell_type) export(has_cell_type_colors) diff --git a/R/McCounts.R b/R/McCounts.R index 8906701..de6fa93 100755 --- a/R/McCounts.R +++ b/R/McCounts.R @@ -252,6 +252,7 @@ mcc_read <- function(path, id = NULL, description = NULL, verbose = TRUE) { summarise_bin <- function(mat, bin, intervs, metacells = NULL) { metacells <- metacells %||% colnames(mat) intervs <- as.data.frame(intervs) + if (!has_name(intervs, "peak_name")) { cli_abort("The {.var intervs} must have a column called {.field peak_name}") } @@ -615,8 +616,6 @@ mcc_to_tracks <- function(mc_counts, track_prefix, metacells = NULL, overwrite = gc() }, .parallel = getOption("mcatac.parallel")) - gdb.reload() - cli_alert_success("Created {length(metacells)} tracks at {track_prefix}") mct <- mct_create(genome = mc_counts@genome, tracks = glue("{track_prefix}.mc{metacells}"), metacells = metacells, id = mc_counts@id, description = mc_counts@description, path = mc_counts@path, metadata = mc_counts@metadata, resolution = resolution, window_size = window_size, marginal_track = marginal_track) diff --git a/R/plot-region.R b/R/plot-region.R index ea8700c..e4f3c9d 100644 --- a/R/plot-region.R +++ b/R/plot-region.R @@ -1,3 +1,36 @@ +get_raw_mat <- function(mct, intervals, detect_dca = FALSE, downsample = TRUE, downsample_n = NULL, metacells = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), hc = NULL, force_cell_type = TRUE, gene_annot = FALSE, n_smooth = 10, n_pixels = 1000, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", genes_correlations = NULL, cor_colors = c("blue", "white", "white", "white", "red"), cor_color_breaks = c(-1, -0.05, 0, 0.05, 1), ...) { + gset_genome(mct@genome) + raw_mat <- mct_get_mat(mct, intervals, downsample, downsample_n) + if (!is.null(metacells)) { + if (any(metacells %!in% mct@metacells)) { + cli_abort("The following metacells are not in the McTracks object: {.val {metacells}}") + } + raw_mat <- raw_mat[, intersect(metacells, colnames(raw_mat)), drop = FALSE] + } + + mat <- raw_mat[, intersect(mct@metacells[mct@order], colnames(raw_mat)), drop = FALSE] + + if (detect_dca && is.null(hc)) { + if (!has_rna(mct)) { + cli_abort("Cannot detect DCA without either an hclust object or RNA data.") + } + mct <- mct_subset_metacells(mct, colnames(mat)) + hc <- mc_hclust_rna(mct, force_cell_type = force_cell_type) + } + + if (!is.null(hc)) { + if (any(hc$label %!in% colnames(mat))) { + missing_mcs <- setdiff(hc$label, colnames(mat)) + cli_warn("The following metacells are present in the hclust object, but are missing in the matrix (this is probably due to downsampling): {.val {missing_mcs}}") + hc <- dendextend::prune(hc, missing_mcs) + } + mat <- mat[, hc$label] + } + mat_smooth = RcppRoll::roll_sum(mat, n = n_smooth, fill = c(0, 0, 0)) + rownames(mat_smooth) = rownames(mat) + mat_smooth +} + #' Plot a genomic region #' #' @param mct an McTracks object. @@ -38,7 +71,7 @@ #' } #' #' @export -mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRUE, downsample_n = NULL, metacells = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), hc = NULL, force_cell_type = TRUE, gene_annot = FALSE, n_smooth = 10, n_pixels = 1000, ...) { +mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRUE, downsample_n = NULL, metacells = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), hc = NULL, force_cell_type = TRUE, gene_annot = FALSE, n_smooth = 10, n_pixels = 1000, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", genes_correlations = NULL, cor_colors = c("blue", "white", "white", "white", "red"), cor_color_breaks = c(-1, -0.05, 0, 0.05, 1), min_atac_sum=10, color_max_by_type=FALSE,...) { gset_genome(mct@genome) raw_mat <- mct_get_mat(mct, intervals, downsample, downsample_n) if (!is.null(metacells)) { @@ -72,9 +105,10 @@ mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRU dca_mat <- mct_diff_access_on_hc(mat, hc = hc, ...) dca_mat <- dca_mat[, hc$order, drop = FALSE] } - + y_seps <- NULL if (!is.null(hc)) { mat <- mat[, hc$order, drop = FALSE] + y_seps <- cutree(hc, h = 0.1) } if (has_cell_type(mct) && has_cell_type_colors(mct)) { @@ -83,8 +117,24 @@ mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRU } else { mc_colors <- NULL } - - plot_region_mat(mat, mc_colors, colors = colors, color_breaks = color_breaks, intervals = intervals, resolution = mct@resolution, dca_mat = dca_mat, n_smooth = n_smooth, gene_annot = gene_annot, n_pixels = n_pixels) + if (!is.null(genes_correlations) && has_rna(mct)) { + overlapping_types <- intersect(colnames(mct@rna_egc), colnames(mat)) + rna_legc <- log2(1e-5 + mct@rna_egc) + mat_smooth <- RcppRoll::roll_sum(mat[, overlapping_types], n = n_smooth, fill = c(0,0,0)) + cors <- tgstat::tgs_cor( + t(mat[, overlapping_types]), + t(rna_legc[genes_correlations, overlapping_types, drop = FALSE]), + pairwise.complete.obs = T + ) + mat <- cors + mc_colors <- rep("white", length(genes_correlations)) + colors <- cor_colors + color_breaks <- cor_color_breaks + y_seps <- NULL + dca_mat <- NULL + n_smooth <- 1 + } + plot_region_mat(mat, mc_colors, colors = colors, color_breaks = color_breaks, intervals = intervals, resolution = mct@resolution, dca_mat = dca_mat, y_seps = y_seps, n_smooth = n_smooth, gene_annot = gene_annot, n_pixels = n_pixels, plot_x_axis_ticks = plot_x_axis_ticks, gene_annot_pos = gene_annot_pos, color_max_by_type=color_max_by_type) } #' Plot a genomic region given a matrix @@ -98,46 +148,82 @@ mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRU #' @param dca_mat a matrix with the differential cluster accessibility (DCA) for the plotted regions (optional). Output of \code{mct_diff_access_on_hc}. #' @param n_smooth number of genomic bins to use for smoothing the signal. Signal is smoothed by a rolling sum for each metacell (optional). Default is 20. #' @param n_pixels number of pixels in the plot. The DCA regions would be extended by \code{ceiling(2 * nrow(mat) / n_pixels)} (optional). -#' @param gene_annot whether to add gene annotations; these annotations rely on the existence of an \code{annots/refGene.txt} file in the genome's misha directory, and on the existence of an intervals set called "intervs.global.tss" in the genome's misha directory. (optional) +#' @param gene_annot whether to add gene annotations; these annotations rely on the existence of the existence of an intervals set called "intervs.global.tss" and "intervs.global.exon" in the genome's misha directory. (optional) +#' @param gene_annot_pos the position of the gene annotations ("top" or "bottom") #' #' @export -plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), intervals = NULL, resolution = NULL, dca_mat = NULL, n_smooth = 10, n_pixels = 1000, gene_annot = FALSE) { +plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), intervals = NULL, resolution = NULL, dca_mat = NULL, y_seps = NULL, y_seps_lty = 2, y_seps_lwd = 1, n_smooth = 10, n_pixels = 1000, gene_annot = FALSE, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", color_max_by_type=FALSE) { mat_smooth <- RcppRoll::roll_sum(mat, n = n_smooth, fill = c(0, 0, 0)) if (gene_annot) { if (is.null(intervals) || is.null(resolution)) { cli_abort("If gene annotations are requested, intervals and resolution must be specified") } - layout(cbind(c(0, 0, 3), c(1, 2, 4)), widths = c(1, 20), heights = c(3, 0.5, 15)) - par(mar = c(0, 0, 2, 2)) + # Different layouts depending on annotation position + if (gene_annot_pos == "top") { + layout(cbind(c(0, 0, 3), c(1, 2, 4)), widths = c(1, 20), heights = c(2, 0.5, 15)) + } else if (gene_annot_pos == "bottom") { + layout(cbind(c(3, 0, 0, 0), c(4, 1, 2, 0)), widths = c(1, 20), heights = c(15, 2, 0.5, 0.5)) + } + + par(mar = c(0, 0, 1, 2)) plot_tss_strip(intervals) - par(mar = c(0, 0, 0, 2)) + if (gene_annot_pos == "top") { + par(mar = c(0, 0, 0, 2)) + } else { + par(mar = c(0, 0, 0, 2)) + } gene_annots <- make_gene_annot(intervals, resolution) + if (is.null(gene_annots[["exon_coords"]])) { image(as.matrix(rep(0, ncol(mat)), nrow = 1), col = c("white", "black"), breaks = c(-0.5, 0, 1), yaxt = "n", xaxt = "n", frame.plot = FALSE) } else { - image(as.matrix(gene_annots[["exon_coords"]], nrow = 1), col = c("white", "black"), breaks = c(-0.5, 0, 1), yaxt = "n", xaxt = "n", frame.plot = FALSE) + exon_mat <- as.matrix(gene_annots[["exon_coords"]]) + + image(exon_mat, col = c("white", "black"), breaks = c(-0.5, 0, 1), yaxt = "n", xaxt = "n", frame.plot = FALSE) } top_mar <- 0 left_mar <- 2 } else { layout(matrix(1:2, nrow = 1), w = c(1, 20)) - top_mar <- 2 - left_mar <- 1 + top_mar <- 0 + left_mar <- 2 + } + + if (plot_x_axis_ticks && gene_annot_pos == "bottom") { + plot_x_axis_ticks <- FALSE + cli_alert_warning("Gene annotations are at the bottom, so x-axis ticks are disabled.") + } + + if (plot_x_axis_ticks) { + bottom_mar <- 4 + } else { + bottom_mar <- 0 } - par(mar = c(4, left_mar, top_mar, 0)) - image(t(as.matrix(1:length(mc_colors), nrow = 1)), col = mc_colors, yaxt = "n", xaxt = "n") + par(mar = c(bottom_mar, left_mar, top_mar, 0)) + image(t(as.matrix(seq_along(mc_colors), nrow = 1)), col = mc_colors, yaxt = "n", xaxt = "n") - par(mar = c(4, 0, top_mar, 2)) + par(mar = c(bottom_mar, 0, top_mar, 2)) shades <- colorRampPalette(colors)(1000) - mat_smooth[mat_smooth > max(color_breaks)] <- max(color_breaks) - shades_breaks <- approx(color_breaks, n = 1001)$y + if(color_max_by_type){ + shades_breaks <- approx(color_breaks, n = 1001)$y + for(i in 1:length(mc_colors)){ + coli = mat_smooth[,i] + coli[coli > max(color_breaks)] <- max(color_breaks)-1+i + mat_smooth[,i] = coli + } + shades_breaks = c(shades_breaks, max(color_breaks) - 1 + 1:length(mc_colors)) + shades = c(shades, mc_colors) + } else{ + mat_smooth[mat_smooth > max(color_breaks)] <- max(color_breaks) + shades_breaks <- approx(color_breaks, n = 1001)$y + } image(mat_smooth, col = shades, breaks = shades_breaks, yaxt = "n", xaxt = "n") - if (!is.null(intervals)) { + if (!is.null(intervals) && plot_x_axis_ticks) { axis(1, at = seq(0, 1, l = 11), round(seq(intervals$start[1], intervals$end[1], l = 11) / 1e+6, 3), las = 2) } @@ -148,6 +234,13 @@ plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", " dca_cols <- c(rgb(0, 0, 1, 0.4), rgb(0, 0, 1, 0.15), rgb(0, 0, 0, 0), rgb(1, 0, 0, 0.15), rgb(1, 0, 0, 0.4)) image(dca_mat, col = dca_cols, add = TRUE, breaks = c(-3, -1.5, -0.5, 0.5, 1.5, 3)) } + if (!is.null(y_seps)) { + n <- length(colnames(mat_smooth)) + a <- y_seps[colnames(mat_smooth)] + y_seps <- unname(sapply(split(seq_along(a), a), max)) + 1 + y_bords <- seq(-1 / (2 * (n - 1)), 1 + 1 / (2 * (n - 1)), length = n + 1)[y_seps] + abline(h = y_bords, lty = y_seps_lty, lwd = y_seps_lwd, add = TRUE, col = "grey") + } } plot_tss_strip <- function(intervals) { @@ -161,11 +254,12 @@ plot_tss_strip <- function(intervals) { yaxt = "n" ) + tss_df <- gintervals.neighbors1("intervs.global.tss", intervals) %>% filter(dist == 0) %>% arrange(chrom, start, end, strand, geneSymbol) %>% - distinct(geneSymbol, strand, .keep_all = TRUE) %>% select(chrom, tss = start, strand, gene = geneSymbol) + if (nrow(tss_df) > 0) { text(x = tss_df$tss, y = rep(0.35, length(tss_df$tss)), labels = tss_df$gene, las = 1, cex = 1) line_len <- (intervals$end - intervals$start) * 0.01 @@ -176,6 +270,7 @@ plot_tss_strip <- function(intervals) { filter(strand == -1) %>% pull(tss) + segments(x0 = tss_df$tss, x1 = tss_df$tss, y0 = 0, y1 = 0.2) if (length(plus_tss) > 0) { arrows(x0 = plus_tss, x1 = plus_tss + line_len, y0 = 0.2, y1 = 0.2, length = 0.05) @@ -220,7 +315,7 @@ extend_dca_mat <- function(dca_mat, n_peak_smooth) { #' \code{peak_lf_thresh1}, and a value of 2 means the log fold change was above \code{peak_lf_thresh2}. The same with -1 and -2 for troughs. #' #' @export -mct_diff_access_on_hc <- function(mat, hc, sz_frac_for_peak = 0.25, u_reg = 4, peak_lf_thresh1 = 1.2, peak_lf_thresh2 = 2, trough_lf_thresh1 = -1, trough_lf_thresh2 = -2) { +mct_diff_access_on_hc <- function(mat, hc, sz_frac_for_peak = 0.25, u_reg = 4, peak_lf_thresh1 = 1.2, peak_lf_thresh2 = 2, trough_lf_thresh1 = -1, trough_lf_thresh2 = -2, ...) { if (length(hc$order) != ncol(mat)) { cli_abort("The number of metacells in the matrix and the hclust object do not match.") } diff --git a/R/shiny.R b/R/shiny.R index 320027f..5cafdb3 100644 --- a/R/shiny.R +++ b/R/shiny.R @@ -19,7 +19,7 @@ app_ui <- function(request) { titlePanel(title), fluidRow( column( - 12, + 10, wellPanel( fluidRow( align = "left", @@ -112,6 +112,15 @@ app_ui <- function(request) { ) ) ) + ), + column( + 2, + plotOutput( + "scatter_plot", + height = "300px", + width = "300px", + fill = FALSE, + ) ) ), fluidRow( @@ -119,6 +128,12 @@ app_ui <- function(request) { plotOutput( "region_plot", height = "50vh", + fill = FALSE, + hover = hoverOpts( + id = "region_hover", + delayType = "throttle", + delay = 250 + ), brush = brushOpts( id = "region_brush", direction = "x", @@ -126,15 +141,27 @@ app_ui <- function(request) { delay = 1000 ) ) + ), + shinycssloaders::withSpinner( + plotOutput( + "region_plot_cor", + height = "20px", + fill = FALSE, + click = "region_plot_cor_click" + ) + ), ) - ) - ) + ), + uiOutput("hover_info", style = "pointer-events: none") ) } app_server <- function(input, output, session) { intervals <- reactiveVal() globals <- reactiveValues() + mat <- reactiveVal() + mat2 <- reactiveVal() + scatter_data <- reactiveVal() if (!is.null(shiny_hc)) { hc <- shiny_hc @@ -224,6 +251,64 @@ app_server <- function(input, output, session) { } }) + observeEvent(input$region_plot_cor_click, { + mat_x <- input$region_plot_cor_click$x + req(mat_x) + mat_y <- input$region_plot_cor_click$y + req(mat_y) + m <- mat() + req(m) + mat_idxs <- get_heatmap_idx(mat_x, mat_y, m) + + gene <- promoters[promoters$coords == input$genes, ]$geneSymbol + req(gene %in% rownames(shiny_mct@rna_egc)) + + mcs <- intersect(colnames(shiny_mct@rna_egc), colnames(m)) + + data <- data.frame(atac = m[mat_idxs[1], mcs], rna = shiny_mct@rna_egc[gene, mcs], metacell = mcs) %>% + mutate(rna = log2(rna + 1e-5)) %>% + left_join(shiny_mct@metadata, by = "metacell") %>% + mutate(coords = rownames(m)[mat_idxs[1]]) + + scatter_data(data) + }) + + output$scatter_plot <- renderPlot({ + data <- scatter_data() + req(data) + + data %>% + ggplot(aes(x = atac, y = rna, color = color)) + + geom_point(size = 4) + + scale_color_identity() + + theme_classic() + + xlab("ATAC") + + ylab("RNA") + + ggtitle(glue("Correlation: {round(cor(data$atac, data$rna), digits = 2)} Coverage:{sum(data$atac)}"), subtitle = data$coords[1]) + }) + + observe({ + req(intervals()) + req(input$n_smooth) + req(input$n_smooth >= 1) + n_smooth <- max(round(input$n_smooth / shiny_mct@resolution), 1) + raw_mat <- get_raw_mat( + shiny_mct, intervals(), + downsample = TRUE, + downsample_n = 20000000, + detect_dca = input$detect_dca %||% FALSE, + gene_annot = TRUE, + hc = shiny_hc, + peak_lf_thresh1 = input$dca_peak_lf_thresh, + trough_lf_thresh1 = input$dca_trough_lf_thresh, + sz_frac_for_peak = input$dca_sz_frac_for_peak, + color_breaks = color_breaks, + n_smooth = n_smooth, + plot_x_axis_ticks = FALSE + ) + mat(raw_mat) + }) + output$region_plot <- renderPlot( { req(intervals()) @@ -235,6 +320,7 @@ app_server <- function(input, output, session) { req(input$min_color < input$max_color) req(input$n_smooth) req(input$n_smooth >= 1) + req(mat()) color_breaks <- c(0, seq( input$min_color, input$max_color, @@ -245,6 +331,9 @@ app_server <- function(input, output, session) { mct_plot_region( shiny_mct, intervals(), + downsample = TRUE, + downsample_n = 20000000, + colors = c("white", "gray", "darkgrey", "black"), detect_dca = input$detect_dca %||% FALSE, gene_annot = TRUE, hc = hc, @@ -252,7 +341,60 @@ app_server <- function(input, output, session) { trough_lf_thresh1 = input$dca_trough_lf_thresh, sz_frac_for_peak = input$dca_sz_frac_for_peak, color_breaks = color_breaks, - n_smooth = n_smooth + n_smooth = n_smooth, + plot_x_axis_ticks = FALSE, + color_max_by_type = TRUE + ) + }, + res = 96 + ) %>% + bindCache( + intervals(), + input$detect_dca, + input$dca_peak_lf_thresh, + input$dca_trough_lf_thresh, + input$dca_sz_frac_for_peak, + input$min_color, + input$max_color, + input$n_smooth + ) + output$region_plot_cor <- renderPlot( + { + req(intervals()) + req(input$dca_peak_lf_thresh) + req(input$dca_trough_lf_thresh) + req(input$dca_sz_frac_for_peak) + req(has_rna(shiny_mct)) + req(input$min_color) + req(input$max_color) + req(input$min_color < input$max_color) + req(input$n_smooth) + req(input$n_smooth >= 1) + color_breaks <- c(0, seq( + input$min_color, + input$max_color, + length.out = 4 + )) + + n_smooth <- max(round(input$n_smooth / shiny_mct@resolution), 1) + + mct_plot_region( + shiny_mct, intervals(), + downsample = TRUE, + downsample_n = 20000000, + detect_dca = input$detect_dca %||% FALSE, + gene_annot = FALSE, + hc = NULL, + peak_lf_thresh1 = input$dca_peak_lf_thresh, + trough_lf_thresh1 = input$dca_trough_lf_thresh, + sz_frac_for_peak = input$dca_sz_frac_for_peak, + color_breaks = color_breaks, + n_smooth = n_smooth, + plot_x_axis_ticks = FALSE, + roll_mean = TRUE, + genes_correlations = promoters[promoters$coords == input$genes, ]$geneSymbol, + cor_color_breaks = c(-1.0, -0.5, 0, 0.5, 1.0), + cor_colors = c("blue", "white", "white", "white", "red") ) }, res = 96 @@ -268,6 +410,7 @@ app_server <- function(input, output, session) { input$n_smooth ) + color_max_by_type = TRUE output$current_coords <- renderText({ if (is.null(intervals())) { return("Please enter valid genomic coordinates (e.g. \"chr3:34300000-35000020\")") @@ -314,6 +457,41 @@ app_server <- function(input, output, session) { update_intervals(zoom_intervals) }) + output$hover_info <- renderUI({ + hover <- input$region_hover + req(hover) + raw_mat <- mat() + req(raw_mat) + mat_idxs <- get_heatmap_idx(hover$x, hover$y, raw_mat) + + mc <- colnames(raw_mat)[mat_idxs[2]] + req(mc) + mc_metadata <- mct@metadata %>% + filter(metacell == mc) %>% + slice(1) + mc_color <- mc_metadata$color + mc_cell_type <- mc_metadata$cell_type + coords <- rownames(raw_mat)[mat_idxs[1]] + + # taken from https://gitlab.com/-/snippets/16220 + left_px <- hover$coords_css$x + top_px <- hover$coords_css$y + style <- glue( + "background-color: {grDevices::rgb(t(grDevices::col2rgb(mc_color) / 255))}; position:absolute; z-index:100; left: {left_px + 2}px; top: {top_px + 200}px;" + ) + tooltip <- paste( + glue("Metacell: {mc}"), + glue("Cell type: {mc_cell_type}"), + glue("Coordinates: {coords}"), + sep = "
" + ) + + wellPanel( + style = style, + p(HTML(tooltip)) + ) + }) + observe({ shinyjs::toggle(id = "dca_peak_lf_thresh", condition = input$detect_dca && input$show_controls) shinyjs::toggle(id = "dca_trough_lf_thresh", condition = input$detect_dca && input$show_controls) @@ -355,6 +533,7 @@ parse_coordinate_text <- function(text) { #' #' @param mct MCT object #' @param hc an hclust object with clustering of the metacells (optional, see \code{mct_plot_region} +#' @param annotations an intervals object with annotations for the second genome (optional) #' #' @examples #' \dontrun{ @@ -366,6 +545,7 @@ run_app <- function(mct, hc = NULL, port = NULL, host = NULL, + annotations = NULL, launch.browser = FALSE) { library(misha) library(shiny) @@ -377,6 +557,8 @@ run_app <- function(mct, shiny_hc <<- NULL } gset_genome(mct@genome, force = FALSE) + shiny_annotations <<- annotations + shiny::shinyApp( ui = app_ui, server = app_server, diff --git a/R/utils-hclust.R b/R/utils-hclust.R new file mode 100644 index 0000000..92e442b --- /dev/null +++ b/R/utils-hclust.R @@ -0,0 +1,55 @@ +#' Generate an hclust object from a data.frame with cell_type, group and order columns +#' +#' @param df data.frame with cell_type, group and order +#' +#' @return hclust object with cell_type as labels and order as order of the leaves +#' +#' @examples +#' df <- data.frame( +#' cell_type = c( +#' "PGC", "Surface ectoderm", "Neural crest", +#' "Neural tube/Floor plate", "Forebrain/Midbrain/Hindbrain", "Caudal neural plate", +#' "Rostral neural plate", "Neural plate boundary", "Definitive ectoderm", +#' "Epiblast", "Primitive streak", "Caudal epiblast", "Tail bud - neural", +#' "Tail bud - mesoderm", "Early nascent mesoderm" +#' ), order = 1:15, +#' group = c( +#' "ecto", "ecto", "ecto", "ecto", "ecto", "ecto", +#' "ecto", "ecto", "ecto", "early", "early", "early", "early", +#' "Meso", "Meso" +#' ) +#' ) +#' +#' group_to_hclust(df) +#' +#' @export +group_to_hclust <- function(df) { + # Create an empty matrix of 1s + distance_matrix <- matrix(1, nrow = nrow(df), ncol = nrow(df)) + rownames(distance_matrix) <- df$cell_type + colnames(distance_matrix) <- df$cell_type + + # set the distance between elements the same group to 0 (using df$group) + for (grp in unique(df$group)) { + distance_matrix[df$group == grp, df$group == grp] <- 0 + } + + # Hierarchical clustering + hc <- hclust(as.dist(distance_matrix)) + + # Label leaves with cell_type + hc$labels <- df$cell_type + + hc <- as.hclust(reorder(as.dendrogram(hc), df$order, agglo.FUN = mean)) + + return(hc) +} + +get_heatmap_idx <- function(x, y, m) { + n_x <- nrow(m) + n_y <- ncol(m) + x_idx <- floor(x * n_x) + 1 + + y_idx <- n_y - max(floor(y * n_y), 0) + return(c(x_idx, y_idx)) +} diff --git a/man/group_to_hclust.Rd b/man/group_to_hclust.Rd new file mode 100644 index 0000000..d104ed4 --- /dev/null +++ b/man/group_to_hclust.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-multi-genomes.R +\name{group_to_hclust} +\alias{group_to_hclust} +\title{Generate an hclust object from a data.frame with cell_type, group and order columns} +\usage{ +group_to_hclust(df) +} +\arguments{ +\item{df}{data.frame with cell_type, group and order} +} +\value{ +hclust object with cell_type as labels and order as order of the leaves +} +\description{ +Generate an hclust object from a data.frame with cell_type, group and order columns +} +\examples{ +df <- data.frame( + cell_type = c( + "PGC", "Surface ectoderm", "Neural crest", + "Neural tube/Floor plate", "Forebrain/Midbrain/Hindbrain", "Caudal neural plate", + "Rostral neural plate", "Neural plate boundary", "Definitive ectoderm", + "Epiblast", "Primitive streak", "Caudal epiblast", "Tail bud - neural", + "Tail bud - mesoderm", "Early nascent mesoderm" + ), order = 1:15, + group = c( + "ecto", "ecto", "ecto", "ecto", "ecto", "ecto", + "ecto", "ecto", "ecto", "early", "early", "early", "early", + "Meso", "Meso" + ) +) + +group_to_hclust(df) + +} diff --git a/man/mct_diff_access_on_hc.Rd b/man/mct_diff_access_on_hc.Rd index a1f3f76..e48aab1 100644 --- a/man/mct_diff_access_on_hc.Rd +++ b/man/mct_diff_access_on_hc.Rd @@ -12,7 +12,8 @@ mct_diff_access_on_hc( peak_lf_thresh1 = 1.2, peak_lf_thresh2 = 2, trough_lf_thresh1 = -1, - trough_lf_thresh2 = -2 + trough_lf_thresh2 = -2, + ... ) } \arguments{ diff --git a/man/mct_plot_region.Rd b/man/mct_plot_region.Rd index cd9ffd3..2055717 100644 --- a/man/mct_plot_region.Rd +++ b/man/mct_plot_region.Rd @@ -18,6 +18,13 @@ mct_plot_region( gene_annot = FALSE, n_smooth = 10, n_pixels = 1000, + plot_x_axis_ticks = TRUE, + gene_annot_pos = "top", + flip = FALSE, + genes_correlations = NULL, + cor_colors = c("blue", "white", "white", "white", "red"), + cor_color_breaks = c(-1, -0.05, 0, 0.05, 1), + roll_mean = FALSE, ... ) } @@ -42,12 +49,16 @@ mct_plot_region( \item{force_cell_type}{do not split cell types when ordering the metacells. Default: TRUE} -\item{gene_annot}{whether to add gene annotations; these annotations rely on the existence of an \code{annots/refGene.txt} file in the genome's misha directory, and on the existence of an intervals set called "intervs.global.tss" in the genome's misha directory. (optional)} +\item{gene_annot}{whether to add gene annotations; these annotations rely on the existence of the existence of an intervals set called "intervs.global.tss" and "intervs.global.exon" in the genome's misha directory. (optional)} \item{n_smooth}{number of genomic bins to use for smoothing the signal. Signal is smoothed by a rolling sum for each metacell (optional). Default is 20.} \item{n_pixels}{number of pixels in the plot. The DCA regions would be extended by \code{ceiling(2 * nrow(mat) / n_pixels)} (optional).} +\item{gene_annot_pos}{the position of the gene annotations ("top" or "bottom")} + +\item{flip}{whether to flip the coordinates (optional)} + \item{...}{ Arguments passed on to \code{\link[=mct_diff_access_on_hc]{mct_diff_access_on_hc}} \describe{ diff --git a/man/plot_region_mat.Rd b/man/plot_region_mat.Rd index 35a7670..5830a72 100644 --- a/man/plot_region_mat.Rd +++ b/man/plot_region_mat.Rd @@ -12,9 +12,17 @@ plot_region_mat( intervals = NULL, resolution = NULL, dca_mat = NULL, + y_seps = NULL, + y_seps_lty = 2, + y_seps_lwd = 1, n_smooth = 10, n_pixels = 1000, - gene_annot = FALSE + gene_annot = FALSE, + genome = NULL, + plot_x_axis_ticks = TRUE, + gene_annot_pos = "top", + flip = FALSE, + roll_mean = FALSE ) } \arguments{ @@ -36,7 +44,13 @@ plot_region_mat( \item{n_pixels}{number of pixels in the plot. The DCA regions would be extended by \code{ceiling(2 * nrow(mat) / n_pixels)} (optional).} -\item{gene_annot}{whether to add gene annotations; these annotations rely on the existence of an \code{annots/refGene.txt} file in the genome's misha directory, and on the existence of an intervals set called "intervs.global.tss" in the genome's misha directory. (optional)} +\item{gene_annot}{whether to add gene annotations; these annotations rely on the existence of the existence of an intervals set called "intervs.global.tss" and "intervs.global.exon" in the genome's misha directory. (optional)} + +\item{genome}{the genome to use for the gene annotations (optional)} + +\item{gene_annot_pos}{the position of the gene annotations ("top" or "bottom")} + +\item{flip}{whether to flip the coordinates (optional)} } \description{ Plot a genomic region given a matrix