From 4e651cc6cdba2a329bd99ff68a601c94c5966cdd Mon Sep 17 00:00:00 2001 From: Ofir Raz Date: Mon, 7 Aug 2023 10:28:43 +0300 Subject: [PATCH 01/16] show alternative TSSs --- R/plot-region.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/plot-region.R b/R/plot-region.R index ea8700c..0d63f0a 100644 --- a/R/plot-region.R +++ b/R/plot-region.R @@ -164,7 +164,7 @@ plot_tss_strip <- function(intervals) { tss_df <- gintervals.neighbors1("intervs.global.tss", intervals) %>% filter(dist == 0) %>% arrange(chrom, start, end, strand, geneSymbol) %>% - distinct(geneSymbol, strand, .keep_all = TRUE) %>% +# distinct(geneSymbol, strand, .keep_all = TRUE) %>% select(chrom, tss = start, strand, gene = geneSymbol) if (nrow(tss_df) > 0) { text(x = tss_df$tss, y = rep(0.35, length(tss_df$tss)), labels = tss_df$gene, las = 1, cex = 1) From 3fbdbbde0d959d8b531fe83f6622a80565c0e28f Mon Sep 17 00:00:00 2001 From: aviezerl Date: Tue, 8 Aug 2023 14:28:07 +0300 Subject: [PATCH 02/16] Added group_to_hclust function --- NAMESPACE | 1 + R/utils-multi-genomes.R | 283 ++++++++++++++++++++++++++++++++++++++++ man/group_to_hclust.Rd | 36 +++++ 3 files changed, 320 insertions(+) create mode 100644 R/utils-multi-genomes.R create mode 100644 man/group_to_hclust.Rd diff --git a/NAMESPACE b/NAMESPACE index c23fd5a..0e30d25 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -46,6 +46,7 @@ export(gintervals.shift_left) export(gintervals.shift_right) export(gintervals.zoom_in) export(gintervals.zoom_out) +export(group_to_hclust) export(gset_genome) export(has_cell_type) export(has_cell_type_colors) diff --git a/R/utils-multi-genomes.R b/R/utils-multi-genomes.R new file mode 100644 index 0000000..359c3a3 --- /dev/null +++ b/R/utils-multi-genomes.R @@ -0,0 +1,283 @@ +flipStrandTricky <- function(strand, flip) { + strandCodes <- c("+" = 1L, "-" = -1L, "*" = 0L) + strandInt <- strandCodes[as.vector(strand)] + flipped <- ifelse(flip, strandInt * -1L, strandInt) + 2L + strandRevCodes <- factor(c("-", "*", "+"), levels(rtracklayer::strand())) + strandRevCodes[as.vector(flipped)] +} + +custom_lift <- function(x, chain) { + liftOverSpace <- function(gr, chain, subind) { + r <- rtracklayer::ranges(gr) + ol <- IRanges::findOverlaps(r, rtracklayer::ranges(chain)) + shits <- S4Vectors::subjectHits(ol) + r <- IRanges::overlapsRanges(r, rtracklayer::ranges(chain), ol) + rev <- as.vector(rtracklayer::reversed(chain)[shits]) + starts <- ifelse(rev, + start(IRanges::reflect(r, rtracklayer::ranges(chain)[shits])), + start(r) + ) + strand <- flipStrandTricky(rtracklayer::strand(gr)[S4Vectors::queryHits(ol)], rev) + r <- IRanges::IRanges(starts, width = rtracklayer::width(r)) + offsets <- rtracklayer::offset(chain)[shits] + spaces <- rtracklayer::space(chain)[shits] + chainscore <- rtracklayer::score(chain)[shits] + ind[[as.character(GenomeInfoDb::seqnames(gr)[1])]] <<- subind[S4Vectors::queryHits(ol)] + GenomicRanges::GRanges(spaces, + IRanges::IRanges(start(r) - offsets, end(r) - offsets), + strand = strand, + rtracklayer::values(gr)[S4Vectors::queryHits(ol), , drop = FALSE], + chainscore + ) + } + rl <- split(x, GenomeInfoDb::seqnames(x), drop = TRUE) + unchainedNames <- setdiff(names(rl), names(chain)) + if (length(unchainedNames)) { + message( + "Discarding unchained sequences: ", + paste(unchainedNames, collapse = ", ") + ) + } + sharedNames <- intersect(names(rl), names(chain)) + ind <- split( + seq_len(length(x)), + as.vector(GenomeInfoDb::seqnames(x)) + )[sharedNames] + liftedList <- mapply(liftOverSpace, rl[sharedNames], + chain[sharedNames], ind, + SIMPLIFY = FALSE + ) + lifted <- unlist(GenomicRanges::GRangesList(liftedList), use.names = FALSE) + + f <- structure(as.integer(unlist(ind, use.names = FALSE)), + levels = seq_len(length(x)), class = "factor" + ) + + GenomicRanges::GRangesList(setNames(split(lifted, f), names(x))) +} + + +custom_lift2 <- function(x, chain, maxgap = 1000) { + test2 <- custom_lift(x, chain) + test3 <- as.data.frame(test2) %>% + group_by(across(c(-start, -end, -width))) %>% + summarise(start = min(start), end = max(end), widthsum = sum(width)) %>% + ungroup() + test3$alt_start <- test3$start - 1 + test3$match <- test3$widthsum / (test3$end - test3$alt_start) + atest2 <- as.data.frame(test2) %>% + left_join( + ungroup(test3) %>% + select(seqnames, strand, chainscore, lstart = alt_start, lend = end, widthsum, match) %>% + mutate(lrow_id = row_number()), + by = join_by(seqnames, strand, chainscore) + ) + btest2 <- atest2 %>% + group_by( + group, group_name, seqnames, strand, chainscore, lrow_id + ) %>% + arrange( + start, + .by_group = TRUE + ) %>% + mutate( + gap = start - lag(end) + ) %>% + mutate( + nbreaks = cumsum(ifelse(is.na(gap > maxgap), 0, gap > maxgap)) + ) %>% + group_by( + nbreaks, + .add = TRUE + ) %>% + summarise( + start = min(start), end = max(end), widthsum = sum(width) + ) %>% + ungroup() + btest2$alt_start <- btest2$start - 1 + btest2$match <- btest2$widthsum / (btest2$end - btest2$alt_start) + return(list(full = test2, gapped = btest2, cont = test3)) +} + +translate_intervals <- function(intervals, chain, chainscore = NULL) { + r <- regioneR::toGRanges(intervals %>% rowid_to_column("row_ID") %>% select(chrom, start, end, row_ID)) + lifted_list <- custom_lift2(r, chain) + + if (is.null(chainscore)) { + chainscore <- max(lifted_list$cont$chainscore) + } + gset_genome(chain@metadata[["genome2"]]) + ff_lifted_list <- gintervals.force_range( + lifted_list$cont %>% + filter(chainscore == !!chainscore) %>% + select(chrom = seqnames, start, end, row_ID) %>% + as.data.frame() %>% + select(chrom, start, end, everything()) + ) + gset_genome(chain@metadata[["genome1"]]) + ff_lifted_list +} + +translate_and_center <- function(intervals, chain, chainscore = NULL) { + r <- regioneR::toGRanges(intervals) + lifted_list <- custom_lift2(r, chain) + if (!is.null(chainscore)) { + intervs2 <- lifted_list$cont %>% + filter(chainscore == !!chainscore) %>% + select(chrom = seqnames, start, end) %>% + as.data.frame() + } else { + intervs2 <- lifted_list$cont %>% + arrange(desc(chainscore)) %>% + ungroup() %>% + slice(1) %>% + select(chrom = seqnames, start, end) %>% + as.data.frame() + } + expand <- round(abs(intervals$end - intervals$start) / 2) + intervs2 <- misha.ext::gintervals.centers(intervs2) %>% + mutate(start = start - expand, end = end + expand) + gset_genome(chain@metadata[["genome2"]]) + intervs2 <- gintervals.force_range(intervs2) + gset_genome(chain@metadata[["genome1"]]) + return(intervs2) +} + + +orth_one_per_element <- function(comparison_obj, width = 10) { + comparison_obj$mid1 <- comparison_obj$start1 + as.integer((comparison_obj$end1 - comparison_obj$start1) / 2) + comparison_obj$mid2 <- comparison_obj$start2 + as.integer((comparison_obj$end2 - comparison_obj$start2) / 2) + comparison_obj$start1 <- comparison_obj$mid1 + comparison_obj$end1 <- comparison_obj$mid1 + width + comparison_obj$start2 <- comparison_obj$mid2 + comparison_obj$end2 <- comparison_obj$mid2 + width + comparison_obj[, 1:6] +} + +load_chain <- function(chain, genome1 = NULL, genome2 = NULL) { + if (is.character(chain)) { + cli::cli_alert_info("Loading {genome1}->{genome2} chain {.file {chain}}") + chain <- rtracklayer::import.chain(chain) + cli::cli_alert_success("Loaded chain {.file {chain}} successfully") + } + + chain@metadata <- list(genome1 = genome1, genome2 = genome2) + + return(chain) +} + +compute_annotation_comparison <- function(annot1, annot2, intervals, intervals2, chain_1_to_2, chain_2_to_1, selected_chain_chainscore = NULL) { + annot1 <- annot1 %>% + gintervals.neighbors1(intervals) %>% + filter(dist == 0) + annot1_colors <- chameleon::distinct_colors(nrow(annot1))$name + + lifted_annot1 <- translate_intervals(as.data.frame(annot1), chain = chain_1_to_2, chainscore = selected_chain_chainscore) %>% + arrange(row_ID) + + annot2 <- annot2 %>% + filter( + chrom == intervals2$chrom, + start >= intervals2$start, + end <= intervals2$end + ) + annot2_colors <- chameleon::distinct_colors(nrow(annot1) + nrow(annot2))$name[(nrow(annot1) + 1):(nrow(annot1) + nrow(annot2))] + lifted_annot2 <- translate_intervals(as.data.frame(annot2), chain = chain_2_to_1, chainscore = selected_chain_chainscore) %>% + arrange(row_ID) + list( + annot1 = annot1 %>% mutate(x1 = (start - intervals$start) / (intervals$end - intervals$start)), + lifted_annot1 = lifted_annot1 %>% mutate(x2 = (start - intervals2$start) / (intervals2$end - intervals2$start)), + annot1_colors = annot1_colors, + annot2 = annot2 %>% mutate(x2 = (start - intervals2$start) / (intervals2$end - intervals2$start)), + lifted_annot2 = lifted_annot2 %>% mutate(x1 = (start - intervals$start) / (intervals$end - intervals$start)), + annot2_colors = annot2_colors + ) +} + + +compute_intervals_comparison <- function(intervals, intervals2, chain, chainscore = NULL, grid_resolution = 100) { + grid_resolution <- round((intervals$end - intervals$start) / grid_resolution) + + # create a grid iterator for the first intervals set + i1 <- tibble( + chrom = intervals$chrom, + end = seq(intervals$start, intervals$end, grid_resolution), + start = end - grid_resolution + ) %>% + mutate(end = pmin(end, intervals$end)) %>% + as.data.frame() + + + i12 <- translate_intervals(i1, chain, chainscore = chainscore) + + i12 <- i1 %>% + rowid_to_column("row_ID") %>% + left_join(i12 %>% rename(chrom1 = chrom, start1 = start, end1 = end), by = join_by(row_ID)) %>% + as_tibble() + + + # translate start1 and start2 to relative coordinates (from 0 to 1) according to intervals1 and intervals2 + i12 <- i12 %>% + mutate( + x1 = (start - intervals$start) / (intervals$end - intervals$start), + x2 = (start1 - intervals2$start) / (intervals2$end - intervals2$start), + x2 = ifelse(is.na(chrom1) | as.character(chrom1) != as.character(intervals2$chrom), NA, x2) + ) + + return(i12) +} + +is_comparison_flipped <- function(intervals_comparison) { + i12 <- intervals_comparison + + cr <- i12 %>% + mutate( + mid1 = start + (end - start) / 2, + mid2 = start1 + (end1 - start1) / 2 + ) %>% + summarise(cr = cor(mid1, mid2, use = "pairwise.complete.obs")) + + return(cr < 0) +} + +plot_intervals_comparison <- function(intervals_comparison, annotations = NULL, grid_resolution = 100) { + i12 <- intervals_comparison + + if (is_comparison_flipped(i12)) { + i12$x2 <- 1 - i12$x2 + } + # Set the plot parameters + plot(1, 1, xlim = c(0, 1), ylim = c(-0.02, 1.02), type = "n", ann = FALSE, axes = FALSE) + + segments(x0 = i12$x1, y0 = 1, x1 = i12$x1, y1 = 0.99, lwd = 1) + text(x = i12$x1, y = 0.96, labels = round(i12$start / 1e+6, 3), srt = 90, adj = c(1, 0.5), xpd = TRUE, cex = 0.5) + segments(x0 = i12$x1, y0 = 0.75, x1 = i12$x1, y1 = 0.7, lwd = 1, col = ifelse(1:length(i12$x1) %% round(grid_resolution / 10) == 0, "black", "grey")) + segments(x0 = i12$x1, y0 = 0.7, x1 = i12$x2, y1 = 0.27, lwd = 1, col = ifelse(1:length(i12$x1) %% round(grid_resolution / 10) == 0, "black", "grey")) + segments(x0 = i12$x2, y0 = 0.27, x1 = i12$x2, y1 = 0.25, lwd = 1, col = ifelse(1:length(i12$x1) %% round(grid_resolution / 10) == 0, "black", "grey")) + text(x = i12$x2[i12$x2 <= 1 & i12$x2 >= 0], y = 0.2, labels = round(i12$start1[i12$x2 <= 1 & i12$x2 >= 0] / 1e+6, 3), srt = 90, adj = c(1, 0.5), xpd = TRUE, cex = 0.5) + segments(x0 = i12$x2, y0 = 0, x1 = i12$x2, y1 = 0.01, lwd = 1) + if (!is.null(annotations)) { + annot1 <- annotations[["annot1"]] + lifted_annot1 <- annotations[["lifted_annot1"]] + annot1_colors <- annotations[["annot1_colors"]] + annot2 <- annotations[["annot2"]] + lifted_annot2 <- annotations[["lifted_annot2"]] + annot2_colors <- annotations[["annot2_colors"]] + if (is_comparison_flipped(i12)) { + annot2$x2 <- 1 - annot2$x2 + lifted_annot1$x2 <- 1 - lifted_annot1$x2 + } + points( + x = c(annot1$x1, lifted_annot2$x1), + y = c(rnorm(nrow(annot1), sd = 0.001) + 1.019, rnorm(nrow(lifted_annot2), sd = 0.001) + 0.981), + col = c(annot1_colors, annot2_colors), + pch = 3, cex = 0.5 + ) + points( + x = c(annot2$x2, lifted_annot1$x2), + y = c(rnorm(nrow(annot2), sd = 0.001) - 0.019, rnorm(nrow(lifted_annot1), sd = 0.001) + 0.019), + col = c(annot2_colors, annot1_colors), + pch = 3, cex = 0.5 + ) + } +} diff --git a/man/group_to_hclust.Rd b/man/group_to_hclust.Rd new file mode 100644 index 0000000..d104ed4 --- /dev/null +++ b/man/group_to_hclust.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-multi-genomes.R +\name{group_to_hclust} +\alias{group_to_hclust} +\title{Generate an hclust object from a data.frame with cell_type, group and order columns} +\usage{ +group_to_hclust(df) +} +\arguments{ +\item{df}{data.frame with cell_type, group and order} +} +\value{ +hclust object with cell_type as labels and order as order of the leaves +} +\description{ +Generate an hclust object from a data.frame with cell_type, group and order columns +} +\examples{ +df <- data.frame( + cell_type = c( + "PGC", "Surface ectoderm", "Neural crest", + "Neural tube/Floor plate", "Forebrain/Midbrain/Hindbrain", "Caudal neural plate", + "Rostral neural plate", "Neural plate boundary", "Definitive ectoderm", + "Epiblast", "Primitive streak", "Caudal epiblast", "Tail bud - neural", + "Tail bud - mesoderm", "Early nascent mesoderm" + ), order = 1:15, + group = c( + "ecto", "ecto", "ecto", "ecto", "ecto", "ecto", + "ecto", "ecto", "ecto", "early", "early", "early", "early", + "Meso", "Meso" + ) +) + +group_to_hclust(df) + +} From f5eb5c36bdcca628143f704c75d128a537c56bc4 Mon Sep 17 00:00:00 2001 From: Ofir Raz Date: Wed, 9 Aug 2023 13:09:17 +0300 Subject: [PATCH 03/16] added horizontal separators on heatmap --- R/plot-region.R | 82 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 70 insertions(+), 12 deletions(-) diff --git a/R/plot-region.R b/R/plot-region.R index 0d63f0a..5e6ae34 100644 --- a/R/plot-region.R +++ b/R/plot-region.R @@ -72,9 +72,10 @@ mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRU dca_mat <- mct_diff_access_on_hc(mat, hc = hc, ...) dca_mat <- dca_mat[, hc$order, drop = FALSE] } - + y_seps <- NULL if (!is.null(hc)) { mat <- mat[, hc$order, drop = FALSE] + y_seps = cutree(hc, h=0.1) } if (has_cell_type(mct) && has_cell_type_colors(mct)) { @@ -83,8 +84,7 @@ mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRU } else { mc_colors <- NULL } - - plot_region_mat(mat, mc_colors, colors = colors, color_breaks = color_breaks, intervals = intervals, resolution = mct@resolution, dca_mat = dca_mat, n_smooth = n_smooth, gene_annot = gene_annot, n_pixels = n_pixels) + plot_region_mat(mat, mc_colors, colors = colors, color_breaks = color_breaks, intervals = intervals, resolution = mct@resolution, dca_mat = dca_mat, y_seps=y_seps, n_smooth = n_smooth, gene_annot = gene_annot, n_pixels = n_pixels, genome = mct@genome, plot_x_axis_ticks = plot_x_axis_ticks, gene_annot_pos = gene_annot_pos, flip = flip) } #' Plot a genomic region given a matrix @@ -99,9 +99,13 @@ mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRU #' @param n_smooth number of genomic bins to use for smoothing the signal. Signal is smoothed by a rolling sum for each metacell (optional). Default is 20. #' @param n_pixels number of pixels in the plot. The DCA regions would be extended by \code{ceiling(2 * nrow(mat) / n_pixels)} (optional). #' @param gene_annot whether to add gene annotations; these annotations rely on the existence of an \code{annots/refGene.txt} file in the genome's misha directory, and on the existence of an intervals set called "intervs.global.tss" in the genome's misha directory. (optional) +#' @param gene_annot whether to add gene annotations; these annotations rely on the existence of the existence of an intervals set called "intervs.global.tss" and "intervs.global.exon" in the genome's misha directory. (optional) +#' @param genome the genome to use for the gene annotations (optional) +#' @param gene_annot_pos the position of the gene annotations ("top" or "bottom") +#' @param flip whether to flip the coordinates (optional) #' #' @export -plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), intervals = NULL, resolution = NULL, dca_mat = NULL, n_smooth = 10, n_pixels = 1000, gene_annot = FALSE) { +plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), intervals = NULL, resolution = NULL, dca_mat = NULL, y_seps=NULL, y_seps_lty=2, y_seps_lwd=1, n_smooth = 10, n_pixels = 1000, gene_annot = FALSE, genome = NULL, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", flip = FALSE) { mat_smooth <- RcppRoll::roll_sum(mat, n = n_smooth, fill = c(0, 0, 0)) if (gene_annot) { @@ -110,15 +114,35 @@ plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", " } layout(cbind(c(0, 0, 3), c(1, 2, 4)), widths = c(1, 20), heights = c(3, 0.5, 15)) + # Different layouts depending on annotation position + if (gene_annot_pos == "top") { + layout(cbind(c(0, 0, 3), c(1, 2, 4)), widths = c(1, 20), heights = c(3, 0.5, 15)) + } else if (gene_annot_pos == "bottom") { + layout(cbind(c(3, 0, 0, 0), c(4, 1, 2, 0)), widths = c(1, 20), heights = c(15, 3, 0.5, 0.5)) + } + par(mar = c(0, 0, 2, 2)) - plot_tss_strip(intervals) + plot_tss_strip(intervals, flip = flip) par(mar = c(0, 0, 0, 2)) gene_annots <- make_gene_annot(intervals, resolution) + if (gene_annot_pos == "top") { + par(mar = c(0, 0, 0, 2)) + } else { + par(mar = c(0, 0, 0, 2)) + } + gene_annots <- make_gene_annot(intervals, resolution, genome) + if (is.null(gene_annots[["exon_coords"]])) { image(as.matrix(rep(0, ncol(mat)), nrow = 1), col = c("white", "black"), breaks = c(-0.5, 0, 1), yaxt = "n", xaxt = "n", frame.plot = FALSE) } else { image(as.matrix(gene_annots[["exon_coords"]], nrow = 1), col = c("white", "black"), breaks = c(-0.5, 0, 1), yaxt = "n", xaxt = "n", frame.plot = FALSE) + exon_mat <- t(as.matrix(gene_annots[["exon_coords"]])) + if (flip) { + exon_mat <- exon_mat[, ncol(exon_mat):1, drop = FALSE] + } + + image(exon_mat, col = c("white", "black"), breaks = c(-0.5, 0, 1), yaxt = "n", xaxt = "n", frame.plot = FALSE) } top_mar <- 0 @@ -126,18 +150,34 @@ plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", " } else { layout(matrix(1:2, nrow = 1), w = c(1, 20)) top_mar <- 2 - left_mar <- 1 + left_mar <- 2 } par(mar = c(4, left_mar, top_mar, 0)) image(t(as.matrix(1:length(mc_colors), nrow = 1)), col = mc_colors, yaxt = "n", xaxt = "n") + if (plot_x_axis_ticks && gene_annot_pos == "bottom") { + plot_x_axis_ticks <- FALSE + cli_alert_warning("Gene annotations are at the bottom, so x-axis ticks are disabled.") + } - par(mar = c(4, 0, top_mar, 2)) + if (plot_x_axis_ticks) { + bottom_mar <- 4 + } else { + bottom_mar <- 0 + } + + par(mar = c(bottom_mar, left_mar, top_mar, 0)) + image(t(as.matrix(seq_along(mc_colors), nrow = 1)), col = mc_colors, yaxt = "n", xaxt = "n") + + par(mar = c(bottom_mar, 0, top_mar, 2)) shades <- colorRampPalette(colors)(1000) mat_smooth[mat_smooth > max(color_breaks)] <- max(color_breaks) shades_breaks <- approx(color_breaks, n = 1001)$y - image(mat_smooth, col = shades, breaks = shades_breaks, yaxt = "n", xaxt = "n") - if (!is.null(intervals)) { + if (flip) { + mat_smooth <- mat_smooth[nrow(mat_smooth):1, , drop = FALSE] + } + image(mat_smooth, col = shades, breaks = shades_breaks, yaxt = "n", xaxt = "n", ylim=c(0,1)) + if (!is.null(intervals) && plot_x_axis_ticks) { axis(1, at = seq(0, 1, l = 11), round(seq(intervals$start[1], intervals$end[1], l = 11) / 1e+6, 3), las = 2) } @@ -146,11 +186,21 @@ plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", " cli_alert_info("Extending DCAs with {.val {n_peak_smooth}} bins. This can be tweaked using the {.field n_pixels} paramater.") dca_mat <- extend_dca_mat(dca_mat, n_peak_smooth) dca_cols <- c(rgb(0, 0, 1, 0.4), rgb(0, 0, 1, 0.15), rgb(0, 0, 0, 0), rgb(1, 0, 0, 0.15), rgb(1, 0, 0, 0.4)) + if (flip) { + dca_mat <- dca_mat[nrow(dca_mat):1, , drop = FALSE] + } image(dca_mat, col = dca_cols, add = TRUE, breaks = c(-3, -1.5, -0.5, 0.5, 1.5, 3)) } + if (!is.null(y_seps)){ + n = length(colnames(mat_smooth)) + a = y_seps[colnames(mat_smooth)] + y_seps = unname(sapply(split(seq_along(a), a),max))+1 + y_bords = seq(0, 1, length=n+1)[y_seps] + abline(h=y_bords, lty=y_seps_lty, lwd = y_seps_lwd, add=TRUE, ylim = c(0, 1), col="grey") + } } -plot_tss_strip <- function(intervals) { +plot_tss_strip <- function(intervals, flip = FALSE) { plot( # empty plot x = intervals$start:intervals$end, y = c(0, rep(0.7, intervals$end - intervals$start)), @@ -161,11 +211,18 @@ plot_tss_strip <- function(intervals) { yaxt = "n" ) + tss_df <- gintervals.neighbors1("intervs.global.tss", intervals) %>% filter(dist == 0) %>% arrange(chrom, start, end, strand, geneSymbol) %>% -# distinct(geneSymbol, strand, .keep_all = TRUE) %>% + # distinct(geneSymbol, strand, .keep_all = TRUE) %>% select(chrom, tss = start, strand, gene = geneSymbol) + + if (flip) { + # flip the coordinates relative to the start end end of the intervals + tss_df$tss <- intervals$end - tss_df$tss + intervals$start + tss_df$strand <- -1 * tss_df$strand + } if (nrow(tss_df) > 0) { text(x = tss_df$tss, y = rep(0.35, length(tss_df$tss)), labels = tss_df$gene, las = 1, cex = 1) line_len <- (intervals$end - intervals$start) * 0.01 @@ -176,6 +233,7 @@ plot_tss_strip <- function(intervals) { filter(strand == -1) %>% pull(tss) + segments(x0 = tss_df$tss, x1 = tss_df$tss, y0 = 0, y1 = 0.2) if (length(plus_tss) > 0) { arrows(x0 = plus_tss, x1 = plus_tss + line_len, y0 = 0.2, y1 = 0.2, length = 0.05) @@ -220,7 +278,7 @@ extend_dca_mat <- function(dca_mat, n_peak_smooth) { #' \code{peak_lf_thresh1}, and a value of 2 means the log fold change was above \code{peak_lf_thresh2}. The same with -1 and -2 for troughs. #' #' @export -mct_diff_access_on_hc <- function(mat, hc, sz_frac_for_peak = 0.25, u_reg = 4, peak_lf_thresh1 = 1.2, peak_lf_thresh2 = 2, trough_lf_thresh1 = -1, trough_lf_thresh2 = -2) { +mct_diff_access_on_hc <- function(mat, hc, sz_frac_for_peak = 0.25, u_reg = 4, peak_lf_thresh1 = 1.2, peak_lf_thresh2 = 2, trough_lf_thresh1 = -1, trough_lf_thresh2 = -2, ...) { if (length(hc$order) != ncol(mat)) { cli_abort("The number of metacells in the matrix and the hclust object do not match.") } From 6b63b6c4020e2204dff588cf4bd707baad1486f4 Mon Sep 17 00:00:00 2001 From: Ofir Raz Date: Thu, 10 Aug 2023 09:44:59 +0300 Subject: [PATCH 04/16] fixed y-axis alignments --- R/plot-region.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/plot-region.R b/R/plot-region.R index 5e6ae34..0f3a839 100644 --- a/R/plot-region.R +++ b/R/plot-region.R @@ -176,7 +176,7 @@ plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", " if (flip) { mat_smooth <- mat_smooth[nrow(mat_smooth):1, , drop = FALSE] } - image(mat_smooth, col = shades, breaks = shades_breaks, yaxt = "n", xaxt = "n", ylim=c(0,1)) + image(mat_smooth, col = shades, breaks = shades_breaks, yaxt = "n", xaxt = "n") if (!is.null(intervals) && plot_x_axis_ticks) { axis(1, at = seq(0, 1, l = 11), round(seq(intervals$start[1], intervals$end[1], l = 11) / 1e+6, 3), las = 2) } @@ -195,8 +195,8 @@ plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", " n = length(colnames(mat_smooth)) a = y_seps[colnames(mat_smooth)] y_seps = unname(sapply(split(seq_along(a), a),max))+1 - y_bords = seq(0, 1, length=n+1)[y_seps] - abline(h=y_bords, lty=y_seps_lty, lwd = y_seps_lwd, add=TRUE, ylim = c(0, 1), col="grey") + y_bords = seq(-1/(2*(n-1)), 1+1/(2*(n-1)), length=n+1)[y_seps] + abline(h=y_bords, lty=y_seps_lty, lwd = y_seps_lwd, add=TRUE, col="grey") } } From 0fbe94251596927b907f5ddf962d1c12607e4215 Mon Sep 17 00:00:00 2001 From: Ofir Raz Date: Mon, 14 Aug 2023 10:32:52 +0300 Subject: [PATCH 05/16] added ATAC/RNA correlations --- R/plot-region.R | 45 +++++++++++++++++--------- R/shiny.R | 71 +++++++++++++++++++++++++++++++++++++++-- R/utils-multi-genomes.R | 8 +++-- 3 files changed, 104 insertions(+), 20 deletions(-) diff --git a/R/plot-region.R b/R/plot-region.R index 0f3a839..ef28ae7 100644 --- a/R/plot-region.R +++ b/R/plot-region.R @@ -38,7 +38,7 @@ #' } #' #' @export -mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRUE, downsample_n = NULL, metacells = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), hc = NULL, force_cell_type = TRUE, gene_annot = FALSE, n_smooth = 10, n_pixels = 1000, ...) { +mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRUE, downsample_n = NULL, metacells = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), hc = NULL, force_cell_type = TRUE, gene_annot = FALSE, n_smooth = 10, n_pixels = 1000, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", flip = FALSE, genes_correlations = NULL, cor_colors=c("blue", "white", "white", "white", "red"), cor_color_breaks=c(-1,-0.05, 0, 0.05, 1), roll_mean = FALSE, ...) { gset_genome(mct@genome) raw_mat <- mct_get_mat(mct, intervals, downsample, downsample_n) if (!is.null(metacells)) { @@ -84,7 +84,24 @@ mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRU } else { mc_colors <- NULL } - plot_region_mat(mat, mc_colors, colors = colors, color_breaks = color_breaks, intervals = intervals, resolution = mct@resolution, dca_mat = dca_mat, y_seps=y_seps, n_smooth = n_smooth, gene_annot = gene_annot, n_pixels = n_pixels, genome = mct@genome, plot_x_axis_ticks = plot_x_axis_ticks, gene_annot_pos = gene_annot_pos, flip = flip) + if (!is.null(genes_correlations) && has_rna(mct)){ + overlapping_types = intersect(colnames(mct@rna_egc), colnames(mat)) + atac_egc = t(t(mat)/colSums(mat)) + atac_legc = log2(1e-5+atac_egc) + rna_legc = log2(1e-5+mct@rna_egc) + cors = tgstat::tgs_cor( + t(atac_legc[,overlapping_types]), + t(rna_legc[toupper(genes_correlations),overlapping_types,drop=FALSE]), + pairwise.complete.obs = T + ) + mat = cors + mc_colors = rep("white", length(genes_correlations)) + colors = cor_colors + color_breaks = cor_color_breaks + y_seps <- NULL + dca_mat <- NULL + } + plot_region_mat(mat, mc_colors, colors = colors, color_breaks = color_breaks, intervals = intervals, resolution = mct@resolution, dca_mat = dca_mat, y_seps=y_seps, n_smooth = n_smooth, gene_annot = gene_annot, n_pixels = n_pixels, genome = mct@genome, plot_x_axis_ticks = plot_x_axis_ticks, gene_annot_pos = gene_annot_pos, roll_mean = roll_mean, flip = flip) } #' Plot a genomic region given a matrix @@ -98,21 +115,23 @@ mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRU #' @param dca_mat a matrix with the differential cluster accessibility (DCA) for the plotted regions (optional). Output of \code{mct_diff_access_on_hc}. #' @param n_smooth number of genomic bins to use for smoothing the signal. Signal is smoothed by a rolling sum for each metacell (optional). Default is 20. #' @param n_pixels number of pixels in the plot. The DCA regions would be extended by \code{ceiling(2 * nrow(mat) / n_pixels)} (optional). -#' @param gene_annot whether to add gene annotations; these annotations rely on the existence of an \code{annots/refGene.txt} file in the genome's misha directory, and on the existence of an intervals set called "intervs.global.tss" in the genome's misha directory. (optional) #' @param gene_annot whether to add gene annotations; these annotations rely on the existence of the existence of an intervals set called "intervs.global.tss" and "intervs.global.exon" in the genome's misha directory. (optional) #' @param genome the genome to use for the gene annotations (optional) #' @param gene_annot_pos the position of the gene annotations ("top" or "bottom") #' @param flip whether to flip the coordinates (optional) #' #' @export -plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), intervals = NULL, resolution = NULL, dca_mat = NULL, y_seps=NULL, y_seps_lty=2, y_seps_lwd=1, n_smooth = 10, n_pixels = 1000, gene_annot = FALSE, genome = NULL, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", flip = FALSE) { - mat_smooth <- RcppRoll::roll_sum(mat, n = n_smooth, fill = c(0, 0, 0)) +plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), intervals = NULL, resolution = NULL, dca_mat = NULL, y_seps=NULL, y_seps_lty=2, y_seps_lwd=1, n_smooth = 10, n_pixels = 1000, gene_annot = FALSE, genome = NULL, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", flip = FALSE, roll_mean=FALSE) { + if(roll_mean){ + mat_smooth <- RcppRoll::roll_mean(mat, n = n_smooth, fill = c(0, 0, 0)) + } else { + mat_smooth <- RcppRoll::roll_sum(mat, n = n_smooth, fill = c(0, 0, 0)) + } if (gene_annot) { if (is.null(intervals) || is.null(resolution)) { cli_abort("If gene annotations are requested, intervals and resolution must be specified") } - layout(cbind(c(0, 0, 3), c(1, 2, 4)), widths = c(1, 20), heights = c(3, 0.5, 15)) # Different layouts depending on annotation position if (gene_annot_pos == "top") { @@ -123,9 +142,7 @@ plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", " par(mar = c(0, 0, 2, 2)) plot_tss_strip(intervals, flip = flip) - - par(mar = c(0, 0, 0, 2)) - gene_annots <- make_gene_annot(intervals, resolution) + if (gene_annot_pos == "top") { par(mar = c(0, 0, 0, 2)) } else { @@ -136,10 +153,9 @@ plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", " if (is.null(gene_annots[["exon_coords"]])) { image(as.matrix(rep(0, ncol(mat)), nrow = 1), col = c("white", "black"), breaks = c(-0.5, 0, 1), yaxt = "n", xaxt = "n", frame.plot = FALSE) } else { - image(as.matrix(gene_annots[["exon_coords"]], nrow = 1), col = c("white", "black"), breaks = c(-0.5, 0, 1), yaxt = "n", xaxt = "n", frame.plot = FALSE) - exon_mat <- t(as.matrix(gene_annots[["exon_coords"]])) + exon_mat <- as.matrix(gene_annots[["exon_coords"]]) if (flip) { - exon_mat <- exon_mat[, ncol(exon_mat):1, drop = FALSE] + exon_mat <- exon_mat[nrow(exon_mat):1,, drop = FALSE] } image(exon_mat, col = c("white", "black"), breaks = c(-0.5, 0, 1), yaxt = "n", xaxt = "n", frame.plot = FALSE) @@ -148,13 +164,12 @@ plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", " top_mar <- 0 left_mar <- 2 } else { + #par(mar = c(0,0,0,0)) layout(matrix(1:2, nrow = 1), w = c(1, 20)) - top_mar <- 2 + top_mar <- 0 left_mar <- 2 } - par(mar = c(4, left_mar, top_mar, 0)) - image(t(as.matrix(1:length(mc_colors), nrow = 1)), col = mc_colors, yaxt = "n", xaxt = "n") if (plot_x_axis_ticks && gene_annot_pos == "bottom") { plot_x_axis_ticks <- FALSE cli_alert_warning("Gene annotations are at the bottom, so x-axis ticks are disabled.") diff --git a/R/shiny.R b/R/shiny.R index 320027f..79285e3 100644 --- a/R/shiny.R +++ b/R/shiny.R @@ -119,6 +119,7 @@ app_ui <- function(request) { plotOutput( "region_plot", height = "50vh", + fill=FALSE, brush = brushOpts( id = "region_brush", direction = "x", @@ -126,6 +127,14 @@ app_ui <- function(request) { delay = 1000 ) ) + ), + shinycssloaders::withSpinner( + plotOutput( + "region_plot_cor", + height = "20px", + fill=FALSE + ) + ), ) ) ) @@ -169,7 +178,8 @@ app_server <- function(input, output, session) { observe({ shinyWidgets::updateVirtualSelect( "genes", - choices = shinyWidgets::prepare_choices(promoters, label, coords) + choices = shinyWidgets::prepare_choices(promoters, label, coords), + selected = "chrX:60843429-60943430" ) }) @@ -245,6 +255,8 @@ app_server <- function(input, output, session) { mct_plot_region( shiny_mct, intervals(), + downsample = TRUE, + downsample_n = 2500000, detect_dca = input$detect_dca %||% FALSE, gene_annot = TRUE, hc = hc, @@ -252,7 +264,58 @@ app_server <- function(input, output, session) { trough_lf_thresh1 = input$dca_trough_lf_thresh, sz_frac_for_peak = input$dca_sz_frac_for_peak, color_breaks = color_breaks, - n_smooth = n_smooth + n_smooth = n_smooth, + plot_x_axis_ticks = FALSE + ) + }, + res = 96 + ) %>% + bindCache( + intervals(), + input$detect_dca, + input$dca_peak_lf_thresh, + input$dca_trough_lf_thresh, + input$dca_sz_frac_for_peak, + input$min_color, + input$max_color, + input$n_smooth + ) + output$region_plot_cor <- renderPlot( + { + req(intervals()) + req(input$dca_peak_lf_thresh) + req(input$dca_trough_lf_thresh) + req(input$dca_sz_frac_for_peak) + req(input$min_color) + req(input$max_color) + req(input$min_color < input$max_color) + req(input$n_smooth) + req(input$n_smooth >= 1) + color_breaks <- c(0, seq( + input$min_color, + input$max_color, + length.out = 4 + )) + + n_smooth <- max(round(input$n_smooth / shiny_mct@resolution), 1) + + mct_plot_region( + shiny_mct, intervals(), + downsample = TRUE, + downsample_n = 1500000, + detect_dca = input$detect_dca %||% FALSE, + gene_annot = FALSE, + hc = NULL, + peak_lf_thresh1 = input$dca_peak_lf_thresh, + trough_lf_thresh1 = input$dca_trough_lf_thresh, + sz_frac_for_peak = input$dca_sz_frac_for_peak, + color_breaks = color_breaks, + n_smooth = 1, + plot_x_axis_ticks = FALSE, + roll_mean = TRUE, + genes_correlations = promoters[promoters$coords==input$genes,]$geneSymbol, + cor_color_breaks=c(-0.8, -0.4, 0, 0.4, 0.8), + cor_colors = c("blue","white","white","white","red") ) }, res = 96 @@ -355,6 +418,7 @@ parse_coordinate_text <- function(text) { #' #' @param mct MCT object #' @param hc an hclust object with clustering of the metacells (optional, see \code{mct_plot_region} +#' @param annotations an intervals object with annotations for the second genome (optional) #' #' @examples #' \dontrun{ @@ -366,6 +430,7 @@ run_app <- function(mct, hc = NULL, port = NULL, host = NULL, + annotations = NULL, launch.browser = FALSE) { library(misha) library(shiny) @@ -377,6 +442,8 @@ run_app <- function(mct, shiny_hc <<- NULL } gset_genome(mct@genome, force = FALSE) + shiny_annotations <<- annotations + shiny::shinyApp( ui = app_ui, server = app_server, diff --git a/R/utils-multi-genomes.R b/R/utils-multi-genomes.R index 359c3a3..63d54cf 100644 --- a/R/utils-multi-genomes.R +++ b/R/utils-multi-genomes.R @@ -240,7 +240,7 @@ is_comparison_flipped <- function(intervals_comparison) { return(cr < 0) } -plot_intervals_comparison <- function(intervals_comparison, annotations = NULL, grid_resolution = 100) { +plot_intervals_comparison <- function(intervals_comparison, annotations = NULL, correlations = NULL, grid_resolution = 100) { i12 <- intervals_comparison if (is_comparison_flipped(i12)) { @@ -256,6 +256,8 @@ plot_intervals_comparison <- function(intervals_comparison, annotations = NULL, segments(x0 = i12$x2, y0 = 0.27, x1 = i12$x2, y1 = 0.25, lwd = 1, col = ifelse(1:length(i12$x1) %% round(grid_resolution / 10) == 0, "black", "grey")) text(x = i12$x2[i12$x2 <= 1 & i12$x2 >= 0], y = 0.2, labels = round(i12$start1[i12$x2 <= 1 & i12$x2 >= 0] / 1e+6, 3), srt = 90, adj = c(1, 0.5), xpd = TRUE, cex = 0.5) segments(x0 = i12$x2, y0 = 0, x1 = i12$x2, y1 = 0.01, lwd = 1) + if (!is.null(correlations)) { + } if (!is.null(annotations)) { annot1 <- annotations[["annot1"]] lifted_annot1 <- annotations[["lifted_annot1"]] @@ -271,13 +273,13 @@ plot_intervals_comparison <- function(intervals_comparison, annotations = NULL, x = c(annot1$x1, lifted_annot2$x1), y = c(rnorm(nrow(annot1), sd = 0.001) + 1.019, rnorm(nrow(lifted_annot2), sd = 0.001) + 0.981), col = c(annot1_colors, annot2_colors), - pch = 3, cex = 0.5 + pch = 3, cex = 1 ) points( x = c(annot2$x2, lifted_annot1$x2), y = c(rnorm(nrow(annot2), sd = 0.001) - 0.019, rnorm(nrow(lifted_annot1), sd = 0.001) + 0.019), col = c(annot2_colors, annot1_colors), - pch = 3, cex = 0.5 + pch = 3, cex = 1 ) } } From ac273d070a2120643c86613a304a99e473134ffa Mon Sep 17 00:00:00 2001 From: Ofir Raz Date: Mon, 14 Aug 2023 12:44:02 +0300 Subject: [PATCH 06/16] adapted hights --- R/plot-region.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/plot-region.R b/R/plot-region.R index ef28ae7..4f485fc 100644 --- a/R/plot-region.R +++ b/R/plot-region.R @@ -135,12 +135,12 @@ plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", " # Different layouts depending on annotation position if (gene_annot_pos == "top") { - layout(cbind(c(0, 0, 3), c(1, 2, 4)), widths = c(1, 20), heights = c(3, 0.5, 15)) + layout(cbind(c(0, 0, 3), c(1, 2, 4)), widths = c(1, 20), heights = c(2, 0.5, 15)) } else if (gene_annot_pos == "bottom") { - layout(cbind(c(3, 0, 0, 0), c(4, 1, 2, 0)), widths = c(1, 20), heights = c(15, 3, 0.5, 0.5)) + layout(cbind(c(3, 0, 0, 0), c(4, 1, 2, 0)), widths = c(1, 20), heights = c(15, 2, 0.5, 0.5)) } - par(mar = c(0, 0, 2, 2)) + par(mar = c(0, 0, 1, 2)) plot_tss_strip(intervals, flip = flip) if (gene_annot_pos == "top") { From 6eba1777aed0c36935f7a339bc7e18fb3929f568 Mon Sep 17 00:00:00 2001 From: aviezerl Date: Mon, 14 Aug 2023 13:16:12 +0300 Subject: [PATCH 07/16] added hover --- R/shiny.R | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/R/shiny.R b/R/shiny.R index 79285e3..9d4e075 100644 --- a/R/shiny.R +++ b/R/shiny.R @@ -120,6 +120,11 @@ app_ui <- function(request) { "region_plot", height = "50vh", fill=FALSE, + hover = hoverOpts( + id = "region_hover", + delayType = "throttle", + delay = 250 + ), brush = brushOpts( id = "region_brush", direction = "x", @@ -136,7 +141,8 @@ app_ui <- function(request) { ) ), ) - ) + ), + uiOutput("hover_info", style = "pointer-events: none") ) ) } @@ -286,6 +292,7 @@ app_server <- function(input, output, session) { req(input$dca_peak_lf_thresh) req(input$dca_trough_lf_thresh) req(input$dca_sz_frac_for_peak) + req(has_rna(shiny_mct)) req(input$min_color) req(input$max_color) req(input$min_color < input$max_color) @@ -377,6 +384,36 @@ app_server <- function(input, output, session) { update_intervals(zoom_intervals) }) + output$hover_info <- renderUI({ + + hover <- input$region_hover + req(hover) + + # convert y to metacell + mc <- mct@metacells[1] + mc_metadata <- mct@metadata %>% filter(metacell == mc) %>% slice(1) + mc_color <- mc_metadata$color + mc_cell_type <- mc_metadata$cell_type + + # taken from https://gitlab.com/-/snippets/16220 + left_px <- hover$coords_css$x + top_px <- hover$coords_css$y + style <- glue( + "background-color: {grDevices::rgb(t(grDevices::col2rgb(mc_color) / 255))}; position:absolute; z-index:100; left: {left_px + 2}px; top: {top_px + 2}px;" + ) + + tooltip <- paste( + glue("Metacell: {mc}"), + glue("Cell type: {mc_cell_type}"), + sep = "
" + ) + + wellPanel( + style = style, + p(HTML(tooltip)) + ) +}) + observe({ shinyjs::toggle(id = "dca_peak_lf_thresh", condition = input$detect_dca && input$show_controls) shinyjs::toggle(id = "dca_trough_lf_thresh", condition = input$detect_dca && input$show_controls) From da04958757ff06e24cbc195dd15908a3777e3ba2 Mon Sep 17 00:00:00 2001 From: Ofir Raz Date: Mon, 14 Aug 2023 15:12:55 +0300 Subject: [PATCH 08/16] hover WIP --- R/shiny.R | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/R/shiny.R b/R/shiny.R index 9d4e075..1c1c8e6 100644 --- a/R/shiny.R +++ b/R/shiny.R @@ -390,21 +390,53 @@ app_server <- function(input, output, session) { req(hover) # convert y to metacell - mc <- mct@metacells[1] + gset_genome(mct@genome) + raw_mat <- mct_get_mat(mct, intervals, downsample, downsample_n) + if (!is.null(metacells)) { + if (any(metacells %!in% mct@metacells)) { + cli_abort("The following metacells are not in the McTracks object: {.val {metacells}}") + } + raw_mat <- raw_mat[, intersect(metacells, colnames(raw_mat)), drop = FALSE] + } + + mat <- raw_mat[, intersect(mct@metacells[mct@order], colnames(raw_mat)), drop = FALSE] + + if (detect_dca && is.null(hc)) { + if (!has_rna(mct)) { + cli_abort("Cannot detect DCA without either an hclust object or RNA data.") + } + mct <- mct_subset_metacells(mct, colnames(mat)) + hc <- mc_hclust_rna(mct, force_cell_type = force_cell_type) + } + + if (!is.null(hc)) { + if (any(hc$label %!in% colnames(mat))) { + missing_mcs <- setdiff(hc$label, colnames(mat)) + cli_warn("The following metacells are present in the hclust object, but are missing in the matrix (this is probably due to downsampling): {.val {missing_mcs}}") + hc <- dendextend::prune(hc, missing_mcs) + } + mat <- mat[, hc$label] + } + + n = length(hc$label) + range_padding = 1/(2*(n-1)) + mat_y = hover$y*(1+2*range_padding) - range_padding + mc_idx = hc$label[floor(mat_y*n)+1] + mc <- mct@metacells[mc_idx] mc_metadata <- mct@metadata %>% filter(metacell == mc) %>% slice(1) mc_color <- mc_metadata$color mc_cell_type <- mc_metadata$cell_type - + message(round(mat_y*n), mc_idx, mc) # taken from https://gitlab.com/-/snippets/16220 left_px <- hover$coords_css$x top_px <- hover$coords_css$y style <- glue( - "background-color: {grDevices::rgb(t(grDevices::col2rgb(mc_color) / 255))}; position:absolute; z-index:100; left: {left_px + 2}px; top: {top_px + 2}px;" + "background-color: {grDevices::rgb(t(grDevices::col2rgb(mc_color) / 255))}; position:absolute; z-index:100; left: {left_px + 2}px; top: {top_px + 150}px;" ) - tooltip <- paste( glue("Metacell: {mc}"), glue("Cell type: {mc_cell_type}"), + style, sep = "
" ) From 5b0cdbee51d41920c376a4af3a01ee8a4a2f3100 Mon Sep 17 00:00:00 2001 From: Ofir Raz Date: Tue, 15 Aug 2023 08:10:44 +0300 Subject: [PATCH 09/16] tooltip fixed --- R/plot-region.R | 31 ++++++++++++++++++ R/shiny.R | 84 ++++++++++++++++++++++++++----------------------- 2 files changed, 75 insertions(+), 40 deletions(-) diff --git a/R/plot-region.R b/R/plot-region.R index 4f485fc..75ef100 100644 --- a/R/plot-region.R +++ b/R/plot-region.R @@ -1,3 +1,34 @@ +get_raw_mat <- function(mct, intervals, detect_dca = FALSE, downsample = TRUE, downsample_n = NULL, metacells = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), hc = NULL, force_cell_type = TRUE, gene_annot = FALSE, n_smooth = 10, n_pixels = 1000, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", flip = FALSE, genes_correlations = NULL, cor_colors=c("blue", "white", "white", "white", "red"), cor_color_breaks=c(-1,-0.05, 0, 0.05, 1), roll_mean = FALSE, ...) { + gset_genome(mct@genome) + raw_mat <- mct_get_mat(mct, intervals, downsample, downsample_n) + if (!is.null(metacells)) { + if (any(metacells %!in% mct@metacells)) { + cli_abort("The following metacells are not in the McTracks object: {.val {metacells}}") + } + raw_mat <- raw_mat[, intersect(metacells, colnames(raw_mat)), drop = FALSE] + } + + mat <- raw_mat[, intersect(mct@metacells[mct@order], colnames(raw_mat)), drop = FALSE] + + if (detect_dca && is.null(hc)) { + if (!has_rna(mct)) { + cli_abort("Cannot detect DCA without either an hclust object or RNA data.") + } + mct <- mct_subset_metacells(mct, colnames(mat)) + hc <- mc_hclust_rna(mct, force_cell_type = force_cell_type) + } + + if (!is.null(hc)) { + if (any(hc$label %!in% colnames(mat))) { + missing_mcs <- setdiff(hc$label, colnames(mat)) + cli_warn("The following metacells are present in the hclust object, but are missing in the matrix (this is probably due to downsampling): {.val {missing_mcs}}") + hc <- dendextend::prune(hc, missing_mcs) + } + mat <- mat[, hc$label] + } + mat +} + #' Plot a genomic region #' #' @param mct an McTracks object. diff --git a/R/shiny.R b/R/shiny.R index 1c1c8e6..b8c654c 100644 --- a/R/shiny.R +++ b/R/shiny.R @@ -19,7 +19,7 @@ app_ui <- function(request) { titlePanel(title), fluidRow( column( - 12, + 10, wellPanel( fluidRow( align = "left", @@ -112,7 +112,15 @@ app_ui <- function(request) { ) ) ) - ) + ), + column(2, + plotOutput( + "scatter_plot", + height = "300px", + width = "300px", + fill=FALSE, + ) + ) ), fluidRow( shinycssloaders::withSpinner( @@ -150,6 +158,8 @@ app_ui <- function(request) { app_server <- function(input, output, session) { intervals <- reactiveVal() globals <- reactiveValues() + mat <- reactiveVal() + mat2 <- reactiveVal() if (!is.null(shiny_hc)) { hc <- shiny_hc @@ -239,7 +249,27 @@ app_server <- function(input, output, session) { intervals(globals$history[[globals$history_iterator]]) } }) - + output$region_plot_cor <- renderPlot( + plot(1:5,5:1) + ) + observe({ + req(intervals()) + raw_mat <- get_raw_mat( + shiny_mct, intervals(), + downsample = TRUE, + downsample_n = 1500000, + detect_dca = input$detect_dca %||% FALSE, + gene_annot = TRUE, + hc = hc, + peak_lf_thresh1 = input$dca_peak_lf_thresh, + trough_lf_thresh1 = input$dca_trough_lf_thresh, + sz_frac_for_peak = input$dca_sz_frac_for_peak, + color_breaks = color_breaks, + n_smooth = n_smooth, + plot_x_axis_ticks = FALSE + ) + mat(raw_mat) + }) output$region_plot <- renderPlot( { req(intervals()) @@ -251,6 +281,7 @@ app_server <- function(input, output, session) { req(input$min_color < input$max_color) req(input$n_smooth) req(input$n_smooth >= 1) + req(mat()) color_breaks <- c(0, seq( input$min_color, input$max_color, @@ -262,7 +293,7 @@ app_server <- function(input, output, session) { mct_plot_region( shiny_mct, intervals(), downsample = TRUE, - downsample_n = 2500000, + downsample_n = 1500000, detect_dca = input$detect_dca %||% FALSE, gene_annot = TRUE, hc = hc, @@ -388,45 +419,18 @@ app_server <- function(input, output, session) { hover <- input$region_hover req(hover) - - # convert y to metacell - gset_genome(mct@genome) - raw_mat <- mct_get_mat(mct, intervals, downsample, downsample_n) - if (!is.null(metacells)) { - if (any(metacells %!in% mct@metacells)) { - cli_abort("The following metacells are not in the McTracks object: {.val {metacells}}") - } - raw_mat <- raw_mat[, intersect(metacells, colnames(raw_mat)), drop = FALSE] - } - - mat <- raw_mat[, intersect(mct@metacells[mct@order], colnames(raw_mat)), drop = FALSE] - - if (detect_dca && is.null(hc)) { - if (!has_rna(mct)) { - cli_abort("Cannot detect DCA without either an hclust object or RNA data.") - } - mct <- mct_subset_metacells(mct, colnames(mat)) - hc <- mc_hclust_rna(mct, force_cell_type = force_cell_type) - } - - if (!is.null(hc)) { - if (any(hc$label %!in% colnames(mat))) { - missing_mcs <- setdiff(hc$label, colnames(mat)) - cli_warn("The following metacells are present in the hclust object, but are missing in the matrix (this is probably due to downsampling): {.val {missing_mcs}}") - hc <- dendextend::prune(hc, missing_mcs) - } - mat <- mat[, hc$label] - } - - n = length(hc$label) - range_padding = 1/(2*(n-1)) - mat_y = hover$y*(1+2*range_padding) - range_padding - mc_idx = hc$label[floor(mat_y*n)+1] - mc <- mct@metacells[mc_idx] + req(mat()) + raw_mat = mat() + n = length(colnames(raw_mat)) + #range_padding = 1/(2*(n-1)) + range_padding = 0 + mat_y = hover$y*(1+2*range_padding)-range_padding + mat_idx = n-max(floor(mat_y*n),0) + mc <- colnames(raw_mat)[mat_idx] mc_metadata <- mct@metadata %>% filter(metacell == mc) %>% slice(1) mc_color <- mc_metadata$color mc_cell_type <- mc_metadata$cell_type - message(round(mat_y*n), mc_idx, mc) + #message(mat_idx, ' ', mc, ' ', mc_color, ' ', mc_cell_type) # taken from https://gitlab.com/-/snippets/16220 left_px <- hover$coords_css$x top_px <- hover$coords_css$y From 5a05698328ae6a2c169a876c239ae5abdfc46a7d Mon Sep 17 00:00:00 2001 From: aviezerl Date: Tue, 15 Aug 2023 10:20:05 +0300 Subject: [PATCH 10/16] Added correlation scatter plot --- R/plot-region.R | 58 +++++++++--------- R/shiny.R | 114 +++++++++++++++++++++++------------ R/utils-multi-genomes.R | 56 +++++++++++++++++ man/mct_diff_access_on_hc.Rd | 3 +- man/mct_plot_region.Rd | 13 +++- man/plot_region_mat.Rd | 18 +++++- 6 files changed, 191 insertions(+), 71 deletions(-) diff --git a/R/plot-region.R b/R/plot-region.R index 75ef100..8a7b92b 100644 --- a/R/plot-region.R +++ b/R/plot-region.R @@ -1,4 +1,4 @@ -get_raw_mat <- function(mct, intervals, detect_dca = FALSE, downsample = TRUE, downsample_n = NULL, metacells = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), hc = NULL, force_cell_type = TRUE, gene_annot = FALSE, n_smooth = 10, n_pixels = 1000, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", flip = FALSE, genes_correlations = NULL, cor_colors=c("blue", "white", "white", "white", "red"), cor_color_breaks=c(-1,-0.05, 0, 0.05, 1), roll_mean = FALSE, ...) { +get_raw_mat <- function(mct, intervals, detect_dca = FALSE, downsample = TRUE, downsample_n = NULL, metacells = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), hc = NULL, force_cell_type = TRUE, gene_annot = FALSE, n_smooth = 10, n_pixels = 1000, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", flip = FALSE, genes_correlations = NULL, cor_colors = c("blue", "white", "white", "white", "red"), cor_color_breaks = c(-1, -0.05, 0, 0.05, 1), roll_mean = FALSE, ...) { gset_genome(mct@genome) raw_mat <- mct_get_mat(mct, intervals, downsample, downsample_n) if (!is.null(metacells)) { @@ -69,7 +69,7 @@ get_raw_mat <- function(mct, intervals, detect_dca = FALSE, downsample = TRUE, d #' } #' #' @export -mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRUE, downsample_n = NULL, metacells = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), hc = NULL, force_cell_type = TRUE, gene_annot = FALSE, n_smooth = 10, n_pixels = 1000, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", flip = FALSE, genes_correlations = NULL, cor_colors=c("blue", "white", "white", "white", "red"), cor_color_breaks=c(-1,-0.05, 0, 0.05, 1), roll_mean = FALSE, ...) { +mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRUE, downsample_n = NULL, metacells = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), hc = NULL, force_cell_type = TRUE, gene_annot = FALSE, n_smooth = 10, n_pixels = 1000, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", flip = FALSE, genes_correlations = NULL, cor_colors = c("blue", "white", "white", "white", "red"), cor_color_breaks = c(-1, -0.05, 0, 0.05, 1), roll_mean = FALSE, ...) { gset_genome(mct@genome) raw_mat <- mct_get_mat(mct, intervals, downsample, downsample_n) if (!is.null(metacells)) { @@ -106,7 +106,7 @@ mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRU y_seps <- NULL if (!is.null(hc)) { mat <- mat[, hc$order, drop = FALSE] - y_seps = cutree(hc, h=0.1) + y_seps <- cutree(hc, h = 0.1) } if (has_cell_type(mct) && has_cell_type_colors(mct)) { @@ -115,24 +115,24 @@ mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRU } else { mc_colors <- NULL } - if (!is.null(genes_correlations) && has_rna(mct)){ - overlapping_types = intersect(colnames(mct@rna_egc), colnames(mat)) - atac_egc = t(t(mat)/colSums(mat)) - atac_legc = log2(1e-5+atac_egc) - rna_legc = log2(1e-5+mct@rna_egc) - cors = tgstat::tgs_cor( - t(atac_legc[,overlapping_types]), - t(rna_legc[toupper(genes_correlations),overlapping_types,drop=FALSE]), + if (!is.null(genes_correlations) && has_rna(mct)) { + overlapping_types <- intersect(colnames(mct@rna_egc), colnames(mat)) + # atac_egc = t(t(mat)/colSums(mat)) + # atac_legc = log2(1e-5+atac_egc) + rna_legc <- log2(1e-5 + mct@rna_egc) + cors <- tgstat::tgs_cor( + t(mat[, overlapping_types]), + t(rna_legc[toupper(genes_correlations), overlapping_types, drop = FALSE]), pairwise.complete.obs = T ) - mat = cors - mc_colors = rep("white", length(genes_correlations)) - colors = cor_colors - color_breaks = cor_color_breaks + mat <- cors + mc_colors <- rep("white", length(genes_correlations)) + colors <- cor_colors + color_breaks <- cor_color_breaks y_seps <- NULL dca_mat <- NULL } - plot_region_mat(mat, mc_colors, colors = colors, color_breaks = color_breaks, intervals = intervals, resolution = mct@resolution, dca_mat = dca_mat, y_seps=y_seps, n_smooth = n_smooth, gene_annot = gene_annot, n_pixels = n_pixels, genome = mct@genome, plot_x_axis_ticks = plot_x_axis_ticks, gene_annot_pos = gene_annot_pos, roll_mean = roll_mean, flip = flip) + plot_region_mat(mat, mc_colors, colors = colors, color_breaks = color_breaks, intervals = intervals, resolution = mct@resolution, dca_mat = dca_mat, y_seps = y_seps, n_smooth = n_smooth, gene_annot = gene_annot, n_pixels = n_pixels, genome = mct@genome, plot_x_axis_ticks = plot_x_axis_ticks, gene_annot_pos = gene_annot_pos, roll_mean = roll_mean, flip = flip) } #' Plot a genomic region given a matrix @@ -152,8 +152,8 @@ mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRU #' @param flip whether to flip the coordinates (optional) #' #' @export -plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), intervals = NULL, resolution = NULL, dca_mat = NULL, y_seps=NULL, y_seps_lty=2, y_seps_lwd=1, n_smooth = 10, n_pixels = 1000, gene_annot = FALSE, genome = NULL, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", flip = FALSE, roll_mean=FALSE) { - if(roll_mean){ +plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), intervals = NULL, resolution = NULL, dca_mat = NULL, y_seps = NULL, y_seps_lty = 2, y_seps_lwd = 1, n_smooth = 10, n_pixels = 1000, gene_annot = FALSE, genome = NULL, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", flip = FALSE, roll_mean = FALSE) { + if (roll_mean) { mat_smooth <- RcppRoll::roll_mean(mat, n = n_smooth, fill = c(0, 0, 0)) } else { mat_smooth <- RcppRoll::roll_sum(mat, n = n_smooth, fill = c(0, 0, 0)) @@ -173,20 +173,20 @@ plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", " par(mar = c(0, 0, 1, 2)) plot_tss_strip(intervals, flip = flip) - + if (gene_annot_pos == "top") { par(mar = c(0, 0, 0, 2)) } else { par(mar = c(0, 0, 0, 2)) } gene_annots <- make_gene_annot(intervals, resolution, genome) - + if (is.null(gene_annots[["exon_coords"]])) { image(as.matrix(rep(0, ncol(mat)), nrow = 1), col = c("white", "black"), breaks = c(-0.5, 0, 1), yaxt = "n", xaxt = "n", frame.plot = FALSE) } else { exon_mat <- as.matrix(gene_annots[["exon_coords"]]) if (flip) { - exon_mat <- exon_mat[nrow(exon_mat):1,, drop = FALSE] + exon_mat <- exon_mat[nrow(exon_mat):1, , drop = FALSE] } image(exon_mat, col = c("white", "black"), breaks = c(-0.5, 0, 1), yaxt = "n", xaxt = "n", frame.plot = FALSE) @@ -195,7 +195,7 @@ plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", " top_mar <- 0 left_mar <- 2 } else { - #par(mar = c(0,0,0,0)) + # par(mar = c(0,0,0,0)) layout(matrix(1:2, nrow = 1), w = c(1, 20)) top_mar <- 0 left_mar <- 2 @@ -217,7 +217,7 @@ plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", " par(mar = c(bottom_mar, 0, top_mar, 2)) shades <- colorRampPalette(colors)(1000) - mat_smooth[mat_smooth > max(color_breaks)] <- max(color_breaks) + # mat_smooth[mat_smooth > max(color_breaks)] <- max(color_breaks) shades_breaks <- approx(color_breaks, n = 1001)$y if (flip) { mat_smooth <- mat_smooth[nrow(mat_smooth):1, , drop = FALSE] @@ -237,12 +237,12 @@ plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", " } image(dca_mat, col = dca_cols, add = TRUE, breaks = c(-3, -1.5, -0.5, 0.5, 1.5, 3)) } - if (!is.null(y_seps)){ - n = length(colnames(mat_smooth)) - a = y_seps[colnames(mat_smooth)] - y_seps = unname(sapply(split(seq_along(a), a),max))+1 - y_bords = seq(-1/(2*(n-1)), 1+1/(2*(n-1)), length=n+1)[y_seps] - abline(h=y_bords, lty=y_seps_lty, lwd = y_seps_lwd, add=TRUE, col="grey") + if (!is.null(y_seps)) { + n <- length(colnames(mat_smooth)) + a <- y_seps[colnames(mat_smooth)] + y_seps <- unname(sapply(split(seq_along(a), a), max)) + 1 + y_bords <- seq(-1 / (2 * (n - 1)), 1 + 1 / (2 * (n - 1)), length = n + 1)[y_seps] + abline(h = y_bords, lty = y_seps_lty, lwd = y_seps_lwd, add = TRUE, col = "grey") } } diff --git a/R/shiny.R b/R/shiny.R index b8c654c..04ea11f 100644 --- a/R/shiny.R +++ b/R/shiny.R @@ -113,26 +113,27 @@ app_ui <- function(request) { ) ) ), - column(2, + column( + 2, plotOutput( "scatter_plot", height = "300px", width = "300px", - fill=FALSE, - ) + fill = FALSE, ) + ) ), fluidRow( shinycssloaders::withSpinner( plotOutput( "region_plot", height = "50vh", - fill=FALSE, + fill = FALSE, hover = hoverOpts( id = "region_hover", delayType = "throttle", delay = 250 - ), + ), brush = brushOpts( id = "region_brush", direction = "x", @@ -145,7 +146,8 @@ app_ui <- function(request) { plotOutput( "region_plot_cor", height = "20px", - fill=FALSE + fill = FALSE, + click = "region_plot_cor_click" ) ), ) @@ -160,6 +162,7 @@ app_server <- function(input, output, session) { globals <- reactiveValues() mat <- reactiveVal() mat2 <- reactiveVal() + scatter_data <- reactiveVal() if (!is.null(shiny_hc)) { hc <- shiny_hc @@ -249,27 +252,62 @@ app_server <- function(input, output, session) { intervals(globals$history[[globals$history_iterator]]) } }) - output$region_plot_cor <- renderPlot( - plot(1:5,5:1) - ) + + observeEvent(input$region_plot_cor_click, { + mat_x <- input$region_plot_cor_click$x + req(mat_x) + mat_y <- input$region_plot_cor_click$y + req(mat_y) + m <- mat() + req(m) + mat_idxs <- get_heatmap_idx(mat_x, mat_y, m) + + gene <- toupper(promoters[promoters$coords == input$genes, ]$geneSymbol) + req(gene %in% rownames(mct@rna_egc)) + + mcs <- intersect(colnames(mct@rna_egc), colnames(m)) + + data <- data.frame(atac = m[mat_idxs[1], mcs], rna = mct@rna_egc[gene, mcs], metacell = mcs) %>% + mutate(rna = log2(rna + 1e-5)) %>% + left_join(mct@metadata, by = "metacell") %>% + mutate(coords = rownames(m)[mat_idxs[1]]) + + scatter_data(data) + }) + + output$scatter_plot <- renderPlot({ + data <- scatter_data() + req(data) + + data %>% + ggplot(aes(x = atac, y = rna, color = color)) + + geom_point(size = 2) + + scale_color_identity() + + theme_classic() + + xlab("ATAC") + + ylab("RNA") + + ggtitle(glue("Correlation: {round(cor(data$atac, data$rna), digits = 4)}"), subtitle = data$coords[1]) + }) + observe({ req(intervals()) raw_mat <- get_raw_mat( - shiny_mct, intervals(), - downsample = TRUE, - downsample_n = 1500000, - detect_dca = input$detect_dca %||% FALSE, - gene_annot = TRUE, - hc = hc, - peak_lf_thresh1 = input$dca_peak_lf_thresh, - trough_lf_thresh1 = input$dca_trough_lf_thresh, - sz_frac_for_peak = input$dca_sz_frac_for_peak, - color_breaks = color_breaks, - n_smooth = n_smooth, - plot_x_axis_ticks = FALSE + shiny_mct, intervals(), + downsample = TRUE, + downsample_n = 1500000, + detect_dca = input$detect_dca %||% FALSE, + gene_annot = TRUE, + hc = hc, + peak_lf_thresh1 = input$dca_peak_lf_thresh, + trough_lf_thresh1 = input$dca_trough_lf_thresh, + sz_frac_for_peak = input$dca_sz_frac_for_peak, + color_breaks = color_breaks, + n_smooth = n_smooth, + plot_x_axis_ticks = FALSE ) mat(raw_mat) }) + output$region_plot <- renderPlot( { req(intervals()) @@ -351,9 +389,9 @@ app_server <- function(input, output, session) { n_smooth = 1, plot_x_axis_ticks = FALSE, roll_mean = TRUE, - genes_correlations = promoters[promoters$coords==input$genes,]$geneSymbol, - cor_color_breaks=c(-0.8, -0.4, 0, 0.4, 0.8), - cor_colors = c("blue","white","white","white","red") + genes_correlations = promoters[promoters$coords == input$genes, ]$geneSymbol, + cor_color_breaks = c(-0.8, -0.4, 0, 0.4, 0.8), + cor_colors = c("blue", "white", "white", "white", "red") ) }, res = 96 @@ -416,31 +454,31 @@ app_server <- function(input, output, session) { }) output$hover_info <- renderUI({ - hover <- input$region_hover req(hover) - req(mat()) - raw_mat = mat() - n = length(colnames(raw_mat)) - #range_padding = 1/(2*(n-1)) - range_padding = 0 - mat_y = hover$y*(1+2*range_padding)-range_padding - mat_idx = n-max(floor(mat_y*n),0) - mc <- colnames(raw_mat)[mat_idx] - mc_metadata <- mct@metadata %>% filter(metacell == mc) %>% slice(1) + raw_mat <- mat() + req(raw_mat) + mat_idxs <- get_heatmap_idx(hover$x, hover$y, raw_mat) + + mc <- colnames(raw_mat)[mat_idxs[2]] + req(mc) + mc_metadata <- mct@metadata %>% + filter(metacell == mc) %>% + slice(1) mc_color <- mc_metadata$color mc_cell_type <- mc_metadata$cell_type - #message(mat_idx, ' ', mc, ' ', mc_color, ' ', mc_cell_type) + coords <- rownames(raw_mat)[mat_idxs[1]] + # taken from https://gitlab.com/-/snippets/16220 left_px <- hover$coords_css$x top_px <- hover$coords_css$y style <- glue( - "background-color: {grDevices::rgb(t(grDevices::col2rgb(mc_color) / 255))}; position:absolute; z-index:100; left: {left_px + 2}px; top: {top_px + 150}px;" + "background-color: {grDevices::rgb(t(grDevices::col2rgb(mc_color) / 255))}; position:absolute; z-index:100; left: {left_px + 2}px; top: {top_px + 200}px;" ) tooltip <- paste( glue("Metacell: {mc}"), glue("Cell type: {mc_cell_type}"), - style, + glue("Coordinates: {coords}"), sep = "
" ) @@ -448,7 +486,7 @@ app_server <- function(input, output, session) { style = style, p(HTML(tooltip)) ) -}) + }) observe({ shinyjs::toggle(id = "dca_peak_lf_thresh", condition = input$detect_dca && input$show_controls) diff --git a/R/utils-multi-genomes.R b/R/utils-multi-genomes.R index 63d54cf..26a486a 100644 --- a/R/utils-multi-genomes.R +++ b/R/utils-multi-genomes.R @@ -283,3 +283,59 @@ plot_intervals_comparison <- function(intervals_comparison, annotations = NULL, ) } } + +#' Generate an hclust object from a data.frame with cell_type, group and order columns +#' +#' @param df data.frame with cell_type, group and order +#' +#' @return hclust object with cell_type as labels and order as order of the leaves +#' +#' @examples +#' df <- data.frame( +#' cell_type = c( +#' "PGC", "Surface ectoderm", "Neural crest", +#' "Neural tube/Floor plate", "Forebrain/Midbrain/Hindbrain", "Caudal neural plate", +#' "Rostral neural plate", "Neural plate boundary", "Definitive ectoderm", +#' "Epiblast", "Primitive streak", "Caudal epiblast", "Tail bud - neural", +#' "Tail bud - mesoderm", "Early nascent mesoderm" +#' ), order = 1:15, +#' group = c( +#' "ecto", "ecto", "ecto", "ecto", "ecto", "ecto", +#' "ecto", "ecto", "ecto", "early", "early", "early", "early", +#' "Meso", "Meso" +#' ) +#' ) +#' +#' group_to_hclust(df) +#' +#' @export +group_to_hclust <- function(df) { + # Create an empty matrix of 1s + distance_matrix <- matrix(1, nrow = nrow(df), ncol = nrow(df)) + rownames(distance_matrix) <- df$cell_type + colnames(distance_matrix) <- df$cell_type + + # set the distance between elements the same group to 0 (using df$group) + for (grp in unique(df$group)) { + distance_matrix[df$group == grp, df$group == grp] <- 0 + } + + # Hierarchical clustering + hc <- hclust(as.dist(distance_matrix)) + + # Label leaves with cell_type + hc$labels <- df$cell_type + + hc <- as.hclust(reorder(as.dendrogram(hc), df$order, agglo.FUN = mean)) + + return(hc) +} + +get_heatmap_idx <- function(x, y, m) { + n_x <- nrow(m) + n_y <- ncol(m) + x_idx <- floor(x * n_x) + 1 + + y_idx <- n_y - max(floor(y * n_y), 0) + return(c(x_idx, y_idx)) +} diff --git a/man/mct_diff_access_on_hc.Rd b/man/mct_diff_access_on_hc.Rd index a1f3f76..e48aab1 100644 --- a/man/mct_diff_access_on_hc.Rd +++ b/man/mct_diff_access_on_hc.Rd @@ -12,7 +12,8 @@ mct_diff_access_on_hc( peak_lf_thresh1 = 1.2, peak_lf_thresh2 = 2, trough_lf_thresh1 = -1, - trough_lf_thresh2 = -2 + trough_lf_thresh2 = -2, + ... ) } \arguments{ diff --git a/man/mct_plot_region.Rd b/man/mct_plot_region.Rd index cd9ffd3..2055717 100644 --- a/man/mct_plot_region.Rd +++ b/man/mct_plot_region.Rd @@ -18,6 +18,13 @@ mct_plot_region( gene_annot = FALSE, n_smooth = 10, n_pixels = 1000, + plot_x_axis_ticks = TRUE, + gene_annot_pos = "top", + flip = FALSE, + genes_correlations = NULL, + cor_colors = c("blue", "white", "white", "white", "red"), + cor_color_breaks = c(-1, -0.05, 0, 0.05, 1), + roll_mean = FALSE, ... ) } @@ -42,12 +49,16 @@ mct_plot_region( \item{force_cell_type}{do not split cell types when ordering the metacells. Default: TRUE} -\item{gene_annot}{whether to add gene annotations; these annotations rely on the existence of an \code{annots/refGene.txt} file in the genome's misha directory, and on the existence of an intervals set called "intervs.global.tss" in the genome's misha directory. (optional)} +\item{gene_annot}{whether to add gene annotations; these annotations rely on the existence of the existence of an intervals set called "intervs.global.tss" and "intervs.global.exon" in the genome's misha directory. (optional)} \item{n_smooth}{number of genomic bins to use for smoothing the signal. Signal is smoothed by a rolling sum for each metacell (optional). Default is 20.} \item{n_pixels}{number of pixels in the plot. The DCA regions would be extended by \code{ceiling(2 * nrow(mat) / n_pixels)} (optional).} +\item{gene_annot_pos}{the position of the gene annotations ("top" or "bottom")} + +\item{flip}{whether to flip the coordinates (optional)} + \item{...}{ Arguments passed on to \code{\link[=mct_diff_access_on_hc]{mct_diff_access_on_hc}} \describe{ diff --git a/man/plot_region_mat.Rd b/man/plot_region_mat.Rd index 35a7670..5830a72 100644 --- a/man/plot_region_mat.Rd +++ b/man/plot_region_mat.Rd @@ -12,9 +12,17 @@ plot_region_mat( intervals = NULL, resolution = NULL, dca_mat = NULL, + y_seps = NULL, + y_seps_lty = 2, + y_seps_lwd = 1, n_smooth = 10, n_pixels = 1000, - gene_annot = FALSE + gene_annot = FALSE, + genome = NULL, + plot_x_axis_ticks = TRUE, + gene_annot_pos = "top", + flip = FALSE, + roll_mean = FALSE ) } \arguments{ @@ -36,7 +44,13 @@ plot_region_mat( \item{n_pixels}{number of pixels in the plot. The DCA regions would be extended by \code{ceiling(2 * nrow(mat) / n_pixels)} (optional).} -\item{gene_annot}{whether to add gene annotations; these annotations rely on the existence of an \code{annots/refGene.txt} file in the genome's misha directory, and on the existence of an intervals set called "intervs.global.tss" in the genome's misha directory. (optional)} +\item{gene_annot}{whether to add gene annotations; these annotations rely on the existence of the existence of an intervals set called "intervs.global.tss" and "intervs.global.exon" in the genome's misha directory. (optional)} + +\item{genome}{the genome to use for the gene annotations (optional)} + +\item{gene_annot_pos}{the position of the gene annotations ("top" or "bottom")} + +\item{flip}{whether to flip the coordinates (optional)} } \description{ Plot a genomic region given a matrix From c0405deb985e30395981ff536d5da5c1b3b5846e Mon Sep 17 00:00:00 2001 From: Ofir Raz Date: Thu, 17 Aug 2023 09:48:00 +0300 Subject: [PATCH 11/16] Adapted correlation thresholds Bug fixes: require mouse gene name to be present in rabbit rna expression for cor2 plot fix "burned" color in heatmaps --- R/plot-region.R | 2 +- R/shiny.R | 25 ++++++++++++++----------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/R/plot-region.R b/R/plot-region.R index 8a7b92b..5a7c1cc 100644 --- a/R/plot-region.R +++ b/R/plot-region.R @@ -217,7 +217,7 @@ plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", " par(mar = c(bottom_mar, 0, top_mar, 2)) shades <- colorRampPalette(colors)(1000) - # mat_smooth[mat_smooth > max(color_breaks)] <- max(color_breaks) + mat_smooth[mat_smooth > max(color_breaks)] <- max(color_breaks) shades_breaks <- approx(color_breaks, n = 1001)$y if (flip) { mat_smooth <- mat_smooth[nrow(mat_smooth):1, , drop = FALSE] diff --git a/R/shiny.R b/R/shiny.R index 04ea11f..73a51a6 100644 --- a/R/shiny.R +++ b/R/shiny.R @@ -263,13 +263,13 @@ app_server <- function(input, output, session) { mat_idxs <- get_heatmap_idx(mat_x, mat_y, m) gene <- toupper(promoters[promoters$coords == input$genes, ]$geneSymbol) - req(gene %in% rownames(mct@rna_egc)) + req(gene %in% rownames(shiny_mct@rna_egc)) - mcs <- intersect(colnames(mct@rna_egc), colnames(m)) + mcs <- intersect(colnames(shiny_mct@rna_egc), colnames(m)) - data <- data.frame(atac = m[mat_idxs[1], mcs], rna = mct@rna_egc[gene, mcs], metacell = mcs) %>% + data <- data.frame(atac = m[mat_idxs[1], mcs], rna = shiny_mct@rna_egc[gene, mcs], metacell = mcs) %>% mutate(rna = log2(rna + 1e-5)) %>% - left_join(mct@metadata, by = "metacell") %>% + left_join(shiny_mct@metadata, by = "metacell") %>% mutate(coords = rownames(m)[mat_idxs[1]]) scatter_data(data) @@ -286,18 +286,21 @@ app_server <- function(input, output, session) { theme_classic() + xlab("ATAC") + ylab("RNA") + - ggtitle(glue("Correlation: {round(cor(data$atac, data$rna), digits = 4)}"), subtitle = data$coords[1]) + ggtitle(glue("Correlation: {round(cor(data$atac, data$rna), digits = 2)} Coverage:{sum(data$atac)}"), subtitle = data$coords[1]) }) observe({ req(intervals()) + req(input$n_smooth) + req(input$n_smooth >= 1) + n_smooth <- max(round(input$n_smooth / shiny_mct@resolution), 1) raw_mat <- get_raw_mat( shiny_mct, intervals(), downsample = TRUE, - downsample_n = 1500000, + downsample_n = 20000000, detect_dca = input$detect_dca %||% FALSE, gene_annot = TRUE, - hc = hc, + hc = shiny_hc, peak_lf_thresh1 = input$dca_peak_lf_thresh, trough_lf_thresh1 = input$dca_trough_lf_thresh, sz_frac_for_peak = input$dca_sz_frac_for_peak, @@ -331,7 +334,7 @@ app_server <- function(input, output, session) { mct_plot_region( shiny_mct, intervals(), downsample = TRUE, - downsample_n = 1500000, + downsample_n = 20000000, detect_dca = input$detect_dca %||% FALSE, gene_annot = TRUE, hc = hc, @@ -378,7 +381,7 @@ app_server <- function(input, output, session) { mct_plot_region( shiny_mct, intervals(), downsample = TRUE, - downsample_n = 1500000, + downsample_n = 20000000, detect_dca = input$detect_dca %||% FALSE, gene_annot = FALSE, hc = NULL, @@ -386,11 +389,11 @@ app_server <- function(input, output, session) { trough_lf_thresh1 = input$dca_trough_lf_thresh, sz_frac_for_peak = input$dca_sz_frac_for_peak, color_breaks = color_breaks, - n_smooth = 1, + n_smooth = n_smooth, plot_x_axis_ticks = FALSE, roll_mean = TRUE, genes_correlations = promoters[promoters$coords == input$genes, ]$geneSymbol, - cor_color_breaks = c(-0.8, -0.4, 0, 0.4, 0.8), + cor_color_breaks = c(-1.0, -0.5, 0, 0.5, 1.0), cor_colors = c("blue", "white", "white", "white", "red") ) }, From ecd5137ed22d1b7fdbc2ebbb40dbf83eca98d96f Mon Sep 17 00:00:00 2001 From: Ofir Raz Date: Sun, 20 Aug 2023 17:13:17 +0300 Subject: [PATCH 12/16] Bug fixes Added reqs for ANY lifted elements for the comparison and rabbit plotting Reintroduced concatenated coordinate rownames after applying rollsum --- R/plot-region.R | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/R/plot-region.R b/R/plot-region.R index 5a7c1cc..28a7473 100644 --- a/R/plot-region.R +++ b/R/plot-region.R @@ -1,4 +1,4 @@ -get_raw_mat <- function(mct, intervals, detect_dca = FALSE, downsample = TRUE, downsample_n = NULL, metacells = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), hc = NULL, force_cell_type = TRUE, gene_annot = FALSE, n_smooth = 10, n_pixels = 1000, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", flip = FALSE, genes_correlations = NULL, cor_colors = c("blue", "white", "white", "white", "red"), cor_color_breaks = c(-1, -0.05, 0, 0.05, 1), roll_mean = FALSE, ...) { +get_raw_mat <- function(mct, intervals, detect_dca = FALSE, downsample = TRUE, downsample_n = NULL, metacells = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), hc = NULL, force_cell_type = TRUE, gene_annot = FALSE, n_smooth = 10, n_pixels = 1000, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", flip = FALSE, genes_correlations = NULL, cor_colors = c("blue", "white", "white", "white", "red"), cor_color_breaks = c(-1, -0.05, 0, 0.05, 1), ...) { gset_genome(mct@genome) raw_mat <- mct_get_mat(mct, intervals, downsample, downsample_n) if (!is.null(metacells)) { @@ -26,7 +26,9 @@ get_raw_mat <- function(mct, intervals, detect_dca = FALSE, downsample = TRUE, d } mat <- mat[, hc$label] } - mat + mat_smooth = RcppRoll::roll_sum(mat, n = n_smooth, fill = c(0, 0, 0)) + rownames(mat_smooth) = rownames(mat) + mat_smooth } #' Plot a genomic region @@ -69,7 +71,7 @@ get_raw_mat <- function(mct, intervals, detect_dca = FALSE, downsample = TRUE, d #' } #' #' @export -mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRUE, downsample_n = NULL, metacells = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), hc = NULL, force_cell_type = TRUE, gene_annot = FALSE, n_smooth = 10, n_pixels = 1000, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", flip = FALSE, genes_correlations = NULL, cor_colors = c("blue", "white", "white", "white", "red"), cor_color_breaks = c(-1, -0.05, 0, 0.05, 1), roll_mean = FALSE, ...) { +mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRUE, downsample_n = NULL, metacells = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), hc = NULL, force_cell_type = TRUE, gene_annot = FALSE, n_smooth = 10, n_pixels = 1000, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", flip = FALSE, genes_correlations = NULL, cor_colors = c("blue", "white", "white", "white", "red"), cor_color_breaks = c(-1, -0.05, 0, 0.05, 1), min_atac_sum=10, ...) { gset_genome(mct@genome) raw_mat <- mct_get_mat(mct, intervals, downsample, downsample_n) if (!is.null(metacells)) { @@ -120,6 +122,7 @@ mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRU # atac_egc = t(t(mat)/colSums(mat)) # atac_legc = log2(1e-5+atac_egc) rna_legc <- log2(1e-5 + mct@rna_egc) + mat_smooth <- RcppRoll::roll_sum(mat[, overlapping_types], n = n_smooth, fill = c(0,0,0)) cors <- tgstat::tgs_cor( t(mat[, overlapping_types]), t(rna_legc[toupper(genes_correlations), overlapping_types, drop = FALSE]), @@ -131,8 +134,9 @@ mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRU color_breaks <- cor_color_breaks y_seps <- NULL dca_mat <- NULL + n_smooth <- 1 } - plot_region_mat(mat, mc_colors, colors = colors, color_breaks = color_breaks, intervals = intervals, resolution = mct@resolution, dca_mat = dca_mat, y_seps = y_seps, n_smooth = n_smooth, gene_annot = gene_annot, n_pixels = n_pixels, genome = mct@genome, plot_x_axis_ticks = plot_x_axis_ticks, gene_annot_pos = gene_annot_pos, roll_mean = roll_mean, flip = flip) + plot_region_mat(mat, mc_colors, colors = colors, color_breaks = color_breaks, intervals = intervals, resolution = mct@resolution, dca_mat = dca_mat, y_seps = y_seps, n_smooth = n_smooth, gene_annot = gene_annot, n_pixels = n_pixels, genome = mct@genome, plot_x_axis_ticks = plot_x_axis_ticks, gene_annot_pos = gene_annot_pos, flip = flip) } #' Plot a genomic region given a matrix @@ -152,12 +156,8 @@ mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRU #' @param flip whether to flip the coordinates (optional) #' #' @export -plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), intervals = NULL, resolution = NULL, dca_mat = NULL, y_seps = NULL, y_seps_lty = 2, y_seps_lwd = 1, n_smooth = 10, n_pixels = 1000, gene_annot = FALSE, genome = NULL, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", flip = FALSE, roll_mean = FALSE) { - if (roll_mean) { - mat_smooth <- RcppRoll::roll_mean(mat, n = n_smooth, fill = c(0, 0, 0)) - } else { - mat_smooth <- RcppRoll::roll_sum(mat, n = n_smooth, fill = c(0, 0, 0)) - } +plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), intervals = NULL, resolution = NULL, dca_mat = NULL, y_seps = NULL, y_seps_lty = 2, y_seps_lwd = 1, n_smooth = 10, n_pixels = 1000, gene_annot = FALSE, genome = NULL, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", flip = FALSE) { + mat_smooth <- RcppRoll::roll_sum(mat, n = n_smooth, fill = c(0, 0, 0)) if (gene_annot) { if (is.null(intervals) || is.null(resolution)) { From fc09756552a4dca46e5cd9706374294708702923 Mon Sep 17 00:00:00 2001 From: Ofir Raz Date: Mon, 21 Aug 2023 14:33:22 +0300 Subject: [PATCH 13/16] rewrote join_by for backwards compatability --- R/utils-multi-genomes.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/utils-multi-genomes.R b/R/utils-multi-genomes.R index 26a486a..0fe9af8 100644 --- a/R/utils-multi-genomes.R +++ b/R/utils-multi-genomes.R @@ -70,7 +70,7 @@ custom_lift2 <- function(x, chain, maxgap = 1000) { ungroup(test3) %>% select(seqnames, strand, chainscore, lstart = alt_start, lend = end, widthsum, match) %>% mutate(lrow_id = row_number()), - by = join_by(seqnames, strand, chainscore) + by = c('seqnames', 'strand', 'chainscore') ) btest2 <- atest2 %>% group_by( @@ -212,7 +212,7 @@ compute_intervals_comparison <- function(intervals, intervals2, chain, chainscor i12 <- i1 %>% rowid_to_column("row_ID") %>% - left_join(i12 %>% rename(chrom1 = chrom, start1 = start, end1 = end), by = join_by(row_ID)) %>% + left_join(i12 %>% rename(chrom1 = chrom, start1 = start, end1 = end), by = "row_ID") %>% as_tibble() From befdcd7d4b768f6ece030ecda2f7cd10ee7840bf Mon Sep 17 00:00:00 2001 From: Ofir Raz Date: Sun, 3 Sep 2023 09:19:09 +0300 Subject: [PATCH 14/16] bug fixes --- R/shiny.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/shiny.R b/R/shiny.R index 73a51a6..8e47781 100644 --- a/R/shiny.R +++ b/R/shiny.R @@ -262,7 +262,7 @@ app_server <- function(input, output, session) { req(m) mat_idxs <- get_heatmap_idx(mat_x, mat_y, m) - gene <- toupper(promoters[promoters$coords == input$genes, ]$geneSymbol) + gene <- promoters[promoters$coords == input$genes, ]$geneSymbol req(gene %in% rownames(shiny_mct@rna_egc)) mcs <- intersect(colnames(shiny_mct@rna_egc), colnames(m)) From 6d9f457fefe0fae62d635e93242cce48f4552e3b Mon Sep 17 00:00:00 2001 From: ofirr Date: Mon, 11 Sep 2023 11:23:57 +0300 Subject: [PATCH 15/16] bug fixes, optimization and color by type Added drop=FALSE in McCounts (bugfix) Revised scc_to_peaks (runtime optimization) Added color_max_by_type feature to mct_plot_region Updated runserver.r --- R/McCounts.R | 2 ++ R/plot-region.R | 21 ++++++++++++++++----- R/shiny.R | 7 +++++-- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/R/McCounts.R b/R/McCounts.R index 8906701..00a0b2b 100755 --- a/R/McCounts.R +++ b/R/McCounts.R @@ -252,6 +252,7 @@ mcc_read <- function(path, id = NULL, description = NULL, verbose = TRUE) { summarise_bin <- function(mat, bin, intervs, metacells = NULL) { metacells <- metacells %||% colnames(mat) intervs <- as.data.frame(intervs) + if (!has_name(intervs, "peak_name")) { cli_abort("The {.var intervs} must have a column called {.field peak_name}") } @@ -619,6 +620,7 @@ mcc_to_tracks <- function(mc_counts, track_prefix, metacells = NULL, overwrite = cli_alert_success("Created {length(metacells)} tracks at {track_prefix}") + gdb.reload() mct <- mct_create(genome = mc_counts@genome, tracks = glue("{track_prefix}.mc{metacells}"), metacells = metacells, id = mc_counts@id, description = mc_counts@description, path = mc_counts@path, metadata = mc_counts@metadata, resolution = resolution, window_size = window_size, marginal_track = marginal_track) return(mct) } diff --git a/R/plot-region.R b/R/plot-region.R index 28a7473..259c252 100644 --- a/R/plot-region.R +++ b/R/plot-region.R @@ -71,7 +71,7 @@ get_raw_mat <- function(mct, intervals, detect_dca = FALSE, downsample = TRUE, d #' } #' #' @export -mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRUE, downsample_n = NULL, metacells = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), hc = NULL, force_cell_type = TRUE, gene_annot = FALSE, n_smooth = 10, n_pixels = 1000, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", flip = FALSE, genes_correlations = NULL, cor_colors = c("blue", "white", "white", "white", "red"), cor_color_breaks = c(-1, -0.05, 0, 0.05, 1), min_atac_sum=10, ...) { +mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRUE, downsample_n = NULL, metacells = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), hc = NULL, force_cell_type = TRUE, gene_annot = FALSE, n_smooth = 10, n_pixels = 1000, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", flip = FALSE, genes_correlations = NULL, cor_colors = c("blue", "white", "white", "white", "red"), cor_color_breaks = c(-1, -0.05, 0, 0.05, 1), min_atac_sum=10, color_max_by_type=FALSE,...) { gset_genome(mct@genome) raw_mat <- mct_get_mat(mct, intervals, downsample, downsample_n) if (!is.null(metacells)) { @@ -136,7 +136,7 @@ mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRU dca_mat <- NULL n_smooth <- 1 } - plot_region_mat(mat, mc_colors, colors = colors, color_breaks = color_breaks, intervals = intervals, resolution = mct@resolution, dca_mat = dca_mat, y_seps = y_seps, n_smooth = n_smooth, gene_annot = gene_annot, n_pixels = n_pixels, genome = mct@genome, plot_x_axis_ticks = plot_x_axis_ticks, gene_annot_pos = gene_annot_pos, flip = flip) + plot_region_mat(mat, mc_colors, colors = colors, color_breaks = color_breaks, intervals = intervals, resolution = mct@resolution, dca_mat = dca_mat, y_seps = y_seps, n_smooth = n_smooth, gene_annot = gene_annot, n_pixels = n_pixels, genome = mct@genome, plot_x_axis_ticks = plot_x_axis_ticks, gene_annot_pos = gene_annot_pos, flip = flip, color_max_by_type=color_max_by_type) } #' Plot a genomic region given a matrix @@ -156,7 +156,7 @@ mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRU #' @param flip whether to flip the coordinates (optional) #' #' @export -plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), intervals = NULL, resolution = NULL, dca_mat = NULL, y_seps = NULL, y_seps_lty = 2, y_seps_lwd = 1, n_smooth = 10, n_pixels = 1000, gene_annot = FALSE, genome = NULL, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", flip = FALSE) { +plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), intervals = NULL, resolution = NULL, dca_mat = NULL, y_seps = NULL, y_seps_lty = 2, y_seps_lwd = 1, n_smooth = 10, n_pixels = 1000, gene_annot = FALSE, genome = NULL, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", flip = FALSE, color_max_by_type=FALSE) { mat_smooth <- RcppRoll::roll_sum(mat, n = n_smooth, fill = c(0, 0, 0)) if (gene_annot) { @@ -217,8 +217,19 @@ plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", " par(mar = c(bottom_mar, 0, top_mar, 2)) shades <- colorRampPalette(colors)(1000) - mat_smooth[mat_smooth > max(color_breaks)] <- max(color_breaks) - shades_breaks <- approx(color_breaks, n = 1001)$y + if(color_max_by_type){ + shades_breaks <- approx(color_breaks, n = 1001)$y + for(i in 1:length(mc_colors)){ + coli = mat_smooth[,i] + coli[coli > max(color_breaks)] <- max(color_breaks)-1+i + mat_smooth[,i] = coli + } + shades_breaks = c(shades_breaks, max(color_breaks) - 1 + 1:length(mc_colors)) + shades = c(shades, mc_colors) + } else{ + mat_smooth[mat_smooth > max(color_breaks)] <- max(color_breaks) + shades_breaks <- approx(color_breaks, n = 1001)$y + } if (flip) { mat_smooth <- mat_smooth[nrow(mat_smooth):1, , drop = FALSE] } diff --git a/R/shiny.R b/R/shiny.R index 8e47781..85b898a 100644 --- a/R/shiny.R +++ b/R/shiny.R @@ -281,7 +281,7 @@ app_server <- function(input, output, session) { data %>% ggplot(aes(x = atac, y = rna, color = color)) + - geom_point(size = 2) + + geom_point(size = 4) + scale_color_identity() + theme_classic() + xlab("ATAC") + @@ -335,6 +335,7 @@ app_server <- function(input, output, session) { shiny_mct, intervals(), downsample = TRUE, downsample_n = 20000000, + colors = c("white", "gray", "darkgrey", "black"), detect_dca = input$detect_dca %||% FALSE, gene_annot = TRUE, hc = hc, @@ -343,7 +344,8 @@ app_server <- function(input, output, session) { sz_frac_for_peak = input$dca_sz_frac_for_peak, color_breaks = color_breaks, n_smooth = n_smooth, - plot_x_axis_ticks = FALSE + plot_x_axis_ticks = FALSE, + color_max_by_type = TRUE ) }, res = 96 @@ -410,6 +412,7 @@ app_server <- function(input, output, session) { input$n_smooth ) + color_max_by_type = TRUE output$current_coords <- renderText({ if (is.null(intervals())) { return("Please enter valid genomic coordinates (e.g. \"chr3:34300000-35000020\")") From c40d9268bb022012cc729e30554e602b080958fe Mon Sep 17 00:00:00 2001 From: Ofir Raz Date: Sun, 7 Jan 2024 11:24:05 +0200 Subject: [PATCH 16/16] light post-rebase fixes removed "genome" related arguments removed "flip" related arguments removed "multi' functions from utils-multi-genomes.R and renamed it to R/utils-hclust.R --- R/McCounts.R | 3 - R/plot-region.R | 36 +---- R/shiny.R | 4 +- R/utils-hclust.R | 55 +++++++ R/utils-multi-genomes.R | 341 ---------------------------------------- 5 files changed, 64 insertions(+), 375 deletions(-) create mode 100644 R/utils-hclust.R delete mode 100644 R/utils-multi-genomes.R diff --git a/R/McCounts.R b/R/McCounts.R index 00a0b2b..de6fa93 100755 --- a/R/McCounts.R +++ b/R/McCounts.R @@ -616,11 +616,8 @@ mcc_to_tracks <- function(mc_counts, track_prefix, metacells = NULL, overwrite = gc() }, .parallel = getOption("mcatac.parallel")) - gdb.reload() - cli_alert_success("Created {length(metacells)} tracks at {track_prefix}") - gdb.reload() mct <- mct_create(genome = mc_counts@genome, tracks = glue("{track_prefix}.mc{metacells}"), metacells = metacells, id = mc_counts@id, description = mc_counts@description, path = mc_counts@path, metadata = mc_counts@metadata, resolution = resolution, window_size = window_size, marginal_track = marginal_track) return(mct) } diff --git a/R/plot-region.R b/R/plot-region.R index 259c252..e4f3c9d 100644 --- a/R/plot-region.R +++ b/R/plot-region.R @@ -1,4 +1,4 @@ -get_raw_mat <- function(mct, intervals, detect_dca = FALSE, downsample = TRUE, downsample_n = NULL, metacells = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), hc = NULL, force_cell_type = TRUE, gene_annot = FALSE, n_smooth = 10, n_pixels = 1000, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", flip = FALSE, genes_correlations = NULL, cor_colors = c("blue", "white", "white", "white", "red"), cor_color_breaks = c(-1, -0.05, 0, 0.05, 1), ...) { +get_raw_mat <- function(mct, intervals, detect_dca = FALSE, downsample = TRUE, downsample_n = NULL, metacells = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), hc = NULL, force_cell_type = TRUE, gene_annot = FALSE, n_smooth = 10, n_pixels = 1000, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", genes_correlations = NULL, cor_colors = c("blue", "white", "white", "white", "red"), cor_color_breaks = c(-1, -0.05, 0, 0.05, 1), ...) { gset_genome(mct@genome) raw_mat <- mct_get_mat(mct, intervals, downsample, downsample_n) if (!is.null(metacells)) { @@ -71,7 +71,7 @@ get_raw_mat <- function(mct, intervals, detect_dca = FALSE, downsample = TRUE, d #' } #' #' @export -mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRUE, downsample_n = NULL, metacells = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), hc = NULL, force_cell_type = TRUE, gene_annot = FALSE, n_smooth = 10, n_pixels = 1000, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", flip = FALSE, genes_correlations = NULL, cor_colors = c("blue", "white", "white", "white", "red"), cor_color_breaks = c(-1, -0.05, 0, 0.05, 1), min_atac_sum=10, color_max_by_type=FALSE,...) { +mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRUE, downsample_n = NULL, metacells = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), hc = NULL, force_cell_type = TRUE, gene_annot = FALSE, n_smooth = 10, n_pixels = 1000, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", genes_correlations = NULL, cor_colors = c("blue", "white", "white", "white", "red"), cor_color_breaks = c(-1, -0.05, 0, 0.05, 1), min_atac_sum=10, color_max_by_type=FALSE,...) { gset_genome(mct@genome) raw_mat <- mct_get_mat(mct, intervals, downsample, downsample_n) if (!is.null(metacells)) { @@ -119,13 +119,11 @@ mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRU } if (!is.null(genes_correlations) && has_rna(mct)) { overlapping_types <- intersect(colnames(mct@rna_egc), colnames(mat)) - # atac_egc = t(t(mat)/colSums(mat)) - # atac_legc = log2(1e-5+atac_egc) rna_legc <- log2(1e-5 + mct@rna_egc) mat_smooth <- RcppRoll::roll_sum(mat[, overlapping_types], n = n_smooth, fill = c(0,0,0)) cors <- tgstat::tgs_cor( t(mat[, overlapping_types]), - t(rna_legc[toupper(genes_correlations), overlapping_types, drop = FALSE]), + t(rna_legc[genes_correlations, overlapping_types, drop = FALSE]), pairwise.complete.obs = T ) mat <- cors @@ -136,7 +134,7 @@ mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRU dca_mat <- NULL n_smooth <- 1 } - plot_region_mat(mat, mc_colors, colors = colors, color_breaks = color_breaks, intervals = intervals, resolution = mct@resolution, dca_mat = dca_mat, y_seps = y_seps, n_smooth = n_smooth, gene_annot = gene_annot, n_pixels = n_pixels, genome = mct@genome, plot_x_axis_ticks = plot_x_axis_ticks, gene_annot_pos = gene_annot_pos, flip = flip, color_max_by_type=color_max_by_type) + plot_region_mat(mat, mc_colors, colors = colors, color_breaks = color_breaks, intervals = intervals, resolution = mct@resolution, dca_mat = dca_mat, y_seps = y_seps, n_smooth = n_smooth, gene_annot = gene_annot, n_pixels = n_pixels, plot_x_axis_ticks = plot_x_axis_ticks, gene_annot_pos = gene_annot_pos, color_max_by_type=color_max_by_type) } #' Plot a genomic region given a matrix @@ -151,12 +149,10 @@ mct_plot_region <- function(mct, intervals, detect_dca = FALSE, downsample = TRU #' @param n_smooth number of genomic bins to use for smoothing the signal. Signal is smoothed by a rolling sum for each metacell (optional). Default is 20. #' @param n_pixels number of pixels in the plot. The DCA regions would be extended by \code{ceiling(2 * nrow(mat) / n_pixels)} (optional). #' @param gene_annot whether to add gene annotations; these annotations rely on the existence of the existence of an intervals set called "intervs.global.tss" and "intervs.global.exon" in the genome's misha directory. (optional) -#' @param genome the genome to use for the gene annotations (optional) #' @param gene_annot_pos the position of the gene annotations ("top" or "bottom") -#' @param flip whether to flip the coordinates (optional) #' #' @export -plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), intervals = NULL, resolution = NULL, dca_mat = NULL, y_seps = NULL, y_seps_lty = 2, y_seps_lwd = 1, n_smooth = 10, n_pixels = 1000, gene_annot = FALSE, genome = NULL, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", flip = FALSE, color_max_by_type=FALSE) { +plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", "black", "gold"), color_breaks = c(0, 6, 12, 18, 24), intervals = NULL, resolution = NULL, dca_mat = NULL, y_seps = NULL, y_seps_lty = 2, y_seps_lwd = 1, n_smooth = 10, n_pixels = 1000, gene_annot = FALSE, plot_x_axis_ticks = TRUE, gene_annot_pos = "top", color_max_by_type=FALSE) { mat_smooth <- RcppRoll::roll_sum(mat, n = n_smooth, fill = c(0, 0, 0)) if (gene_annot) { @@ -172,22 +168,19 @@ plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", " } par(mar = c(0, 0, 1, 2)) - plot_tss_strip(intervals, flip = flip) + plot_tss_strip(intervals) if (gene_annot_pos == "top") { par(mar = c(0, 0, 0, 2)) } else { par(mar = c(0, 0, 0, 2)) } - gene_annots <- make_gene_annot(intervals, resolution, genome) + gene_annots <- make_gene_annot(intervals, resolution) if (is.null(gene_annots[["exon_coords"]])) { image(as.matrix(rep(0, ncol(mat)), nrow = 1), col = c("white", "black"), breaks = c(-0.5, 0, 1), yaxt = "n", xaxt = "n", frame.plot = FALSE) } else { exon_mat <- as.matrix(gene_annots[["exon_coords"]]) - if (flip) { - exon_mat <- exon_mat[nrow(exon_mat):1, , drop = FALSE] - } image(exon_mat, col = c("white", "black"), breaks = c(-0.5, 0, 1), yaxt = "n", xaxt = "n", frame.plot = FALSE) } @@ -195,7 +188,6 @@ plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", " top_mar <- 0 left_mar <- 2 } else { - # par(mar = c(0,0,0,0)) layout(matrix(1:2, nrow = 1), w = c(1, 20)) top_mar <- 0 left_mar <- 2 @@ -230,9 +222,6 @@ plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", " mat_smooth[mat_smooth > max(color_breaks)] <- max(color_breaks) shades_breaks <- approx(color_breaks, n = 1001)$y } - if (flip) { - mat_smooth <- mat_smooth[nrow(mat_smooth):1, , drop = FALSE] - } image(mat_smooth, col = shades, breaks = shades_breaks, yaxt = "n", xaxt = "n") if (!is.null(intervals) && plot_x_axis_ticks) { axis(1, at = seq(0, 1, l = 11), round(seq(intervals$start[1], intervals$end[1], l = 11) / 1e+6, 3), las = 2) @@ -243,9 +232,6 @@ plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", " cli_alert_info("Extending DCAs with {.val {n_peak_smooth}} bins. This can be tweaked using the {.field n_pixels} paramater.") dca_mat <- extend_dca_mat(dca_mat, n_peak_smooth) dca_cols <- c(rgb(0, 0, 1, 0.4), rgb(0, 0, 1, 0.15), rgb(0, 0, 0, 0), rgb(1, 0, 0, 0.15), rgb(1, 0, 0, 0.4)) - if (flip) { - dca_mat <- dca_mat[nrow(dca_mat):1, , drop = FALSE] - } image(dca_mat, col = dca_cols, add = TRUE, breaks = c(-3, -1.5, -0.5, 0.5, 1.5, 3)) } if (!is.null(y_seps)) { @@ -257,7 +243,7 @@ plot_region_mat <- function(mat, mc_colors = NULL, colors = c("white", "gray", " } } -plot_tss_strip <- function(intervals, flip = FALSE) { +plot_tss_strip <- function(intervals) { plot( # empty plot x = intervals$start:intervals$end, y = c(0, rep(0.7, intervals$end - intervals$start)), @@ -272,14 +258,8 @@ plot_tss_strip <- function(intervals, flip = FALSE) { tss_df <- gintervals.neighbors1("intervs.global.tss", intervals) %>% filter(dist == 0) %>% arrange(chrom, start, end, strand, geneSymbol) %>% - # distinct(geneSymbol, strand, .keep_all = TRUE) %>% select(chrom, tss = start, strand, gene = geneSymbol) - if (flip) { - # flip the coordinates relative to the start end end of the intervals - tss_df$tss <- intervals$end - tss_df$tss + intervals$start - tss_df$strand <- -1 * tss_df$strand - } if (nrow(tss_df) > 0) { text(x = tss_df$tss, y = rep(0.35, length(tss_df$tss)), labels = tss_df$gene, las = 1, cex = 1) line_len <- (intervals$end - intervals$start) * 0.01 diff --git a/R/shiny.R b/R/shiny.R index 85b898a..5cafdb3 100644 --- a/R/shiny.R +++ b/R/shiny.R @@ -153,7 +153,6 @@ app_ui <- function(request) { ) ), uiOutput("hover_info", style = "pointer-events: none") - ) ) } @@ -197,8 +196,7 @@ app_server <- function(input, output, session) { observe({ shinyWidgets::updateVirtualSelect( "genes", - choices = shinyWidgets::prepare_choices(promoters, label, coords), - selected = "chrX:60843429-60943430" + choices = shinyWidgets::prepare_choices(promoters, label, coords) ) }) diff --git a/R/utils-hclust.R b/R/utils-hclust.R new file mode 100644 index 0000000..92e442b --- /dev/null +++ b/R/utils-hclust.R @@ -0,0 +1,55 @@ +#' Generate an hclust object from a data.frame with cell_type, group and order columns +#' +#' @param df data.frame with cell_type, group and order +#' +#' @return hclust object with cell_type as labels and order as order of the leaves +#' +#' @examples +#' df <- data.frame( +#' cell_type = c( +#' "PGC", "Surface ectoderm", "Neural crest", +#' "Neural tube/Floor plate", "Forebrain/Midbrain/Hindbrain", "Caudal neural plate", +#' "Rostral neural plate", "Neural plate boundary", "Definitive ectoderm", +#' "Epiblast", "Primitive streak", "Caudal epiblast", "Tail bud - neural", +#' "Tail bud - mesoderm", "Early nascent mesoderm" +#' ), order = 1:15, +#' group = c( +#' "ecto", "ecto", "ecto", "ecto", "ecto", "ecto", +#' "ecto", "ecto", "ecto", "early", "early", "early", "early", +#' "Meso", "Meso" +#' ) +#' ) +#' +#' group_to_hclust(df) +#' +#' @export +group_to_hclust <- function(df) { + # Create an empty matrix of 1s + distance_matrix <- matrix(1, nrow = nrow(df), ncol = nrow(df)) + rownames(distance_matrix) <- df$cell_type + colnames(distance_matrix) <- df$cell_type + + # set the distance between elements the same group to 0 (using df$group) + for (grp in unique(df$group)) { + distance_matrix[df$group == grp, df$group == grp] <- 0 + } + + # Hierarchical clustering + hc <- hclust(as.dist(distance_matrix)) + + # Label leaves with cell_type + hc$labels <- df$cell_type + + hc <- as.hclust(reorder(as.dendrogram(hc), df$order, agglo.FUN = mean)) + + return(hc) +} + +get_heatmap_idx <- function(x, y, m) { + n_x <- nrow(m) + n_y <- ncol(m) + x_idx <- floor(x * n_x) + 1 + + y_idx <- n_y - max(floor(y * n_y), 0) + return(c(x_idx, y_idx)) +} diff --git a/R/utils-multi-genomes.R b/R/utils-multi-genomes.R deleted file mode 100644 index 0fe9af8..0000000 --- a/R/utils-multi-genomes.R +++ /dev/null @@ -1,341 +0,0 @@ -flipStrandTricky <- function(strand, flip) { - strandCodes <- c("+" = 1L, "-" = -1L, "*" = 0L) - strandInt <- strandCodes[as.vector(strand)] - flipped <- ifelse(flip, strandInt * -1L, strandInt) + 2L - strandRevCodes <- factor(c("-", "*", "+"), levels(rtracklayer::strand())) - strandRevCodes[as.vector(flipped)] -} - -custom_lift <- function(x, chain) { - liftOverSpace <- function(gr, chain, subind) { - r <- rtracklayer::ranges(gr) - ol <- IRanges::findOverlaps(r, rtracklayer::ranges(chain)) - shits <- S4Vectors::subjectHits(ol) - r <- IRanges::overlapsRanges(r, rtracklayer::ranges(chain), ol) - rev <- as.vector(rtracklayer::reversed(chain)[shits]) - starts <- ifelse(rev, - start(IRanges::reflect(r, rtracklayer::ranges(chain)[shits])), - start(r) - ) - strand <- flipStrandTricky(rtracklayer::strand(gr)[S4Vectors::queryHits(ol)], rev) - r <- IRanges::IRanges(starts, width = rtracklayer::width(r)) - offsets <- rtracklayer::offset(chain)[shits] - spaces <- rtracklayer::space(chain)[shits] - chainscore <- rtracklayer::score(chain)[shits] - ind[[as.character(GenomeInfoDb::seqnames(gr)[1])]] <<- subind[S4Vectors::queryHits(ol)] - GenomicRanges::GRanges(spaces, - IRanges::IRanges(start(r) - offsets, end(r) - offsets), - strand = strand, - rtracklayer::values(gr)[S4Vectors::queryHits(ol), , drop = FALSE], - chainscore - ) - } - rl <- split(x, GenomeInfoDb::seqnames(x), drop = TRUE) - unchainedNames <- setdiff(names(rl), names(chain)) - if (length(unchainedNames)) { - message( - "Discarding unchained sequences: ", - paste(unchainedNames, collapse = ", ") - ) - } - sharedNames <- intersect(names(rl), names(chain)) - ind <- split( - seq_len(length(x)), - as.vector(GenomeInfoDb::seqnames(x)) - )[sharedNames] - liftedList <- mapply(liftOverSpace, rl[sharedNames], - chain[sharedNames], ind, - SIMPLIFY = FALSE - ) - lifted <- unlist(GenomicRanges::GRangesList(liftedList), use.names = FALSE) - - f <- structure(as.integer(unlist(ind, use.names = FALSE)), - levels = seq_len(length(x)), class = "factor" - ) - - GenomicRanges::GRangesList(setNames(split(lifted, f), names(x))) -} - - -custom_lift2 <- function(x, chain, maxgap = 1000) { - test2 <- custom_lift(x, chain) - test3 <- as.data.frame(test2) %>% - group_by(across(c(-start, -end, -width))) %>% - summarise(start = min(start), end = max(end), widthsum = sum(width)) %>% - ungroup() - test3$alt_start <- test3$start - 1 - test3$match <- test3$widthsum / (test3$end - test3$alt_start) - atest2 <- as.data.frame(test2) %>% - left_join( - ungroup(test3) %>% - select(seqnames, strand, chainscore, lstart = alt_start, lend = end, widthsum, match) %>% - mutate(lrow_id = row_number()), - by = c('seqnames', 'strand', 'chainscore') - ) - btest2 <- atest2 %>% - group_by( - group, group_name, seqnames, strand, chainscore, lrow_id - ) %>% - arrange( - start, - .by_group = TRUE - ) %>% - mutate( - gap = start - lag(end) - ) %>% - mutate( - nbreaks = cumsum(ifelse(is.na(gap > maxgap), 0, gap > maxgap)) - ) %>% - group_by( - nbreaks, - .add = TRUE - ) %>% - summarise( - start = min(start), end = max(end), widthsum = sum(width) - ) %>% - ungroup() - btest2$alt_start <- btest2$start - 1 - btest2$match <- btest2$widthsum / (btest2$end - btest2$alt_start) - return(list(full = test2, gapped = btest2, cont = test3)) -} - -translate_intervals <- function(intervals, chain, chainscore = NULL) { - r <- regioneR::toGRanges(intervals %>% rowid_to_column("row_ID") %>% select(chrom, start, end, row_ID)) - lifted_list <- custom_lift2(r, chain) - - if (is.null(chainscore)) { - chainscore <- max(lifted_list$cont$chainscore) - } - gset_genome(chain@metadata[["genome2"]]) - ff_lifted_list <- gintervals.force_range( - lifted_list$cont %>% - filter(chainscore == !!chainscore) %>% - select(chrom = seqnames, start, end, row_ID) %>% - as.data.frame() %>% - select(chrom, start, end, everything()) - ) - gset_genome(chain@metadata[["genome1"]]) - ff_lifted_list -} - -translate_and_center <- function(intervals, chain, chainscore = NULL) { - r <- regioneR::toGRanges(intervals) - lifted_list <- custom_lift2(r, chain) - if (!is.null(chainscore)) { - intervs2 <- lifted_list$cont %>% - filter(chainscore == !!chainscore) %>% - select(chrom = seqnames, start, end) %>% - as.data.frame() - } else { - intervs2 <- lifted_list$cont %>% - arrange(desc(chainscore)) %>% - ungroup() %>% - slice(1) %>% - select(chrom = seqnames, start, end) %>% - as.data.frame() - } - expand <- round(abs(intervals$end - intervals$start) / 2) - intervs2 <- misha.ext::gintervals.centers(intervs2) %>% - mutate(start = start - expand, end = end + expand) - gset_genome(chain@metadata[["genome2"]]) - intervs2 <- gintervals.force_range(intervs2) - gset_genome(chain@metadata[["genome1"]]) - return(intervs2) -} - - -orth_one_per_element <- function(comparison_obj, width = 10) { - comparison_obj$mid1 <- comparison_obj$start1 + as.integer((comparison_obj$end1 - comparison_obj$start1) / 2) - comparison_obj$mid2 <- comparison_obj$start2 + as.integer((comparison_obj$end2 - comparison_obj$start2) / 2) - comparison_obj$start1 <- comparison_obj$mid1 - comparison_obj$end1 <- comparison_obj$mid1 + width - comparison_obj$start2 <- comparison_obj$mid2 - comparison_obj$end2 <- comparison_obj$mid2 + width - comparison_obj[, 1:6] -} - -load_chain <- function(chain, genome1 = NULL, genome2 = NULL) { - if (is.character(chain)) { - cli::cli_alert_info("Loading {genome1}->{genome2} chain {.file {chain}}") - chain <- rtracklayer::import.chain(chain) - cli::cli_alert_success("Loaded chain {.file {chain}} successfully") - } - - chain@metadata <- list(genome1 = genome1, genome2 = genome2) - - return(chain) -} - -compute_annotation_comparison <- function(annot1, annot2, intervals, intervals2, chain_1_to_2, chain_2_to_1, selected_chain_chainscore = NULL) { - annot1 <- annot1 %>% - gintervals.neighbors1(intervals) %>% - filter(dist == 0) - annot1_colors <- chameleon::distinct_colors(nrow(annot1))$name - - lifted_annot1 <- translate_intervals(as.data.frame(annot1), chain = chain_1_to_2, chainscore = selected_chain_chainscore) %>% - arrange(row_ID) - - annot2 <- annot2 %>% - filter( - chrom == intervals2$chrom, - start >= intervals2$start, - end <= intervals2$end - ) - annot2_colors <- chameleon::distinct_colors(nrow(annot1) + nrow(annot2))$name[(nrow(annot1) + 1):(nrow(annot1) + nrow(annot2))] - lifted_annot2 <- translate_intervals(as.data.frame(annot2), chain = chain_2_to_1, chainscore = selected_chain_chainscore) %>% - arrange(row_ID) - list( - annot1 = annot1 %>% mutate(x1 = (start - intervals$start) / (intervals$end - intervals$start)), - lifted_annot1 = lifted_annot1 %>% mutate(x2 = (start - intervals2$start) / (intervals2$end - intervals2$start)), - annot1_colors = annot1_colors, - annot2 = annot2 %>% mutate(x2 = (start - intervals2$start) / (intervals2$end - intervals2$start)), - lifted_annot2 = lifted_annot2 %>% mutate(x1 = (start - intervals$start) / (intervals$end - intervals$start)), - annot2_colors = annot2_colors - ) -} - - -compute_intervals_comparison <- function(intervals, intervals2, chain, chainscore = NULL, grid_resolution = 100) { - grid_resolution <- round((intervals$end - intervals$start) / grid_resolution) - - # create a grid iterator for the first intervals set - i1 <- tibble( - chrom = intervals$chrom, - end = seq(intervals$start, intervals$end, grid_resolution), - start = end - grid_resolution - ) %>% - mutate(end = pmin(end, intervals$end)) %>% - as.data.frame() - - - i12 <- translate_intervals(i1, chain, chainscore = chainscore) - - i12 <- i1 %>% - rowid_to_column("row_ID") %>% - left_join(i12 %>% rename(chrom1 = chrom, start1 = start, end1 = end), by = "row_ID") %>% - as_tibble() - - - # translate start1 and start2 to relative coordinates (from 0 to 1) according to intervals1 and intervals2 - i12 <- i12 %>% - mutate( - x1 = (start - intervals$start) / (intervals$end - intervals$start), - x2 = (start1 - intervals2$start) / (intervals2$end - intervals2$start), - x2 = ifelse(is.na(chrom1) | as.character(chrom1) != as.character(intervals2$chrom), NA, x2) - ) - - return(i12) -} - -is_comparison_flipped <- function(intervals_comparison) { - i12 <- intervals_comparison - - cr <- i12 %>% - mutate( - mid1 = start + (end - start) / 2, - mid2 = start1 + (end1 - start1) / 2 - ) %>% - summarise(cr = cor(mid1, mid2, use = "pairwise.complete.obs")) - - return(cr < 0) -} - -plot_intervals_comparison <- function(intervals_comparison, annotations = NULL, correlations = NULL, grid_resolution = 100) { - i12 <- intervals_comparison - - if (is_comparison_flipped(i12)) { - i12$x2 <- 1 - i12$x2 - } - # Set the plot parameters - plot(1, 1, xlim = c(0, 1), ylim = c(-0.02, 1.02), type = "n", ann = FALSE, axes = FALSE) - - segments(x0 = i12$x1, y0 = 1, x1 = i12$x1, y1 = 0.99, lwd = 1) - text(x = i12$x1, y = 0.96, labels = round(i12$start / 1e+6, 3), srt = 90, adj = c(1, 0.5), xpd = TRUE, cex = 0.5) - segments(x0 = i12$x1, y0 = 0.75, x1 = i12$x1, y1 = 0.7, lwd = 1, col = ifelse(1:length(i12$x1) %% round(grid_resolution / 10) == 0, "black", "grey")) - segments(x0 = i12$x1, y0 = 0.7, x1 = i12$x2, y1 = 0.27, lwd = 1, col = ifelse(1:length(i12$x1) %% round(grid_resolution / 10) == 0, "black", "grey")) - segments(x0 = i12$x2, y0 = 0.27, x1 = i12$x2, y1 = 0.25, lwd = 1, col = ifelse(1:length(i12$x1) %% round(grid_resolution / 10) == 0, "black", "grey")) - text(x = i12$x2[i12$x2 <= 1 & i12$x2 >= 0], y = 0.2, labels = round(i12$start1[i12$x2 <= 1 & i12$x2 >= 0] / 1e+6, 3), srt = 90, adj = c(1, 0.5), xpd = TRUE, cex = 0.5) - segments(x0 = i12$x2, y0 = 0, x1 = i12$x2, y1 = 0.01, lwd = 1) - if (!is.null(correlations)) { - } - if (!is.null(annotations)) { - annot1 <- annotations[["annot1"]] - lifted_annot1 <- annotations[["lifted_annot1"]] - annot1_colors <- annotations[["annot1_colors"]] - annot2 <- annotations[["annot2"]] - lifted_annot2 <- annotations[["lifted_annot2"]] - annot2_colors <- annotations[["annot2_colors"]] - if (is_comparison_flipped(i12)) { - annot2$x2 <- 1 - annot2$x2 - lifted_annot1$x2 <- 1 - lifted_annot1$x2 - } - points( - x = c(annot1$x1, lifted_annot2$x1), - y = c(rnorm(nrow(annot1), sd = 0.001) + 1.019, rnorm(nrow(lifted_annot2), sd = 0.001) + 0.981), - col = c(annot1_colors, annot2_colors), - pch = 3, cex = 1 - ) - points( - x = c(annot2$x2, lifted_annot1$x2), - y = c(rnorm(nrow(annot2), sd = 0.001) - 0.019, rnorm(nrow(lifted_annot1), sd = 0.001) + 0.019), - col = c(annot2_colors, annot1_colors), - pch = 3, cex = 1 - ) - } -} - -#' Generate an hclust object from a data.frame with cell_type, group and order columns -#' -#' @param df data.frame with cell_type, group and order -#' -#' @return hclust object with cell_type as labels and order as order of the leaves -#' -#' @examples -#' df <- data.frame( -#' cell_type = c( -#' "PGC", "Surface ectoderm", "Neural crest", -#' "Neural tube/Floor plate", "Forebrain/Midbrain/Hindbrain", "Caudal neural plate", -#' "Rostral neural plate", "Neural plate boundary", "Definitive ectoderm", -#' "Epiblast", "Primitive streak", "Caudal epiblast", "Tail bud - neural", -#' "Tail bud - mesoderm", "Early nascent mesoderm" -#' ), order = 1:15, -#' group = c( -#' "ecto", "ecto", "ecto", "ecto", "ecto", "ecto", -#' "ecto", "ecto", "ecto", "early", "early", "early", "early", -#' "Meso", "Meso" -#' ) -#' ) -#' -#' group_to_hclust(df) -#' -#' @export -group_to_hclust <- function(df) { - # Create an empty matrix of 1s - distance_matrix <- matrix(1, nrow = nrow(df), ncol = nrow(df)) - rownames(distance_matrix) <- df$cell_type - colnames(distance_matrix) <- df$cell_type - - # set the distance between elements the same group to 0 (using df$group) - for (grp in unique(df$group)) { - distance_matrix[df$group == grp, df$group == grp] <- 0 - } - - # Hierarchical clustering - hc <- hclust(as.dist(distance_matrix)) - - # Label leaves with cell_type - hc$labels <- df$cell_type - - hc <- as.hclust(reorder(as.dendrogram(hc), df$order, agglo.FUN = mean)) - - return(hc) -} - -get_heatmap_idx <- function(x, y, m) { - n_x <- nrow(m) - n_y <- ncol(m) - x_idx <- floor(x * n_x) + 1 - - y_idx <- n_y - max(floor(y * n_y), 0) - return(c(x_idx, y_idx)) -}