Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: escape
Title: Easy single cell analysis platform for enrichment
Version: 2.6.1
Version: 2.6.2
Authors@R: c(
person(given = "Nick", family = "Borcherding", role = c("aut", "cre"), email = "ncborch@gmail.com"),
person(given = "Jared", family = "Andrews", role = c("aut"), email = "jared.andrews07@gmail.com"),
Expand All @@ -11,7 +11,7 @@ Description: A bridging R package to facilitate gene set enrichment analysis (GS
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: false
RoxygenNote: 7.3.2
RoxygenNote: 7.3.3
biocViews: Software, SingleCell, Classification, Annotation, GeneSetEnrichment, Sequencing, GeneSignaling, Pathways
Depends: R (>= 4.1)
Imports:
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ importFrom(MatrixGenerics,rowSds)
importFrom(MatrixGenerics,rowSums2)
importFrom(SummarizedExperiment,colData)
importFrom(ggdist,stat_pointinterval)
importFrom(ggplot2,"%+replace%")
importFrom(grDevices,hcl.pals)
importFrom(stats,aggregate)
importFrom(stats,as.formula)
Expand Down
14 changes: 14 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# 2.6.2

## NEW FEATURES
* Added `.themeEscape()` internal theme function for consistent visualization styling across all plotting functions

## ENHANCEMENTS
* **Seurat v5 compatibility**: Updated `.cntEval()` to detect SeuratObject version and use `layer` argument instead of deprecated `slot` argument for SeuratObject >= 5.0.0
* **Consistent theming**: Applied unified theme styling across all visualization functions (`ridgeEnrichment()`, `splitEnrichment()`, `geyserEnrichment()`, `heatmapEnrichment()`, `scatterEnrichment()`, `pcaEnrichment()`, `densityEnrichment()`, `gseaEnrichment()`, `enrichItPlot()`)
* **Improved `densityEnrichment()`**: Added plot title showing gene set name, alphanumeric sorting of group labels, and improved rug segment styling

## DOCUMENTATION
* Reformatted roxygen2 documentation across all exported functions for consistency
* Standardized use of `\code{}`, `\itemize{}`, `\enumerate{}`, `\strong{}`, and `\emph{}` tags
* Replaced Unicode characters with ASCII equivalents for better portability

# 2.6.1

Expand Down
80 changes: 45 additions & 35 deletions R/densityEnrichment.R
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
#' Visualize Mean Density Ranking of Genes Across Gene Sets
#'
#' This function allows to the user to examine the mean ranking
#' within the groups across the gene set. The visualization uses
#' the density function to display the relative position and distribution
#' of rank.
#'
#' This function allows the user to examine the mean ranking within groups
#' across the gene set. The visualization uses the density function to display
#' the relative position and distribution of rank.
#'
#' @param input.data A \link[SeuratObject]{Seurat} object or a
#' \link[SingleCellExperiment]{SingleCellExperiment}.
#' @param gene.set.use Character(1). Name of the gene set to display.
#' \link[SingleCellExperiment]{SingleCellExperiment}.
#' @param gene.set.use Character. Name of the gene set to display.
#' @param gene.sets A named list of character vectors, the result of
#' [getGeneSets()], or the built-in data object [escape.gene.sets].
#' @param group.by Metadata column. Defaults to the Seurat/SCE `ident`
#' slot when `NULL`.
#' @param rug.height Vertical spacing of the hit rug as a fraction of the
#' y-axis (default `0.02`).
#' @param palette Character. Any palette from \code{\link[grDevices]{hcl.pals}}.
#' \code{\link{getGeneSets}}, or the built-in data object
#' \code{\link{escape.gene.sets}}.
#' @param group.by Character. Metadata column used for grouping. Defaults to
#' the Seurat/SCE \code{ident} slot when \code{NULL}.
#' @param rug.height Numeric. Vertical spacing of the hit rug as a fraction of
#' the y-axis. Default is \code{0.02}.
#' @param palette Character. Color palette name from
#' \code{\link[grDevices]{hcl.pals}}. Default is \code{"inferno"}.
#'
#' @examples
#' gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"),
Expand Down Expand Up @@ -69,7 +70,7 @@ densityEnrichment <- function(input.data,
dens <- suppressWarnings(
compute.cdf(tmp, seq_len(ncol(tmp)), FALSE, FALSE)
)
ord <- apply(dens, 2, order, decreasing = TRUE) # genes × cells
ord <- apply(dens, 2, order, decreasing = TRUE) # genes x cells
scores <- vapply(seq_len(ncol(ord)),
function(j) weights[ord[, j]],
numeric(n.genes))
Expand All @@ -89,37 +90,46 @@ densityEnrichment <- function(input.data,

## -------- 4 Plots ---------------------------------------------------------
cols <- .colorizer(palette, length(groups))
plot.df <- subset(long.df, gene.set.query == "yes" & is.finite(value))


# Filter to gene set members with valid values
plot.df <- long.df[long.df$gene.set.query == "yes" & is.finite(long.df$value), ]

# Alphanumerically sort group labels for consistent ordering
plot.df$variable <- factor(plot.df$variable,
levels = .alphanumericalSort(unique(plot.df$variable)))

# Density plot panel

p1 <- ggplot(plot.df,
aes(x = value, fill = variable)) +
geom_density(alpha = 0.4, colour = "black") +
geom_density(alpha = 0.5, colour = "black", linewidth = 0.4) +
scale_fill_manual(values = cols, name = "Group") +
labs(y = "Rank density") +
theme_classic() +
labs(y = "Rank Density",
title = paste0("Gene Set: ", gene.set.use)) +
.themeEscape(grid_lines = "Y") +
theme(axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank())

## simple segment plot for mean-rank positions
offset <- rug.height
seg.df <- within(plot.df, {
ord <- match(variable, unique(variable))
y <- -(ord * offset - offset)
yend <- y - offset
})

axis.ticks.x = element_blank(),
plot.title = element_text(hjust = 0.5))

# Build rug segment data with proper stacking
seg.df <- plot.df
seg.df$ord <- match(seg.df$variable, levels(seg.df$variable))
seg.df$y <- -(seg.df$ord - 1) * rug.height
seg.df$yend <- seg.df$y - rug.height * 0.9

# Rug plot panel
p2 <- ggplot(seg.df, aes(x = value, xend = value,
y = y, yend = yend,
colour = variable)) +
geom_segment(linewidth = 1) +
geom_segment(linewidth = 0.8, alpha = 0.7) +
scale_colour_manual(values = cols, guide = "none") +
labs(x = "Mean rank order") +
theme_classic() +
labs(x = "Mean Rank Order") +
.themeEscape(grid_lines = "none") +
theme(axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
panel.border = element_rect(fill = NA, colour = "black"))
patchwork::wrap_plots(p1, p2, ncol = 1, heights = c(3,1))
panel.border = element_rect(fill = NA, colour = "grey50", linewidth = 0.5))

patchwork::wrap_plots(p1, p2, ncol = 1, heights = c(3, 1))
}
64 changes: 39 additions & 25 deletions R/enrichIt.R
Original file line number Diff line number Diff line change
@@ -1,33 +1,47 @@
#' Flexible GSEA for Precomputed Gene Lists
#'
#' @description
#' A convenience front-end to **fgsea** that lets you point at the
#' `avg_log2FC` and `p_val_adj` columns coming out of Seurat / DESeq2 /
#' edgeR etc. It converts them to a signed -log10(*p*) ranking, filters on
#' A convenience front-end to \pkg{fgsea} that lets you point at the
#' \code{avg_log2FC} and \code{p_val_adj} columns coming out of Seurat / DESeq2
#' / edgeR etc. It converts them to a signed -log10(p) ranking, filters on
#' significance / effect size, and then runs fgsea.
#'
#' @param input.data Either
#' • a named numeric vector **already ranked**, *or*
#' • a data.frame/tibble with one row per gene and columns containing
#' log-fold-change and *p*-value. If the gene ID is not in `rownames(data)`,
#' supply `gene_col`.
#' @param gene.sets AA named list of character vectors, the result of
#' [getGeneSets()], or the built-in data object [escape.gene.sets].
#' @param gene_col Name of the column holding gene identifiers (ignored when
#' they are row-names). Default `NULL`.
#' @param logFC_col,pval_col Column names for logFC and *p* (or adj.*p*)
#' – defaults match Seurat’s `FindMarkers()`.
#' @param minSize,maxSize Integer. Minimum / maximum pathway size passed to
#' *fgsea* (default 5 / 500).
#' @param ranking_fun How to build the ranking: `"signed_log10_p"` (default)
#' or `"logFC"`.
#' @param pval_cutoff,logFC_cutoff Filters applied **before** ranking.
#' @param padjust_method Multiple-testing correction; any method accepted by
#' [stats::p.adjust()] (default `"BH"`).
#' @param nproc Passed to **fgsea** (`0` = multithread if OpenMP available).
#' @param input.data Either:
#' \itemize{
#' \item A named numeric vector \strong{already ranked}, or
#' \item A data.frame/tibble with one row per gene and columns containing
#' log-fold-change and p-value. If the gene ID is not in
#' \code{rownames(data)}, supply \code{gene_col}.
#' }
#' @param gene.sets A named list of character vectors, the result of
#' \code{\link{getGeneSets}}, or the built-in data object
#' \code{\link{escape.gene.sets}}.
#' @param gene_col Character or \code{NULL}. Name of the column holding gene
#' identifiers (ignored when they are row-names). Default is \code{NULL}.
#' @param logFC_col Character. Column name for log-fold-change values. Default
#' is \code{"avg_log2FC"} (matches Seurat's \code{FindMarkers()}).
#' @param pval_col Character. Column name for p-values (or adjusted p-values).
#' Default is \code{c("p_val_adj", "p_val")} (first match is used).
#' @param ranking_fun Character. How to build the ranking:
#' \itemize{
#' \item \code{"signed_log10_p"} (default): sign(logFC) * -log10(p).
#' \item \code{"logFC"}: Use log-fold-change values directly.
#' }
#' @param pval_cutoff Numeric. Remove genes with a p-value above this
#' threshold \strong{before} ranking. Default is \code{1} (no filtering).
#' @param logFC_cutoff Numeric. Remove genes with an absolute log-fold-change
#' below this threshold \strong{before} ranking. Default is \code{0} (no
#' filtering).
#' @param minSize Integer. Minimum pathway size passed to \pkg{fgsea}. Default
#' is \code{5}.
#' @param maxSize Integer. Maximum pathway size passed to \pkg{fgsea}. Default
#' is \code{500}.
#' @param padjust_method Character. Multiple-testing correction method; any
#' method accepted by \code{\link[stats]{p.adjust}}. Default is \code{"BH"}.
#' @param nproc Integer. Passed to \pkg{fgsea}. Use \code{0} for
#' multithreading if OpenMP is available. Default is \code{0}.
#'
#'
#' @seealso [fgsea::fgsea()], [getGeneSets()], [gseaEnrichment()]
#' @seealso \code{\link[fgsea]{fgsea}}, \code{\link{getGeneSets}},
#' \code{\link{gseaEnrichment}}
#'
#' @examples
#' pbmc_small <- SeuratObject::pbmc_small
Expand Down
50 changes: 30 additions & 20 deletions R/enrichItPlot.R
Original file line number Diff line number Diff line change
@@ -1,19 +1,30 @@
#' Adaptive visualisation of enrichIt results
#' Adaptive Visualisation of enrichIt Results
#'
#' @param res `data.frame` returned by [enrichIt()].
#' @param plot.type `"bar"`, `"dot"`, or `"cnet"`.
#' @param top Integer. Keep the top *n* terms **per database**
#' (ranked by adjusted *p*). Set to `Inf` to keep all.
#' @param x.measure A column in `res` mapped to the *x*-axis
#' (ignored for `"cnet"`). Default `"-log10(padj)"`.
#' @param color.measure Column mapped to color (dot plot only).
#' Default same as `x.measure`.
#' @param show.counts Logical. Annotate bar plot with the `Count` (number of genes).
#' @param palette palette Character. Any palette from \code{\link[grDevices]{hcl.pals}}.
#' @param ... Further arguments passed to **ggplot2** geoms (e.g.
#' `alpha`, `linewidth`).
#' Create bar, dot, or network plots from \code{\link{enrichIt}} results.
#'
#' @return A **patchwork** object (bar / dot) or **ggraph** object (cnet).
#' @param res Data frame. Output from \code{\link{enrichIt}}.
#' @param plot.type Character. Visualization type. Options:
#' \itemize{
#' \item \code{"bar"} (default): Horizontal bar plot.
#' \item \code{"dot"}: Dot plot with size and color encoding.
#' \item \code{"cnet"}: Concept network plot showing gene-pathway
#' relationships.
#' }
#' @param top Integer. Keep the top \emph{n} terms \strong{per database}
#' (ranked by adjusted p-value). Set to \code{Inf} to keep all. Default is
#' \code{20}.
#' @param x.measure Character. Column in \code{res} mapped to the x-axis
#' (ignored for \code{"cnet"}). Default is \code{"-log10(padj)"}.
#' @param color.measure Character. Column mapped to color (dot plot only).
#' Default is same as \code{x.measure}.
#' @param show.counts Logical. If \code{TRUE}, annotate bar plot with the
#' \code{Count} (number of genes). Default is \code{TRUE}.
#' @param palette Character. Color palette name from
#' \code{\link[grDevices]{hcl.pals}}. Default is \code{"inferno"}.
#' @param ... Further arguments passed to \pkg{ggplot2} geoms (e.g.,
#' \code{alpha}, \code{linewidth}).
#'
#' @return A \pkg{ggplot2} object (bar/dot) or \pkg{ggraph} object (cnet).
#' @export
#'
#' @examples
Expand Down Expand Up @@ -65,12 +76,12 @@ enrichItPlot <- function(res,
}

## Bar Plot
if (plot.type == "bar") {
if (plot.type == "bar") {
p <- ggplot2::ggplot(res,
ggplot2::aes(x = .data[[x.measure]], y = .data$Term)) +
ggplot2::geom_col(fill = .colorizer(palette, n = 1)) +
ggplot2::labs(x = x.measure, y = NULL) +
ggplot2::theme_classic()
.themeEscape(grid_lines = "X")

if (isTRUE(show.counts)) {
p <- p + ggplot2::geom_text(
Expand All @@ -79,9 +90,8 @@ enrichItPlot <- function(res,
hjust = 0, size = 3)
}
p <- p + ggplot2::coord_cartesian(clip = "off")
## Dot Plot
} else if (plot.type == "dot") {

## Dot Plot
} else if (plot.type == "dot") {
p <- ggplot2::ggplot(res,
ggplot2::aes(x = .data$geneRatio, y = .data$Term,
color = .data[[color.measure]],
Expand All @@ -90,7 +100,7 @@ enrichItPlot <- function(res,
ggplot2::scale_size_continuous(name = "Core Count") +
ggplot2::labs(x = "geneRatio", y = NULL,
color = color.measure) +
ggplot2::theme_classic() +
.themeEscape(grid_lines = "X") +
ggplot2::theme(legend.box = "vertical")

if (!is.null(palette))
Expand Down
37 changes: 20 additions & 17 deletions R/getGeneSets.R
Original file line number Diff line number Diff line change
@@ -1,32 +1,35 @@
#' Get a collection of gene sets from the msigdb
#' Get a Collection of Gene Sets from MSigDB
#'
#' This function retrieves gene sets from msigdb and caches the downloaded object
#' for future calls. It allows subsetting by main collection (library),
#' subcollection, or specific gene sets, and only supports human
#' ("Homo sapiens") and mouse ("Mus musculus").
#' This function retrieves gene sets from MSigDB and caches the downloaded
#' object for future calls. It allows subsetting by main collection (library),
#' subcollection, or specific gene sets, and only supports human
#' (\code{"Homo sapiens"}) and mouse (\code{"Mus musculus"}).
#'
#' @param species `"Homo sapiens"` (default) or `"Mus musculus"`.
#' @param library Character. Optional vector of main collection codes
#' (e.g. `"H"`, `"C5"`).
#' @param subcategory Character. Optional vector of sub-collection codes
#' (e.g. `"GO:BP"`).
#' @param gene.sets Character. Optional vector of specific gene-set names.
#' @param version MSigDB version (character, default `"7.4"`).
#' @param id Identifier type (default `"SYM"` for symbols).
#' @param species Character. Species name. Either \code{"Homo sapiens"}
#' (default) or \code{"Mus musculus"}.
#' @param library Character or \code{NULL}. Vector of main collection codes
#' (e.g., \code{"H"}, \code{"C5"}). Default is \code{NULL} (all collections).
#' @param subcategory Character or \code{NULL}. Vector of sub-collection codes
#' (e.g., \code{"GO:BP"}). Default is \code{NULL} (all subcategories).
#' @param gene.sets Character or \code{NULL}. Vector of specific gene-set
#' names. Default is \code{NULL} (all gene sets).
#' @param version Character. MSigDB version. Default is \code{"7.4"}.
#' @param id Character. Identifier type. Default is \code{"SYM"} (gene
#' symbols).
#'
#' @examples
#' \dontrun{
#' # Get all hallmark gene sets from human.
#' gs <- getGeneSets(species = "Homo sapiens",
#' gs <- getGeneSets(species = "Homo sapiens",
#' library = "H")
#'
#' # Get a subset based on main collection and subcollection.
#' gs <- getGeneSets(species = "Homo sapiens",
#' library = c("C2", "C5"),
#' gs <- getGeneSets(species = "Homo sapiens",
#' library = c("C2", "C5"),
#' subcategory = "GO:BP")
#' }
#'
#' @return A named `list` of character vectors (gene IDs).
#' @return A named list of character vectors (gene IDs).
#' @export
getGeneSets <- function(species = c("Homo sapiens", "Mus musculus"),
library = NULL,
Expand Down
Loading