diff --git a/DESCRIPTION b/DESCRIPTION index 1c89d85..b7b666d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: escape Title: Easy single cell analysis platform for enrichment -Version: 2.6.1 +Version: 2.6.2 Authors@R: c( person(given = "Nick", family = "Borcherding", role = c("aut", "cre"), email = "ncborch@gmail.com"), person(given = "Jared", family = "Andrews", role = c("aut"), email = "jared.andrews07@gmail.com"), @@ -11,7 +11,7 @@ Description: A bridging R package to facilitate gene set enrichment analysis (GS License: MIT + file LICENSE Encoding: UTF-8 LazyData: false -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 biocViews: Software, SingleCell, Classification, Annotation, GeneSetEnrichment, Sequencing, GeneSignaling, Pathways Depends: R (>= 4.1) Imports: diff --git a/NAMESPACE b/NAMESPACE index 51d6485..83c6d48 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -21,6 +21,7 @@ importFrom(MatrixGenerics,rowSds) importFrom(MatrixGenerics,rowSums2) importFrom(SummarizedExperiment,colData) importFrom(ggdist,stat_pointinterval) +importFrom(ggplot2,"%+replace%") importFrom(grDevices,hcl.pals) importFrom(stats,aggregate) importFrom(stats,as.formula) diff --git a/NEWS.md b/NEWS.md index 75985e3..1d2dd25 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,17 @@ +# 2.6.2 + +## NEW FEATURES +* Added `.themeEscape()` internal theme function for consistent visualization styling across all plotting functions + +## ENHANCEMENTS +* **Seurat v5 compatibility**: Updated `.cntEval()` to detect SeuratObject version and use `layer` argument instead of deprecated `slot` argument for SeuratObject >= 5.0.0 +* **Consistent theming**: Applied unified theme styling across all visualization functions (`ridgeEnrichment()`, `splitEnrichment()`, `geyserEnrichment()`, `heatmapEnrichment()`, `scatterEnrichment()`, `pcaEnrichment()`, `densityEnrichment()`, `gseaEnrichment()`, `enrichItPlot()`) +* **Improved `densityEnrichment()`**: Added plot title showing gene set name, alphanumeric sorting of 
group labels, and improved rug segment styling + +## DOCUMENTATION +* Reformatted roxygen2 documentation across all exported functions for consistency +* Standardized use of `\code{}`, `\itemize{}`, `\enumerate{}`, `\strong{}`, and `\emph{}` tags +* Replaced Unicode characters with ASCII equivalents for better portability # 2.6.1 diff --git a/R/densityEnrichment.R b/R/densityEnrichment.R index 76ebf8c..2f4d64c 100644 --- a/R/densityEnrichment.R +++ b/R/densityEnrichment.R @@ -1,20 +1,21 @@ #' Visualize Mean Density Ranking of Genes Across Gene Sets -#' -#' This function allows to the user to examine the mean ranking -#' within the groups across the gene set. The visualization uses -#' the density function to display the relative position and distribution -#' of rank. +#' +#' This function allows the user to examine the mean ranking within groups +#' across the gene set. The visualization uses the density function to display +#' the relative position and distribution of rank. #' #' @param input.data A \link[SeuratObject]{Seurat} object or a -#' \link[SingleCellExperiment]{SingleCellExperiment}. -#' @param gene.set.use Character(1). Name of the gene set to display. +#' \link[SingleCellExperiment]{SingleCellExperiment}. +#' @param gene.set.use Character. Name of the gene set to display. #' @param gene.sets A named list of character vectors, the result of -#' [getGeneSets()], or the built-in data object [escape.gene.sets]. -#' @param group.by Metadata column. Defaults to the Seurat/SCE `ident` -#' slot when `NULL`. -#' @param rug.height Vertical spacing of the hit rug as a fraction of the -#' y-axis (default `0.02`). -#' @param palette Character. Any palette from \code{\link[grDevices]{hcl.pals}}. +#' \code{\link{getGeneSets}}, or the built-in data object +#' \code{\link{escape.gene.sets}}. +#' @param group.by Character. Metadata column used for grouping. Defaults to +#' the Seurat/SCE \code{ident} slot when \code{NULL}. +#' @param rug.height Numeric. 
Vertical spacing of the hit rug as a fraction of +#' the y-axis. Default is \code{0.02}. +#' @param palette Character. Color palette name from +#' \code{\link[grDevices]{hcl.pals}}. Default is \code{"inferno"}. #' #' @examples #' gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), @@ -69,7 +70,7 @@ densityEnrichment <- function(input.data, dens <- suppressWarnings( compute.cdf(tmp, seq_len(ncol(tmp)), FALSE, FALSE) ) - ord <- apply(dens, 2, order, decreasing = TRUE) # genes × cells + ord <- apply(dens, 2, order, decreasing = TRUE) # genes x cells scores <- vapply(seq_len(ncol(ord)), function(j) weights[ord[, j]], numeric(n.genes)) @@ -89,37 +90,46 @@ densityEnrichment <- function(input.data, ## -------- 4 Plots --------------------------------------------------------- cols <- .colorizer(palette, length(groups)) - plot.df <- subset(long.df, gene.set.query == "yes" & is.finite(value)) - + + # Filter to gene set members with valid values + plot.df <- long.df[long.df$gene.set.query == "yes" & is.finite(long.df$value), ] + + # Alphanumerically sort group labels for consistent ordering + plot.df$variable <- factor(plot.df$variable, + levels = .alphanumericalSort(unique(plot.df$variable))) + + # Density plot panel + p1 <- ggplot(plot.df, aes(x = value, fill = variable)) + - geom_density(alpha = 0.4, colour = "black") + + geom_density(alpha = 0.5, colour = "black", linewidth = 0.4) + scale_fill_manual(values = cols, name = "Group") + - labs(y = "Rank density") + - theme_classic() + + labs(y = "Rank Density", + title = paste0("Gene Set: ", gene.set.use)) + + .themeEscape(grid_lines = "Y") + theme(axis.title.x = element_blank(), axis.text.x = element_blank(), - axis.ticks.x = element_blank()) - - ## simple segment plot for mean-rank positions - offset <- rug.height - seg.df <- within(plot.df, { - ord <- match(variable, unique(variable)) - y <- -(ord * offset - offset) - yend <- y - offset - }) - + axis.ticks.x = element_blank(), + plot.title = 
element_text(hjust = 0.5)) + + # Build rug segment data with proper stacking + seg.df <- plot.df + seg.df$ord <- match(seg.df$variable, levels(seg.df$variable)) + seg.df$y <- -(seg.df$ord - 1) * rug.height + seg.df$yend <- seg.df$y - rug.height * 0.9 + + # Rug plot panel p2 <- ggplot(seg.df, aes(x = value, xend = value, y = y, yend = yend, colour = variable)) + - geom_segment(linewidth = 1) + + geom_segment(linewidth = 0.8, alpha = 0.7) + scale_colour_manual(values = cols, guide = "none") + - labs(x = "Mean rank order") + - theme_classic() + + labs(x = "Mean Rank Order") + + .themeEscape(grid_lines = "none") + theme(axis.title.y = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(), - panel.border = element_rect(fill = NA, colour = "black")) - - patchwork::wrap_plots(p1, p2, ncol = 1, heights = c(3,1)) + panel.border = element_rect(fill = NA, colour = "grey50", linewidth = 0.5)) + + patchwork::wrap_plots(p1, p2, ncol = 1, heights = c(3, 1)) } \ No newline at end of file diff --git a/R/enrichIt.R b/R/enrichIt.R index 71ecf58..23cd537 100644 --- a/R/enrichIt.R +++ b/R/enrichIt.R @@ -1,33 +1,47 @@ #' Flexible GSEA for Precomputed Gene Lists #' -#' @description -#' A convenience front-end to **fgsea** that lets you point at the -#' `avg_log2FC` and `p_val_adj` columns coming out of Seurat / DESeq2 / -#' edgeR etc. It converts them to a signed -log10(*p*) ranking, filters on +#' A convenience front-end to \pkg{fgsea} that lets you point at the +#' \code{avg_log2FC} and \code{p_val_adj} columns coming out of Seurat / DESeq2 +#' / edgeR etc. It converts them to a signed -log10(p) ranking, filters on #' significance / effect size, and then runs fgsea. #' -#' @param input.data Either -#' • a named numeric vector **already ranked**, *or* -#' • a data.frame/tibble with one row per gene and columns containing -#' log-fold-change and *p*-value. If the gene ID is not in `rownames(data)`, -#' supply `gene_col`. 
-#' @param gene.sets AA named list of character vectors, the result of -#' [getGeneSets()], or the built-in data object [escape.gene.sets]. -#' @param gene_col Name of the column holding gene identifiers (ignored when -#' they are row-names). Default `NULL`. -#' @param logFC_col,pval_col Column names for logFC and *p* (or adj.*p*) -#' – defaults match Seurat’s `FindMarkers()`. -#' @param minSize,maxSize Integer. Minimum / maximum pathway size passed to -#' *fgsea* (default 5 / 500). -#' @param ranking_fun How to build the ranking: `"signed_log10_p"` (default) -#' or `"logFC"`. -#' @param pval_cutoff,logFC_cutoff Filters applied **before** ranking. -#' @param padjust_method Multiple-testing correction; any method accepted by -#' [stats::p.adjust()] (default `"BH"`). -#' @param nproc Passed to **fgsea** (`0` = multithread if OpenMP available). +#' @param input.data Either: +#' \itemize{ +#' \item A named numeric vector \strong{already ranked}, or +#' \item A data.frame/tibble with one row per gene and columns containing +#' log-fold-change and p-value. If the gene ID is not in +#' \code{rownames(data)}, supply \code{gene_col}. +#' } +#' @param gene.sets A named list of character vectors, the result of +#' \code{\link{getGeneSets}}, or the built-in data object +#' \code{\link{escape.gene.sets}}. +#' @param gene_col Character or \code{NULL}. Name of the column holding gene +#' identifiers (ignored when they are row-names). Default is \code{NULL}. +#' @param logFC_col Character. Column name for log-fold-change values. Default +#' is \code{"avg_log2FC"} (matches Seurat's \code{FindMarkers()}). +#' @param pval_col Character. Column name for p-values (or adjusted p-values). +#' Default is \code{c("p_val_adj", "p_val")} (first match is used). +#' @param ranking_fun Character. How to build the ranking: +#' \itemize{ +#' \item \code{"signed_log10_p"} (default): sign(logFC) * -log10(p). +#' \item \code{"logFC"}: Use log-fold-change values directly. 
+#' } +#' @param pval_cutoff Numeric. Filter genes with p-value above this threshold +#' \strong{before} ranking. Default is \code{1} (no filtering). +#' @param logFC_cutoff Numeric. Filter genes with absolute log-fold-change +#' below this threshold \strong{before} ranking. Default is \code{0} (no +#' filtering). +#' @param minSize Integer. Minimum pathway size passed to \pkg{fgsea}. Default +#' is \code{5}. +#' @param maxSize Integer. Maximum pathway size passed to \pkg{fgsea}. Default +#' is \code{500}. +#' @param padjust_method Character. Multiple-testing correction method; any +#' method accepted by \code{\link[stats]{p.adjust}}. Default is \code{"BH"}. +#' @param nproc Integer. Passed to \pkg{fgsea}. Use \code{0} for multithread +#' if OpenMP is available. Default is \code{0}. #' -#' -#' @seealso [fgsea::fgsea()], [getGeneSets()], [gseaEnrichment()] +#' @seealso \code{\link[fgsea]{fgsea}}, \code{\link{getGeneSets}}, +#' \code{\link{gseaEnrichment}} #' #' @examples #' pbmc_small <- SeuratObject::pbmc_small diff --git a/R/enrichItPlot.R b/R/enrichItPlot.R index 6ef50df..2639141 100644 --- a/R/enrichItPlot.R +++ b/R/enrichItPlot.R @@ -1,19 +1,30 @@ -#' Adaptive visualisation of enrichIt results +#' Adaptive Visualisation of enrichIt Results #' -#' @param res `data.frame` returned by [enrichIt()]. -#' @param plot.type `"bar"`, `"dot"`, or `"cnet"`. -#' @param top Integer. Keep the top *n* terms **per database** -#' (ranked by adjusted *p*). Set to `Inf` to keep all. -#' @param x.measure A column in `res` mapped to the *x*-axis -#' (ignored for `"cnet"`). Default `"-log10(padj)"`. -#' @param color.measure Column mapped to color (dot plot only). -#' Default same as `x.measure`. -#' @param show.counts Logical. Annotate bar plot with the `Count` (number of genes). -#' @param palette palette Character. Any palette from \code{\link[grDevices]{hcl.pals}}. -#' @param ... Further arguments passed to **ggplot2** geoms (e.g. -#' `alpha`, `linewidth`). 
+#' Create bar, dot, or network plots from \code{\link{enrichIt}} results. #' -#' @return A **patchwork** object (bar / dot) or **ggraph** object (cnet). +#' @param res Data frame. Output from \code{\link{enrichIt}}. +#' @param plot.type Character. Visualization type. Options: +#' \itemize{ +#' \item \code{"bar"} (default): Horizontal bar plot. +#' \item \code{"dot"}: Dot plot with size and color encoding. +#' \item \code{"cnet"}: Concept network plot showing gene-pathway +#' relationships. +#' } +#' @param top Integer. Keep the top \emph{n} terms \strong{per database} +#' (ranked by adjusted p-value). Set to \code{Inf} to keep all. Default is +#' \code{20}. +#' @param x.measure Character. Column in \code{res} mapped to the x-axis +#' (ignored for \code{"cnet"}). Default is \code{"-log10(padj)"}. +#' @param color.measure Character. Column mapped to color (dot plot only). +#' Default is same as \code{x.measure}. +#' @param show.counts Logical. If \code{TRUE}, annotate bar plot with the +#' \code{Count} (number of genes). Default is \code{TRUE}. +#' @param palette Character. Color palette name from +#' \code{\link[grDevices]{hcl.pals}}. Default is \code{"inferno"}. +#' @param ... Further arguments passed to \pkg{ggplot2} geoms (e.g., +#' \code{alpha}, \code{linewidth}). +#' +#' @return A \pkg{ggplot2} object (bar/dot) or \pkg{ggraph} object (cnet). 
#' @export #' #' @examples @@ -65,12 +76,12 @@ enrichItPlot <- function(res, } ## Bar Plot - if (plot.type == "bar") { + if (plot.type == "bar") { p <- ggplot2::ggplot(res, ggplot2::aes(x = .data[[x.measure]], y = .data$Term)) + ggplot2::geom_col(fill = .colorizer(palette, n = 1)) + ggplot2::labs(x = x.measure, y = NULL) + - ggplot2::theme_classic() + .themeEscape(grid_lines = "X") if (isTRUE(show.counts)) { p <- p + ggplot2::geom_text( @@ -79,9 +90,8 @@ enrichItPlot <- function(res, hjust = 0, size = 3) } p <- p + ggplot2::coord_cartesian(clip = "off") - ## Dot Plot - } else if (plot.type == "dot") { - + ## Dot Plot + } else if (plot.type == "dot") { p <- ggplot2::ggplot(res, ggplot2::aes(x = .data$geneRatio, y = .data$Term, color = .data[[color.measure]], @@ -90,7 +100,7 @@ enrichItPlot <- function(res, ggplot2::scale_size_continuous(name = "Core Count") + ggplot2::labs(x = "geneRatio", y = NULL, color = color.measure) + - ggplot2::theme_classic() + + .themeEscape(grid_lines = "X") + ggplot2::theme(legend.box = "vertical") if (!is.null(palette)) diff --git a/R/getGeneSets.R b/R/getGeneSets.R index 2f05b04..9f14951 100644 --- a/R/getGeneSets.R +++ b/R/getGeneSets.R @@ -1,32 +1,35 @@ -#' Get a collection of gene sets from the msigdb +#' Get a Collection of Gene Sets from MSigDB #' -#' This function retrieves gene sets from msigdb and caches the downloaded object -#' for future calls. It allows subsetting by main collection (library), -#' subcollection, or specific gene sets, and only supports human -#' ("Homo sapiens") and mouse ("Mus musculus"). +#' This function retrieves gene sets from MSigDB and caches the downloaded +#' object for future calls. It allows subsetting by main collection (library), +#' subcollection, or specific gene sets, and only supports human +#' (\code{"Homo sapiens"}) and mouse (\code{"Mus musculus"}). #' -#' @param species `"Homo sapiens"` (default) or `"Mus musculus"`. -#' @param library Character. 
Optional vector of main collection codes -#' (e.g. `"H"`, `"C5"`). -#' @param subcategory Character. Optional vector of sub-collection codes -#' (e.g. `"GO:BP"`). -#' @param gene.sets Character. Optional vector of specific gene-set names. -#' @param version MSigDB version (character, default `"7.4"`). -#' @param id Identifier type (default `"SYM"` for symbols). +#' @param species Character. Species name. Either \code{"Homo sapiens"} +#' (default) or \code{"Mus musculus"}. +#' @param library Character or \code{NULL}. Vector of main collection codes +#' (e.g., \code{"H"}, \code{"C5"}). Default is \code{NULL} (all collections). +#' @param subcategory Character or \code{NULL}. Vector of sub-collection codes +#' (e.g., \code{"GO:BP"}). Default is \code{NULL} (all subcategories). +#' @param gene.sets Character or \code{NULL}. Vector of specific gene-set +#' names. Default is \code{NULL} (all gene sets). +#' @param version Character. MSigDB version. Default is \code{"7.4"}. +#' @param id Character. Identifier type. Default is \code{"SYM"} (gene +#' symbols). #' #' @examples #' \dontrun{ #' # Get all hallmark gene sets from human. -#' gs <- getGeneSets(species = "Homo sapiens", +#' gs <- getGeneSets(species = "Homo sapiens", #' library = "H") #' #' # Get a subset based on main collection and subcollection. -#' gs <- getGeneSets(species = "Homo sapiens", -#' library = c("C2", "C5"), +#' gs <- getGeneSets(species = "Homo sapiens", +#' library = c("C2", "C5"), #' subcategory = "GO:BP") #' } #' -#' @return A named `list` of character vectors (gene IDs). +#' @return A named list of character vectors (gene IDs). 
#' @export getGeneSets <- function(species = c("Homo sapiens", "Mus musculus"), library = NULL, diff --git a/R/geyserEnrichment.R b/R/geyserEnrichment.R index 31de66a..c9250e9 100644 --- a/R/geyserEnrichment.R +++ b/R/geyserEnrichment.R @@ -1,45 +1,53 @@ #' Visualize Enrichment Distributions Using Geyser Plots -#' -#' This function allows to the user to examine the distribution of +#' +#' This function allows the user to examine the distribution of #' enrichment across groups by generating a geyser plot. #' -#' @param input.data Output of \code{\link{escape.matrix}} or a single‑cell -#' object previously processed by \code{\link{runEscape}}. -#' @param assay Name of the assay holding enrichment scores when -#' `input.data` is a single‑cell object. Ignored otherwise. -#' @param group.by Metadata column plotted on the *x*‑axis. Defaults to the -#' Seurat/SCE `ident` slot when `NULL`. -#' @param gene.set Character(1). Gene‑set to plot (must exist in the -#' enrichment matrix). -#' @param color.by Aesthetic mapped to point color. Use either -#' *"group"* (default = `group.by`) for categorical coloring or the -#' *name of a gene‑set* (e.g. same as `gene.set`) to obtain a numeric -# gradient. Any other metadata or column present in the data is also -#' accepted. -#' @param order.by How to arrange the x‑axis: -#' *`"mean"`* – groups ordered by decreasing group mean; -#' *`"group"`* – natural sort of group labels; -#' *`NULL`* – keep original ordering. -#' @param facet.by Optional metadata column used to facet the plot. -#' @param summarise.by Optional metadata column used to summarise data. -#' @param summary.stat Optional method used to summarize expression within each -#' group defined by \code{summarise.by}. One of: \code{"mean"} (default), -#' \code{"median"}, \code{"max"}, \code{"sum"}, or \code{"geometric"}. -#' @param scale Logical; if `TRUE` scores are centered/scaled (Z‑score) prior -#' to plotting. -#' @param palette Character. 
Any palette from \code{\link[grDevices]{hcl.pals}}. +#' @param input.data Output of \code{\link{escape.matrix}} or a single-cell +#' object previously processed by \code{\link{runEscape}}. +#' @param assay Character. Name of the assay holding enrichment scores when +#' \code{input.data} is a single-cell object. Ignored otherwise. +#' @param group.by Character. Metadata column plotted on the x-axis. Defaults +#' to the Seurat/SCE \code{ident} slot when \code{NULL}. +#' @param gene.set.use Character. Name of the gene set to display. +#' @param color.by Character. Aesthetic mapped to point color. Options: +#' \itemize{ +#' \item \code{"group"} (default): Uses \code{group.by} for categorical +#' coloring. +#' \item \emph{gene-set name}: Use the same value as \code{gene.set.use} +#' to obtain a numeric gradient. +#' \item Any other metadata column present in the data. +#' } +#' @param order.by Character or \code{NULL}. How to arrange the x-axis: +#' \itemize{ +#' \item \code{"mean"}: Groups ordered by decreasing group mean. +#' \item \code{"group"}: Natural (alphanumeric) sort of group labels. +#' \item \code{NULL} (default): Keep original ordering. +#' } +#' @param facet.by Character or \code{NULL}. Metadata column used to facet +#' the plot. +#' @param summarise.by Character or \code{NULL}. Metadata column used to +#' summarise data before plotting. +#' @param summary.stat Character. Method used to summarize expression within +#' each group defined by \code{summarise.by}. One of: \code{"mean"} +#' (default), \code{"median"}, \code{"max"}, \code{"sum"}, or +#' \code{"geometric"}. +#' @param scale Logical. If \code{TRUE}, scores are centered and scaled +#' (Z-score) prior to plotting. Default is \code{FALSE}. +#' @param palette Character. Color palette name from +#' \code{\link[grDevices]{hcl.pals}}. Default is \code{"inferno"}. 
#' #' @examples #' gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), #' Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) -#' +#' #' pbmc <- SeuratObject::pbmc_small |> #' runEscape(gene.sets = gs, #' min.size = NULL) #' -#' geyserEnrichment(pbmc, -#' assay = "escape", -#' gene.set = "Tcells") +#' geyserEnrichment(pbmc, +#' assay = "escape", +#' gene.set.use = "Tcells") #' #' @import ggplot2 #' @importFrom ggdist stat_pointinterval @@ -47,114 +55,113 @@ #' @return A \pkg{ggplot2} object. #' @export geyserEnrichment <- function(input.data, - assay = NULL, - group.by = NULL, - gene.set, - color.by = "group", - order.by = NULL, - scale = FALSE, - facet.by = NULL, + assay = NULL, + group.by = NULL, + gene.set.use, + color.by = "group", + order.by = NULL, + scale = FALSE, + facet.by = NULL, summarise.by = NULL, - summary.stat = "mean", - palette = "inferno") { + summary.stat = "mean", + palette = "inferno") { ## ---- 0) Sanity checks ----------------------------------------------------- - if (missing(gene.set) || length(gene.set) != 1L) - stop("Please supply exactly one 'gene.set' to plot.") - + if (missing(gene.set.use) || length(gene.set.use) != 1L) + stop("Please supply exactly one 'gene.set.use' to plot.") + if (is.null(group.by)) group.by <- "ident" - + if (identical(color.by, "group")) color.by <- group.by - - if (!is.null(summarise.by) && (identical(summarise.by, group.by) || + + if (!is.null(summarise.by) && (identical(summarise.by, group.by) || identical(summarise.by, facet.by))) - stop("'summarise.by' cannot be the same as 'group.by' or 'facet.by'. + stop("'summarise.by' cannot be the same as 'group.by' or 'facet.by'. 
Please choose a different metadata column.") - + # ---- 1) helper to match summary function ------------------------- summary_fun <- .match_summary_fun(summary.stat) - + ## ---- 2) Build tidy data.frame ------------------------------------------- - enriched <- .prepData(input.data, assay, gene.set, group.by, + enriched <- .prepData(input.data, assay, gene.set.use, group.by, split.by = summarise.by, facet.by = facet.by, color.by = color.by) - + # Define all grouping variables that must be metadata columns grouping_vars <- unique(c(summarise.by, group.by, facet.by)) - + # Determine if color.by is a feature all_features <- rownames(.cntEval(input.data, assay = assay, type = "data")) - + # Determine if color.by is a feature is_feature_color <- !is.null(color.by) && (color.by %in% all_features) - - ## Optionally summarise data with **base aggregate()** ---------------------- + + ## Optionally summarize data with **base aggregate()** ---------------------- if (!is.null(summarise.by)) { - - # add color.by to summarise_vars if it is a feautre, otherwise add to grouping_vars - summarise_vars <- unique(c(gene.set, if (is_feature_color) color.by)) + + # add color.by to summarise_vars if it is a feature, otherwise add to grouping_vars + summarise_vars <- unique(c(gene.set.use, if (is_feature_color) color.by)) grouping_vars <- unique(c(grouping_vars, if (!is_feature_color) color.by)) - + # Perform aggregation enriched <- aggregate(enriched[summarise_vars], by = enriched[grouping_vars], FUN = summary_fun, simplify = TRUE) } - - ## Optionally Z‑transform ---------------------------------------------------- + + ## Optionally Z-transform ---------------------------------------------------- if (scale) { - enriched[[gene.set]] <- scale(as.numeric(enriched[[gene.set]])) - - # Also scale color.by if it's a feature + enriched[[gene.set.use]] <- scale(as.numeric(enriched[[gene.set.use]])) + + # Also scale color.by if it's a feature if (is_feature_color) { enriched[[color.by]] <- 
scale(enriched[[color.by]]) } } - + ## Optionally reorder groups ------------------------------------------------- if (!is.null(order.by)) enriched <- .orderFunction(enriched, order.by, group.by) - + ## ---- 3) Plot -------------------------------------------------------------- if (!is.null(color.by)) plt <- ggplot(enriched, aes(x = .data[[group.by]], - y = .data[[gene.set]], + y = .data[[gene.set.use]], group = .data[[group.by]], colour = .data[[color.by]])) else plt <- ggplot(enriched, aes(x = .data[[group.by]], - y = .data[[gene.set]]), + y = .data[[gene.set.use]]), group = .data[[group.by]]) # Raw points -------------------------------------------------------------- plt <- plt + geom_jitter(width = 0.25, size = 1.5, alpha = 0.6, na.rm = TRUE) + - + # White base interval + median point ------------------------------------- stat_pointinterval(interval_size_range = c(2, 3), fatten_point = 1.4, interval_colour = "white", point_colour = "white", position = position_dodge(width = 0.6), show.legend = FALSE) + - + # Black outline for clarity ---------------------------------------------- stat_pointinterval(interval_size_range = c(1, 2), fatten_point = 1.4, interval_colour = "black", point_colour = "black", position = position_dodge(width = 0.6), show.legend = FALSE) + - + labs(x = group.by, - y = paste0(gene.set, "\nEnrichment Score"), + y = paste0(gene.set.use, "\nEnrichment Score"), colour = color.by) + - theme_classic() + - theme(legend.direction = "horizontal", - legend.position = "bottom") - + .themeEscape(grid_lines = "Y", legend_position = "bottom") + + ggplot2::theme(legend.direction = "horizontal") + ## ---- 4) Colour scale ------------------------------------------------------ - if (!is.null(color.by)) + if (!is.null(color.by)) plt <- .colorby(enriched, plt, color.by, palette, type = "color") - + ## ---- 5) Facetting --------------------------------------------------------- if (!is.null(facet.by)) plt <- plt + facet_grid(as.formula(paste(".~", facet.by))) - + 
plt } diff --git a/R/gseaEnrichment.R b/R/gseaEnrichment.R index 64928ab..b926a68 100644 --- a/R/gseaEnrichment.R +++ b/R/gseaEnrichment.R @@ -1,37 +1,43 @@ #' Classical GSEA-style Running-Enrichment Plot #' -#' Produces the familiar two-panel GSEA graphic—running enrichment score -#' (RES) plus a “hit” rug—for a **single gene-set** evaluated across -#' multiple biological groups (clusters, conditions, samples, ...). +#' Produces the familiar two-panel GSEA graphic: running enrichment score (RES) +#' plus a "hit" rug for a \strong{single gene-set} evaluated across multiple +#' biological groups (clusters, conditions, samples, etc.). #' -#' **Algorithm (Subramanian _et al._, PNAS 2005)** -#' 1. Within every group, library-size-normalise counts to CPM. -#' 2. Collapse gene expression with `summary.fun` (mean/median/…). -#' 3. Rank genes (descending) to obtain one ordered list per group. -#' 4. Compute the weighted Kolmogorov–Smirnov running score -#' (weight = \|stat\|^*p*). -#' 5. ES = maximum signed deviation of the curve. +#' @section Algorithm: +#' Based on Subramanian \emph{et al.}, PNAS 2005: +#' \enumerate{ +#' \item Within every group, library-size-normalize counts to CPM. +#' \item Collapse gene expression with \code{summary.fun} (mean/median/etc.). +#' \item Rank genes (descending) to obtain one ordered list per group. +#' \item Compute the weighted Kolmogorov-Smirnov running score +#' (weight = |stat|^p). +#' \item ES = maximum signed deviation of the curve. +#' } #' -#' @param input.data A \link[SeuratObject]{Seurat} object or a -#' \link[SingleCellExperiment]{SingleCellExperiment}. -#' @param gene.set.use Character(1). Name of the gene set to display. +#' @param input.data A \link[SeuratObject]{Seurat} object or a +#' \link[SingleCellExperiment]{SingleCellExperiment}. +#' @param gene.set.use Character. Name of the gene set to display. 
#' @param gene.sets A named list of character vectors, the result of -#' [getGeneSets()], or the built-in data object [escape.gene.sets]. -#' @param group.by Metadata column. Defaults to the Seurat/SCE `ident` -#' slot when `NULL`. -#' @param summary.fun Method used to collapse expression within each -#' group **before** ranking: one of `"mean"` (default), `"median"`, `"max"`, -#'`"sum"`, or `"geometric"`. -#' @param p Weighting exponent in the KS statistic (classical GSEA uses `p = 1`). -#' @param nperm Integer >= 0. Gene-label permutations per group (default 1000). -#' `0` value will skip NES/*p* calculation. -#' @param rug.height Vertical spacing of the hit rug as a fraction of the -#' y-axis (default `0.02`). -#' @param digits Number of decimal places displayed for ES in the -#' legend (default `2`). +#' \code{\link{getGeneSets}}, or the built-in data object +#' \code{\link{escape.gene.sets}}. +#' @param group.by Character. Metadata column used for grouping. Defaults to +#' the Seurat/SCE \code{ident} slot when \code{NULL}. +#' @param summary.fun Character. Method used to collapse expression within +#' each group \strong{before} ranking. One of: \code{"mean"} (default), +#' \code{"median"}, \code{"max"}, \code{"sum"}, or \code{"geometric"}. +#' @param p Numeric. Weighting exponent in the KS statistic. Classical GSEA +#' uses \code{p = 1}. Default is \code{1}. +#' @param nperm Integer. Number of gene-label permutations per group. +#' Default is \code{1000}. Set to \code{0} to skip NES/p-value calculation. +#' @param rug.height Numeric. Vertical spacing of the hit rug as a fraction +#' of the y-axis. Default is \code{0.02}. +#' @param digits Integer. Number of decimal places displayed for ES in the +#' legend. Default is \code{2}. #' @param BPPARAM A \pkg{BiocParallel} parameter object describing the -#' parallel backend. -#' @param palette Character. Any palette from \code{\link[grDevices]{hcl.pals}}. +#' parallel backend. 
Default is \code{NULL} (serial execution). +#' @param palette Character. Color palette name from +#' \code{\link[grDevices]{hcl.pals}}. Default is \code{"inferno"}. #' #' @examples #' pbmc_small <- SeuratObject::pbmc_small @@ -176,24 +182,24 @@ gseaEnrichment <- function(input.data, p_top <- ggplot2::ggplot(running.df, ggplot2::aes(rank, ES, colour = grp)) + ggplot2::geom_step(linewidth = 0.8) + - ggplot2::geom_hline(yintercept = 0) + + ggplot2::geom_hline(yintercept = 0) + ggplot2::scale_colour_manual(values = cols, name = NULL) + ggplot2::labs(y = paste0(gene.set.use, "\nRunning Enrichment Score")) + - ggplot2::theme_classic() + - ggplot2::theme(axis.title.x = element_blank(), - axis.text.x = element_blank(), - axis.ticks.x = element_blank()) + .themeEscape(grid_lines = "Y") + + ggplot2::theme(axis.title.x = ggplot2::element_blank(), + axis.text.x = ggplot2::element_blank(), + axis.ticks.x = ggplot2::element_blank()) p_mid <- ggplot2::ggplot(rug.df) + ggplot2::geom_segment(ggplot2::aes(x, y, xend = xend, yend = yend, colour = grp)) + ggplot2::scale_colour_manual(values = cols, guide = "none") + - theme_classic() + - ggplot2::ylim(-length(groups)*rug.height, 0) + - theme(axis.title = element_blank(), - axis.text.y = element_blank(), - axis.ticks.y = element_blank(), - panel.border = element_rect(fill = NA, colour = "black", linewidth = 0.5)) + .themeEscape(grid_lines = "none") + + ggplot2::ylim(-length(groups)*rug.height, 0) + + ggplot2::theme(axis.title = ggplot2::element_blank(), + axis.text.y = ggplot2::element_blank(), + axis.ticks.y = ggplot2::element_blank(), + panel.border = ggplot2::element_rect(fill = NA, colour = "black", linewidth = 0.5)) patchwork::wrap_plots(p_top, p_mid, ncol = 1, heights = c(3, 0.4)) } diff --git a/R/heatmapEnrichment.R b/R/heatmapEnrichment.R index cc42e24..078ca22 100644 --- a/R/heatmapEnrichment.R +++ b/R/heatmapEnrichment.R @@ -1,26 +1,31 @@ #' Visualize Enrichment Value Summaries Using Heatmaps -#' -#' This function allows to 
the user to examine the heatmap with the mean -#' enrichment values by group. The heatmap will have the gene sets as rows -#' and columns will be the grouping variable. #' -#' @param input.data Output of \code{\link{escape.matrix}} or a single‑cell -#' object previously processed by \code{\link{runEscape}}. -#' @param assay Name of the assay holding enrichment scores when -#' `input.data` is a single‑cell object. Ignored otherwise. -#' @param group.by Metadata column plotted on the *x*‑axis. Defaults to the -#' Seurat/SCE `ident` slot when `NULL`. -#' @param gene.set.use Vector of gene‑set names to plot, or \code{"all"} -#' (default) to show every available gene set. -#' @param cluster.rows,cluster.columns Logical; if \code{TRUE}, rows/columns -#' are ordered by Ward‑linkage hierarchical clustering (Euclidean distance). -#' @param facet.by Optional metadata column used to facet the plot. -#' @param scale If \code{TRUE}, Z‑transforms each gene‑set column **after** -#' summarization. -#' @param summary.stat Optional method used to summarize expression within each -#' group. One of: \code{"mean"} (default), \code{"median"}, \code{"max"}, -#' \code{"sum"}, or \code{"geometric"}. -#' @param palette Character. Any palette from \code{\link[grDevices]{hcl.pals}}. +#' This function allows the user to examine a heatmap with the mean enrichment +#' values by group. The heatmap displays gene sets as rows and the grouping +#' variable as columns. +#' +#' @param input.data Output of \code{\link{escape.matrix}} or a single-cell +#' object previously processed by \code{\link{runEscape}}. +#' @param assay Character. Name of the assay holding enrichment scores when +#' \code{input.data} is a single-cell object. Ignored otherwise. +#' @param group.by Character. Metadata column plotted on the x-axis. Defaults +#' to the Seurat/SCE \code{ident} slot when \code{NULL}. +#' @param gene.set.use Character vector or \code{"all"}. Gene-set names to +#' plot. 
Use \code{"all"} (default) to show every available gene set. +#' @param cluster.rows Logical. If \code{TRUE}, rows are ordered by Ward-linkage +#' hierarchical clustering (Euclidean distance). Default is \code{FALSE}. +#' @param cluster.columns Logical. If \code{TRUE}, columns are ordered by +#' Ward-linkage hierarchical clustering (Euclidean distance). Default is +#' \code{FALSE}. +#' @param facet.by Character or \code{NULL}. Metadata column used to facet +#' the plot. +#' @param scale Logical. If \code{TRUE}, Z-transforms each gene-set column +#' \strong{after} summarization. Default is \code{FALSE}. +#' @param summary.stat Character. Method used to summarize expression within +#' each group. One of: \code{"mean"} (default), \code{"median"}, +#' \code{"max"}, \code{"sum"}, or \code{"geometric"}. +#' @param palette Character. Color palette name from +#' \code{\link[grDevices]{hcl.pals}}. Default is \code{"inferno"}. #' #' @return A \code{ggplot2} object. #' @importFrom stats aggregate dist hclust @@ -107,11 +112,11 @@ heatmapEnrichment <- function(input.data, ggplot2::scale_x_discrete(expand = c(0, 0)) + ggplot2::scale_y_discrete(expand = c(0, 0)) + ggplot2::coord_equal() + - ggplot2::theme_classic() + - ggplot2::theme(axis.title = ggplot2::element_blank(), - axis.ticks = ggplot2::element_blank(), - legend.position = "bottom", - legend.direction= "horizontal") + .themeEscape(grid_lines = "none", legend_position = "bottom") + + ggplot2::theme(axis.title = ggplot2::element_blank(), + axis.ticks = ggplot2::element_blank(), + legend.direction = "horizontal", + panel.border = ggplot2::element_blank()) if (!is.null(facet.by)) p <- p + ggplot2::facet_grid(stats::as.formula(paste(". 
~", facet.by))) diff --git a/R/pcaEnrichment.R b/R/pcaEnrichment.R index bfbeccd..bc4ace3 100644 --- a/R/pcaEnrichment.R +++ b/R/pcaEnrichment.R @@ -1,23 +1,33 @@ #' Visualize the PCA of Enrichment Values -#' -#' This function allows to the user to examine the distribution -#' of principal components run on the enrichment values. #' -#' @param input.data Single‑cell object (Seurat / SCE) **or** the raw list -#' returned by [`performPCA()`]. -#' @param dimRed Name of the dimensional‑reduction slot to pull from a -#' single‑cell object. Ignored when `input.data` is the list output. -#' @param x.axis,y.axis Character vectors naming the PCs to display (e.g. "PC1"). -#' @param facet.by Metadata column to facet plot. -#' @param style "point" (default) or "hex". -#' @param add.percent.contribution Include percent variance explained in axis -#' labels. -#' @param display.factors Draw arrows for the top gene‑set loadings. -#' @param number.of.factors Integer; how many loadings to display if -#' `display.factors = TRUE`. -#' @param palette Character. Any palette from \code{\link[grDevices]{hcl.pals}}. -#' -#' #' @examples +#' This function allows the user to examine the distribution of principal +#' components computed on the enrichment values. +#' +#' @param input.data Single-cell object (Seurat / SCE) \strong{or} the raw list +#' returned by \code{\link{performPCA}}. +#' @param dimRed Character. Name of the dimensional-reduction slot to pull from +#' a single-cell object. Ignored when \code{input.data} is the list output. +#' @param x.axis Character. Name of the principal component to display on the +#' x-axis (e.g., \code{"PC1"}). Default is \code{"PC1"}. +#' @param y.axis Character. Name of the principal component to display on the +#' y-axis (e.g., \code{"PC2"}). Default is \code{"PC2"}. +#' @param facet.by Character or \code{NULL}. Metadata column used to facet +#' the plot. +#' @param style Character. Plot style. 
Options: +#' \itemize{ +#' \item \code{"point"} (default): Density-aware scatter plot. +#' \item \code{"hex"}: Hexagonal binning. +#' } +#' @param add.percent.contribution Logical. If \code{TRUE}, include percent +#' variance explained in axis labels. Default is \code{TRUE}. +#' @param display.factors Logical. If \code{TRUE}, draw arrows for the top +#' gene-set loadings. Default is \code{FALSE}. +#' @param number.of.factors Integer. Number of loadings to display when +#' \code{display.factors = TRUE}. Default is \code{10}. +#' @param palette Character. Color palette name from +#' \code{\link[grDevices]{hcl.pals}}. Default is \code{"inferno"}. +#' +#' @examples #' GS <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), #' Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) #' pbmc_small <- SeuratObject::pbmc_small @@ -104,7 +114,7 @@ pcaEnrichment <- function(input.data, ggplot2::scale_color_gradientn(colors = grDevices::hcl.colors(11, palette)) + ggplot2::labs(color = "Density") } - } else { # hex‑bin + } else { # hex-bin if (!requireNamespace("hexbin", quietly = TRUE)) stop("'hexbin' package required for style = 'hex'.") g <- g + ggplot2::stat_binhex() + @@ -112,7 +122,7 @@ pcaEnrichment <- function(input.data, ggplot2::labs(fill = "Count") } - g <- g + ggplot2::labs(x = x.title, y = y.title) + ggplot2::theme_classic() + g <- g + ggplot2::labs(x = x.title, y = y.title) + .themeEscape(grid_lines = "none") if (!is.null(facet.by)) g <- g + ggplot2::facet_grid(stats::as.formula(paste(".~", facet.by))) diff --git a/R/performNormalization.R b/R/performNormalization.R index 8240e87..6b1e830 100644 --- a/R/performNormalization.R +++ b/R/performNormalization.R @@ -1,28 +1,31 @@ #' Perform Normalization on Enrichment Data -#' -#' @description -#' Scales each enrichment value by the **number of genes from the set that are -#' expressed** in that cell (non‑zero counts). 
Optionally shifts results into a -#' positive range and/or applies a natural‑log transform for compatibility with -#' log‑based differential tests. #' -#' @param input.data raw‐counts matrix (`genes × cells`), a -#' \link[SeuratObject]{Seurat} object, or a -#' \link[SingleCellExperiment]{SingleCellExperiment}. Gene identifiers must -#' match those in `gene.sets`. -#' @param enrichment.data Output of \code{\link{escape.matrix}} or a single‑cell -#' object previously processed by \code{\link{runEscape}}. -#' @param assay Name of the assay holding enrichment scores when -#' `input.data` is a single‑cell object. Ignored otherwise. +#' Scales each enrichment value by the \strong{number of genes from the set +#' that are expressed} in that cell (non-zero counts). Optionally shifts +#' results into a positive range and/or applies a natural-log transform for +#' compatibility with log-based differential tests. +#' +#' @param input.data A raw-counts matrix (genes x cells), a +#' \link[SeuratObject]{Seurat} object, or a +#' \link[SingleCellExperiment]{SingleCellExperiment}. Gene identifiers must +#' match those in \code{gene.sets}. +#' @param enrichment.data Matrix. Output of \code{\link{escape.matrix}} or +#' \code{NULL} if enrichment scores are already stored in \code{input.data}. +#' @param assay Character. Name of the assay holding enrichment scores when +#' \code{input.data} is a single-cell object. Default is \code{"escape"}. +#' Ignored otherwise. #' @param gene.sets A named list of character vectors, the result of -#' [getGeneSets()], or the built-in data object [escape.gene.sets]. -#' List names become column names in the result. -#' @param make.positive Logical; if `TRUE` shifts each column so its minimum is -#' zero. -#' @param scale.factor Optional numeric vector overriding gene‑count scaling -#' (length = #cells). Use when you want external per‑cell normalization factors. -#' @param groups Integer >= 1. Number of cells per processing chunk. 
-#' Larger values reduce overhead but increase memory usage. Default **1000**. +#' \code{\link{getGeneSets}}, or the built-in data object +#' \code{\link{escape.gene.sets}}. List names must match column names in the +#' enrichment matrix. +#' @param make.positive Logical. If \code{TRUE}, shifts each column so its +#' minimum is zero. Default is \code{FALSE}. +#' @param scale.factor Numeric vector or \code{NULL}. Optional per-cell scaling +#' factors (length = number of cells). Use when you want external per-cell +#' normalization factors. Default is \code{NULL} (compute from gene counts). +#' @param groups Integer or \code{NULL}. Number of cells per processing chunk. +#' Larger values reduce overhead but increase memory usage. Default is +#' \code{NULL} (process all cells at once). #' #' @examples #' gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), @@ -81,7 +84,7 @@ performNormalization <- function(input.data, egc <- egc[names(egc) %in% colnames(enriched)] if (!length(egc)) stop("None of the supplied gene sets match enrichment columns.") - ## counts matrix (genes × cells) – drop after use to save RAM + ## counts matrix (genes x cells) - drop after use to save RAM cnts <- .cntEval(input.data, assay = "RNA", type = "counts") message("Computing expressed-gene counts per cell...") scale.mat <- do.call(cbind, lapply(egc, function(gs) { diff --git a/R/performPCA.R b/R/performPCA.R index 491b3f3..3fd95e6 100644 --- a/R/performPCA.R +++ b/R/performPCA.R @@ -1,23 +1,27 @@ #' Perform Principal Component Analysis on Enrichment Data -#' -#' This function allows users to calculate the principal components -#' for the gene set enrichment values. For single-cell data, the PCA -#' will be stored with the dimensional reductions. If a matrix is used -#' as input, the output is a list for further plotting. 
Alternatively, -#' users can use functions for PCA calculations based on their desired -#' workflow in lieu of using \code{\link{performPCA}}, but will not be -#' compatible with downstream \code{\link{pcaEnrichment}} visualization. #' -#' @param input.data Output of \code{\link{escape.matrix}} or a single‑cell -#' object previously processed by \code{\link{runEscape}}. -#' @param assay Name of the assay holding enrichment scores when -#' `input.data` is a single‑cell object. Ignored otherwise. -#' @param scale Logical; if `TRUE` standardises each gene-set column -#' before PCA. -#' @param n.dim Integer >= 1 or vector; the **largest** value sets the -#' number of principal components to compute / keep. -#' @param reduction.name,reduction.key Names used when writing back to a -#' Seurat / SCE object. +#' This function allows users to calculate the principal components for the +#' gene set enrichment values. For single-cell data, the PCA will be stored +#' with the dimensional reductions. If a matrix is used as input, the output +#' is a list for further plotting. Alternatively, users can use functions for +#' PCA calculations based on their desired workflow in lieu of using +#' \code{\link{performPCA}}, but the results will not be compatible with the +#' downstream \code{\link{pcaEnrichment}} visualization. +#' +#' @param input.data Output of \code{\link{escape.matrix}} or a single-cell +#' object previously processed by \code{\link{runEscape}}. +#' @param assay Character. Name of the assay holding enrichment scores when +#' \code{input.data} is a single-cell object. Default is \code{"escape"}. +#' Ignored otherwise. +#' @param scale Logical. If \code{TRUE}, standardizes each gene-set column +#' before PCA. Default is \code{TRUE}. +#' @param n.dim Integer. The number of principal components to compute and +#' keep. Default is \code{10}. +#' @param reduction.name Character. Name used for the dimensional reduction +#' slot when writing back to a Seurat/SCE object. 
Default is +#' \code{"escape.PCA"}. +#' @param reduction.key Character. Key prefix for the dimensional reduction +#' when writing back to a Seurat/SCE object. Default is \code{"escPC_"}. #' #' @examples #' gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), diff --git a/R/ridgeEnrichment.R b/R/ridgeEnrichment.R index 8fca7c1..607129d 100644 --- a/R/ridgeEnrichment.R +++ b/R/ridgeEnrichment.R @@ -1,29 +1,37 @@ #' Visualize Enrichment Distributions Using Ridge Plots -#' -#' This function allows to the user to examine the distribution of +#' +#' This function allows the user to examine the distribution of #' enrichment across groups by generating a ridge plot. #' -#' @param input.data Output of \code{\link{escape.matrix}} or a single‑cell -#' object previously processed by \code{\link{runEscape}}. -#' @param gene.set.use Character(1). Name of the gene set to display. -#' @param assay Name of the assay holding enrichment scores when -#' `input.data` is a single‑cell object. Ignored otherwise. -#' @param group.by Metadata column plotted on the *y*‑axis. Defaults to the -#' Seurat/SCE `ident` slot when `NULL`. -#'@param color.by Aesthetic mapped to point color. Use either -#' *"group"* (default = `group.by`) for categorical coloring or the -#' *name of a gene‑set* (e.g. same as `gene.set`) to obtain a numeric -# gradient. Any other metadata or column present in the data is also -#' accepted. -#' @param order.by How to arrange the x‑axis: -#' *`"mean"`* – groups ordered by decreasing group mean; -#' *`"group"`* – natural sort of group labels; -#' *`NULL`* – keep original ordering. -#' @param facet.by Optional metadata column used to facet the plot. -#' @param scale Logical; if `TRUE` scores are centred/scaled (Z‑score) prior -#' to plotting. -#' @param add.rug Logical. Draw per-cell tick marks underneath each ridge. -#' @param palette Character. Any palette from \code{\link[grDevices]{hcl.pals}}. 
+#' @param input.data Output of \code{\link{escape.matrix}} or a single-cell +#' object previously processed by \code{\link{runEscape}}. +#' @param gene.set.use Character. Name of the gene set to display. +#' @param assay Character. Name of the assay holding enrichment scores when +#' \code{input.data} is a single-cell object. Ignored otherwise. +#' @param group.by Character. Metadata column plotted on the y-axis. Defaults +#' to the Seurat/SCE \code{ident} slot when \code{NULL}. +#' @param color.by Character. Aesthetic mapped to fill color. Options: +#' \itemize{ +#' \item \code{"group"} (default): Uses \code{group.by} for categorical +#' coloring. +#' \item \emph{gene-set name}: Use the same value as \code{gene.set.use} +#' to obtain a numeric gradient. +#' \item Any other metadata column present in the data. +#' } +#' @param order.by Character or \code{NULL}. How to arrange the y-axis: +#' \itemize{ +#' \item \code{"mean"}: Groups ordered by decreasing group mean. +#' \item \code{"group"}: Natural (alphanumeric) sort of group labels. +#' \item \code{NULL} (default): Keep original ordering. +#' } +#' @param facet.by Character or \code{NULL}. Metadata column used to facet +#' the plot. +#' @param scale Logical. If \code{TRUE}, scores are centered and scaled +#' (Z-score) prior to plotting. Default is \code{FALSE}. +#' @param add.rug Logical. If \code{TRUE}, draw per-cell tick marks underneath +#' each ridge. Default is \code{FALSE}. +#' @param palette Character. Color palette name from +#' \code{\link[grDevices]{hcl.pals}}. Default is \code{"inferno"}. 
#' #' @examples #' gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), @@ -121,7 +129,7 @@ ridgeEnrichment <- function(input.data, p <- p + ylab(group.by) + xlab(paste0(gene.set.use, "\nEnrichment Score")) + - ggplot2::theme_classic(base_size = 11) + .themeEscape(grid_lines = "none") p <- .colorby(df, p, color.by, palette, type = "fill") + guides(fill = "none") diff --git a/R/runEscape.R b/R/runEscape.R index 77006e2..334ffe8 100644 --- a/R/runEscape.R +++ b/R/runEscape.R @@ -1,60 +1,63 @@ #' Calculate Single-Cell Gene-Set Enrichment Scores #' -#' `escape.matrix()` computes per-cell enrichment for arbitrary gene-set +#' \code{escape.matrix()} computes per-cell enrichment for arbitrary gene-set #' collections using one of four scoring back-ends and returns a dense numeric -#' matrix (cells × gene-sets). The expression matrix is processed in -#' user-defined *chunks* (`groups`) so that memory use remains predictable; -#' each chunk is dispatched in parallel via a \pkg{BiocParallel} `BPPARAM` -#' backend. Heavy engines (\pkg{GSVA}, \pkg{UCell}, \pkg{AUCell}) are loaded -#' lazily, keeping them in the package’s \strong{Suggests} field. +#' matrix (cells x gene-sets). The expression matrix is processed in +#' user-defined chunks (\code{groups}) so that memory use remains predictable; +#' each chunk is dispatched in parallel via a \pkg{BiocParallel} \code{BPPARAM} +#' backend. Heavy engines (\pkg{GSVA}, \pkg{UCell}, \pkg{AUCell}) are loaded +#' lazily, keeping them in the package's \strong{Suggests} field. 
#' #' @section Supported methods: #' \describe{ -#' \item{`"GSVA"`}{Gene-set variation analysis (Poisson kernel).} -#' \item{`"ssGSEA"`}{Single-sample GSEA.} -#' \item{`"UCell"`}{Rank-based UCell scoring.} -#' \item{`"AUCell"`}{Area-under-the-curve ranking score.} +#' \item{\code{"GSVA"}}{Gene-set variation analysis (Poisson kernel).} +#' \item{\code{"ssGSEA"}}{Single-sample GSEA.} +#' \item{\code{"UCell"}}{Rank-based UCell scoring.} +#' \item{\code{"AUCell"}}{Area-under-the-curve ranking score.} #' } #' -#' @param input.data A raw‐counts matrix (`genes × cells`), a -#' \link[SeuratObject]{Seurat} object, or a -#' \link[SingleCellExperiment]{SingleCellExperiment}. Gene identifiers must -#' match those in `gene.sets`. +#' @param input.data A raw-counts matrix (genes x cells), a +#' \link[SeuratObject]{Seurat} object, or a +#' \link[SingleCellExperiment]{SingleCellExperiment}. Gene identifiers must +#' match those in \code{gene.sets}. #' @param gene.sets A named list of character vectors, the result of -#' [getGeneSets()], or the built-in data object [escape.gene.sets]. -#' List names become column names in the result. -#' @param method Scoring algorithm (case-insensitive). One of `"GSVA"`, -#' `"ssGSEA"`, `"UCell"`, or `"AUCell"`. Default **`"ssGSEA"`**. -#' @param groups Integer >= 1. Number of cells per processing chunk. -#' Larger values reduce overhead but increase memory usage. Default **1000**. -#' @param min.size Minimum number of genes from a set that must be detected -#' in the expression matrix for that set to be scored. Default **5**. -#' Use `NULL` to disable filtering. -#' @param normalize Logical. If `TRUE`, the score matrix is passed to -#' [performNormalization()] (drop-out scaling and optional log transform). -#' Default **FALSE**. -#' @param make.positive Logical. If `TRUE` *and* `normalize = TRUE`, shifts -#' every gene-set column so its global minimum is zero, facilitating -#' downstream log-ratio analyses. Default **FALSE**. 
-#' @param min.expr.cells Numeric. Gene-expression filter threshold (see -#' details above). Default **0** (no gene filtering). -#' @param min.filter.by Character or `NULL`. Column name in `meta.data` -#' (Seurat) or `colData` (SCE) defining groups within which the -#' `min.expr.cells` rule is applied. Default **`NULL`**. +#' \code{\link{getGeneSets}}, or the built-in data object +#' \code{\link{escape.gene.sets}}. List names become column names in the +#' result. +#' @param method Character. Scoring algorithm (case-insensitive). One of +#' \code{"GSVA"}, \code{"ssGSEA"}, \code{"UCell"}, or \code{"AUCell"}. +#' Default is \code{"ssGSEA"}. +#' @param groups Integer. Number of cells per processing chunk. Larger values +#' reduce overhead but increase memory usage. Default is \code{1000}. +#' @param min.size Integer or \code{NULL}. Minimum number of genes from a set +#' that must be detected in the expression matrix for that set to be scored. +#' Default is \code{5}. Use \code{NULL} to disable filtering. +#' @param normalize Logical. If \code{TRUE}, the score matrix is passed to +#' \code{\link{performNormalization}} (drop-out scaling and optional log +#' transform). Default is \code{FALSE}. +#' @param make.positive Logical. If \code{TRUE} \emph{and} +#' \code{normalize = TRUE}, shifts every gene-set column so its global +#' minimum is zero, facilitating downstream log-ratio analyses. Default is +#' \code{FALSE}. +#' @param min.expr.cells Numeric. Gene-expression filter threshold. Default is +#' \code{0} (no gene filtering). +#' @param min.filter.by Character or \code{NULL}. Column name in +#' \code{meta.data} (Seurat) or \code{colData} (SCE) defining groups within +#' which the \code{min.expr.cells} rule is applied. Default is \code{NULL}. #' @param BPPARAM A \pkg{BiocParallel} parameter object describing the -#' parallel backend. -#' @param ... 
Extra arguments passed verbatim to the chosen back-end -#' scoring function (`gsva()`, `ScoreSignatures_UCell()`, or -#' `AUCell_calcAUC()`). +#' parallel backend. Default is \code{NULL} (serial execution). +#' @param ... Extra arguments passed verbatim to the chosen back-end scoring +#' function (\code{gsva()}, \code{ScoreSignatures_UCell()}, or +#' \code{AUCell_calcAUC()}). #' #' @return A numeric matrix with one row per cell and one column per gene set, -#' ordered as in `gene.sets`. +#' ordered as in \code{gene.sets}. #' #' @author Nick Borcherding, Jared Andrews #' -#' @seealso [runEscape()] to attach scores to a single-cell object; -#' [getGeneSets()] for MSigDB retrieval; [performNormalization()] for the -#' optional normalization workflow. +#' @seealso \code{\link{runEscape}} to attach scores to a single-cell object; +#' \code{\link{getGeneSets}} for MSigDB retrieval; +#' \code{\link{performNormalization}} for the optional normalization workflow. #' #' @examples #' gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), @@ -149,25 +152,26 @@ escape.matrix <- function(input.data, #' Calculate Enrichment Scores Using Seurat or SingleCellExperiment Objects #' -#' `runEscape()` is a convenience wrapper around [escape.matrix()] that -#' computes enrichment scores and inserts them as a new assay (default -#' `"escape"`) in a \pkg{Seurat} or \pkg{SingleCellExperiment} object. All -#' arguments (except `new.assay.name`) map directly to their counterparts in -#' `escape.matrix()`. +#' \code{runEscape()} is a convenience wrapper around \code{\link{escape.matrix}} +#' that computes enrichment scores and inserts them as a new assay (default +#' \code{"escape"}) in a \pkg{Seurat} or \pkg{SingleCellExperiment} object. All +#' arguments (except \code{new.assay.name}) map directly to their counterparts +#' in \code{escape.matrix()}. #' #' @inheritParams escape.matrix #' @param new.assay.name Character. 
Name for the assay that will store the -#' enrichment matrix in the returned object. Default **"escape"**. +#' enrichment matrix in the returned object. Default is \code{"escape"}. #' #' @return The input single-cell object with an additional assay containing the -#' enrichment scores (`cells × gene-sets`). Matrix orientation follows -#' standard single-cell conventions (gene-sets as rows inside the assay). +#' enrichment scores (cells x gene-sets). Matrix orientation follows standard +#' single-cell conventions (gene-sets as rows inside the assay). #' #' @author Nick Borcherding, Jared Andrews #' -#' @seealso [escape.matrix()] for the underlying computation, -#' [performNormalization()] to add normalized scores, [heatmapEnrichment()], -#' [ridgeEnrichment()] and related plotting helpers for visualization. +#' @seealso \code{\link{escape.matrix}} for the underlying computation; +#' \code{\link{performNormalization}} to add normalized scores; +#' \code{\link{heatmapEnrichment}}, \code{\link{ridgeEnrichment}}, and +#' related plotting helpers for visualization. #' #' @examples #' gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), diff --git a/R/scatterEnrichment.R b/R/scatterEnrichment.R index 2a19f40..e346b45 100644 --- a/R/scatterEnrichment.R +++ b/R/scatterEnrichment.R @@ -1,29 +1,45 @@ #' Plot 2D Enrichment Distributions With Density or Hexplots #' -#' Visualize the relationship between *two* enrichment scores at single-cell -#' resolution. By default points are shaded by local 2-D density -#' (`color.by = "density"`), but users can instead color by a metadata column -#' (discrete) or by the raw gene-set scores themselves (continuous). +#' Visualize the relationship between two enrichment scores at single-cell +#' resolution. By default, points are shaded by local 2-D density +#' (\code{color.by = "density"}), but users can instead color by a metadata +#' column (discrete) or by the raw gene-set scores themselves (continuous). 
#' -#' @param input.data Output of \code{\link{escape.matrix}} or a single‑cell -#' object previously processed by \code{\link{runEscape}}. -#' @param assay Name of the assay holding enrichment scores when -#' `input.data` is a single‑cell object. Ignored otherwise. -#' @param x.axis,y.axis Gene-set names to plot on the *x* and *y* axes. -#' @param facet.by Optional metadata column used to facet the plot. -#' @param group.by Metadata column plotted. Defaults to the -#' Seurat/SCE `ident` slot when `NULL`. -#' @param color.by Aesthetic mapped to point color. Use -#' `"density"` (default), `"group"`, `"x"`, or `"y"`. The latter two apply a -#' continuous gradient to the corresponding axis. -#' @param style `"point"` (density-aware points) or `"hex"` (hex-bin). -#' @param scale Logical; if `TRUE` scores are centered/scaled (Z‑score) prior -#' to plotting. -#' @param bins Number of hex bins along each axis when `style = "hex"`. -#' @param point.size,alpha Aesthetic tweaks for `style = "point"`. -#' @param add.corr Logical. Add Pearson and Spearman correlation -#' coefficients (top-left corner of the first facet). -#' @param palette Character. Any palette from \code{\link[grDevices]{hcl.pals}}. +#' @param input.data Output of \code{\link{escape.matrix}} or a single-cell +#' object previously processed by \code{\link{runEscape}}. +#' @param assay Character. Name of the assay holding enrichment scores when +#' \code{input.data} is a single-cell object. Ignored otherwise. +#' @param x.axis Character. Gene-set name to plot on the x-axis. +#' @param y.axis Character. Gene-set name to plot on the y-axis. +#' @param facet.by Character or \code{NULL}. Metadata column used to facet +#' the plot. +#' @param group.by Character. Metadata column used when \code{color.by = "group"}. +#' Defaults to the Seurat/SCE \code{ident} slot when \code{NULL}. +#' @param color.by Character. Aesthetic mapped to point color. 
Options: +#' \itemize{ +#' \item \code{"density"} (default): Shade points by local 2-D density. +#' \item \code{"group"}: Color by the \code{group.by} metadata column. +#' \item \code{"x"}: Apply a continuous gradient based on the x-axis values. +#' \item \code{"y"}: Apply a continuous gradient based on the y-axis values. +#' } +#' @param style Character. Plot style. Options: +#' \itemize{ +#' \item \code{"point"} (default): Density-aware scatter plot. +#' \item \code{"hex"}: Hexagonal binning. +#' } +#' @param scale Logical. If \code{TRUE}, scores are centered and scaled +#' (Z-score) prior to plotting. Default is \code{FALSE}. +#' @param bins Integer. Number of hex bins along each axis when +#' \code{style = "hex"}. Default is \code{40}. +#' @param point.size Numeric. Point size for \code{style = "point"}. +#' Default is \code{1.2}. +#' @param alpha Numeric. Transparency for points or hexbins. +#' Default is \code{0.8}. +#' @param add.corr Logical. If \code{TRUE}, add Pearson and Spearman +#' correlation coefficients to the plot (top-left corner). Default is +#' \code{FALSE}. +#' @param palette Character. Color palette name from +#' \code{\link[grDevices]{hcl.pals}}. Default is \code{"inferno"}. #' #' @examples #' gs <- list( @@ -138,7 +154,7 @@ scatterEnrichment <- function(input.data, plt <- plt + ggplot2::labs(x = paste0(x.axis, "\nEnrichment score"), y = paste0(y.axis, "\nEnrichment score")) + - ggplot2::theme_classic() + .themeEscape(grid_lines = "none") if (!is.null(facet.by)) { plt <- plt + ggplot2::facet_grid(as.formula(paste(". ~", facet.by))) diff --git a/R/splitEnrichment.R b/R/splitEnrichment.R index 7226b9b..d001420 100644 --- a/R/splitEnrichment.R +++ b/R/splitEnrichment.R @@ -1,27 +1,33 @@ #' Plot Enrichment Distributions Using Split or Dodged Violin Plots #' #' Visualize the distribution of gene set enrichment scores across groups using -#' violin plots. 
When `split.by` contains exactly two levels, the function draws -#' split violins for easy group comparison within each `group.by` category. If -#' `split.by` has more than two levels, standard dodged violins are drawn instead. +#' violin plots. When \code{split.by} contains exactly two levels, the function +#' draws split violins for easy group comparison within each \code{group.by} +#' category. If \code{split.by} has more than two levels, standard dodged +#' violins are drawn instead. #' -#' @param input.data Output of \code{\link{escape.matrix}} or a single‑cell -#' object previously processed by \code{\link{runEscape}}. -#' @param assay Name of the assay holding enrichment scores when -#' `input.data` is a single‑cell object. Ignored otherwise. -#' @param split.by A metadata column used to split or color violins. Must contain -#' at least two levels. If it contains more than two, dodged violins are used. -#' @param group.by Metadata column plotted on the *x*‑axis. Defaults to the -#' Seurat/SCE `ident` slot when `NULL`. -#' @param gene.set.use Character(1). Name of the gene set to display. -#' @param order.by How to arrange the x‑axis: -#' *`"mean"`* – groups ordered by decreasing group mean; -#' *`"group"`* – natural sort of group labels; -#' *`NULL`* – keep original ordering. -#' @param facet.by Optional metadata column used to facet the plot. -#' @param scale Logical; if `TRUE` scores are centred/scaled (Z‑score) prior -#' to plotting. -#' @param palette Character. Any palette from \code{\link[grDevices]{hcl.pals}}. +#' @param input.data Output of \code{\link{escape.matrix}} or a single-cell +#' object previously processed by \code{\link{runEscape}}. +#' @param assay Character. Name of the assay holding enrichment scores when +#' \code{input.data} is a single-cell object. Ignored otherwise. +#' @param split.by Character. Metadata column used to split or color violins. +#' Must contain at least two levels. 
If more than two levels are present, +#' dodged violins are used instead of split violins. +#' @param group.by Character. Metadata column plotted on the x-axis. Defaults +#' to the Seurat/SCE \code{ident} slot when \code{NULL}. +#' @param gene.set.use Character. Name of the gene set to display. +#' @param order.by Character or \code{NULL}. How to arrange the x-axis: +#' \itemize{ +#' \item \code{"mean"}: Groups ordered by decreasing group mean. +#' \item \code{"group"}: Natural (alphanumeric) sort of group labels. +#' \item \code{NULL} (default): Keep original ordering. +#' } +#' @param facet.by Character or \code{NULL}. Metadata column used to facet +#' the plot. +#' @param scale Logical. If \code{TRUE}, scores are centered and scaled +#' (Z-score) prior to plotting. Default is \code{TRUE}. +#' @param palette Character. Color palette name from +#' \code{\link[grDevices]{hcl.pals}}. Default is \code{"inferno"}. #' #' @return A [ggplot2] object. #' @@ -86,7 +92,7 @@ splitEnrichment <- function(input.data, ylab(paste0(gene.set.use, "\nEnrichment Score")) + labs(fill = split.by) + scale_fill_manual(values = .colorizer(palette, n.levels)) + - theme_classic() + .themeEscape(grid_lines = "Y") # Split violin if binary, otherwise dodge standard violins if (n.levels == 2) { diff --git a/R/utils.R b/R/utils.R index cc4ae2a..175cf84 100644 --- a/R/utils.R +++ b/R/utils.R @@ -201,25 +201,29 @@ } # ----------------------------------------------------------------------------- -# EXPRESSION MATRIX EXTRACTOR +# EXPRESSION MATRIX EXTRACTOR # ----------------------------------------------------------------------------- #' @importFrom MatrixGenerics rowSums2 .cntEval <- function(obj, assay = "RNA", type = "counts") { if (.is_seurat(obj)) { # Use generic accessor if available if (requireNamespace("SeuratObject", quietly = TRUE)) { - suppressWarnings( + # Use layer argument for SeuratObject >= 5.0.0, slot for older versions + so_version <- utils::packageVersion("SeuratObject") + if 
(so_version >= "5.0.0") { + cnts <- SeuratObject::GetAssayData(obj, assay = assay, layer = type) + } else { cnts <- SeuratObject::GetAssayData(obj, assay = assay, slot = type) - ) + } } else { cnts <- obj@assays[[assay]][[type]] } - + } else if (.is_sce(obj)) { if (requireNamespace("SummarizedExperiment", quietly = TRUE) && requireNamespace("SingleCellExperiment", quietly = TRUE)) { pos <- if (assay == "RNA") "counts" else assay - + cnts <- if (assay == "RNA") { SummarizedExperiment::assay(obj, pos) } else { @@ -297,7 +301,7 @@ } # ----------------------------------------------------------------------------- -# GENE‑SET / META HELPERS +# GENE-SET / META HELPERS # ----------------------------------------------------------------------------- .GS.check <- function(gene.sets) { if (is.null(gene.sets)) @@ -423,7 +427,7 @@ ) } -#─ Split a matrix into equal‑sized column chunks ------------------------------ +#─ Split a matrix into equal-sized column chunks ------------------------------ .split_cols <- function(mat, chunk) { if (ncol(mat) <= chunk) return(list(mat)) idx <- split(seq_len(ncol(mat)), ceiling(seq_len(ncol(mat)) / chunk)) @@ -496,6 +500,94 @@ utils::globalVariables(c( "gene.set.query", "index" )) +# ----------------------------------------------------------------------------- +# THEME HELPER +# ----------------------------------------------------------------------------- +#' @importFrom ggplot2 %+replace% +.themeEscape <- function(base_size = 12, + base_family = "sans", + grid_lines = "Y", + axis_lines = FALSE, + legend_position = "right") { + + t <- ggplot2::theme_bw(base_size = base_size, base_family = base_family) + + t <- t %+replace% + ggplot2::theme( + # Plot titles and caption + plot.title = ggplot2::element_text( + size = ggplot2::rel(1.2), hjust = 0, face = "bold", + margin = ggplot2::margin(b = base_size / 2) + ), + plot.subtitle = ggplot2::element_text( + hjust = 0, face = "italic", + margin = ggplot2::margin(b = base_size) + ), + plot.caption = 
ggplot2::element_text( + size = ggplot2::rel(0.8), hjust = 1 + ), + + # Axis titles and text + axis.title = ggplot2::element_text(size = ggplot2::rel(1), face = "bold"), + axis.text = ggplot2::element_text(size = ggplot2::rel(0.85)), + + # Facet strips + strip.text = ggplot2::element_text( + size = ggplot2::rel(0.9), + face = "bold", + margin = ggplot2::margin(base_size / 2.5, base_size / 2.5, base_size / 2.5, base_size / 2.5) + ), + strip.background = ggplot2::element_rect(fill = "grey90", color = NA), + + # Panel border and background + panel.border = ggplot2::element_rect(color = "grey70", fill = NA, linewidth = 0.5), + panel.background = ggplot2::element_rect(fill = "white", color = NA), + + # Legend styling + legend.title = ggplot2::element_text(face = "bold"), + legend.position = legend_position, + legend.key = ggplot2::element_rect(fill = "white"), + + # Plot spacing/margins + plot.margin = ggplot2::margin(base_size, base_size, base_size, base_size) + ) + + # Handle grid lines + grid_lines <- toupper(grid_lines) + if (grid_lines == "NONE") { + t <- t + ggplot2::theme( + panel.grid.major = ggplot2::element_blank(), + panel.grid.minor = ggplot2::element_blank() + ) + } else if (grid_lines == "X") { + t <- t + ggplot2::theme( + panel.grid.major.y = ggplot2::element_blank(), + panel.grid.minor = ggplot2::element_blank(), + panel.grid.major.x = ggplot2::element_line(color = "grey85", linewidth = 0.25) + ) + } else if (grid_lines == "Y") { + t <- t + ggplot2::theme( + panel.grid.major.x = ggplot2::element_blank(), + panel.grid.minor = ggplot2::element_blank(), + panel.grid.major.y = ggplot2::element_line(color = "grey85", linewidth = 0.25) + ) + } else { + t <- t + ggplot2::theme( + panel.grid.major = ggplot2::element_line(color = "grey85", linewidth = 0.25), + panel.grid.minor = ggplot2::element_blank() + ) + } + + # Handle axis lines + if (isTRUE(axis_lines)) { + t <- t + ggplot2::theme( + axis.line = ggplot2::element_line(color = "grey30", linewidth = 0.5) + ) 
+ } + + return(t) +} + # helper to match summary function .match_summary_fun <- function(fun) { if (is.function(fun)) return(fun) diff --git a/escape.Rproj b/escape.Rproj deleted file mode 100644 index 21a4da0..0000000 --- a/escape.Rproj +++ /dev/null @@ -1,17 +0,0 @@ -Version: 1.0 - -RestoreWorkspace: Default -SaveWorkspace: Default -AlwaysSaveHistory: Default - -EnableCodeIndexing: Yes -UseSpacesForTab: Yes -NumSpacesForTab: 2 -Encoding: UTF-8 - -RnwWeave: Sweave -LaTeX: pdfLaTeX - -BuildType: Package -PackageUseDevtools: Yes -PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/man/densityEnrichment.Rd b/man/densityEnrichment.Rd index 88f83b7..31d9b07 100644 --- a/man/densityEnrichment.Rd +++ b/man/densityEnrichment.Rd @@ -17,27 +17,28 @@ densityEnrichment( \item{input.data}{A \link[SeuratObject]{Seurat} object or a \link[SingleCellExperiment]{SingleCellExperiment}.} -\item{gene.set.use}{Character(1). Name of the gene set to display.} +\item{gene.set.use}{Character. Name of the gene set to display.} \item{gene.sets}{A named list of character vectors, the result of -[getGeneSets()], or the built-in data object [escape.gene.sets].} +\code{\link{getGeneSets}}, or the built-in data object +\code{\link{escape.gene.sets}}.} -\item{group.by}{Metadata column. Defaults to the Seurat/SCE `ident` -slot when `NULL`.} +\item{group.by}{Character. Metadata column used for grouping. Defaults to +the Seurat/SCE \code{ident} slot when \code{NULL}.} -\item{rug.height}{Vertical spacing of the hit rug as a fraction of the -y-axis (default `0.02`).} +\item{rug.height}{Numeric. Vertical spacing of the hit rug as a fraction of +the y-axis. Default is \code{0.02}.} -\item{palette}{Character. Any palette from \code{\link[grDevices]{hcl.pals}}.} +\item{palette}{Character. Color palette name from +\code{\link[grDevices]{hcl.pals}}. Default is \code{"inferno"}.} } \value{ A `patchwork`/`ggplot2` object. 
} \description{ -This function allows to the user to examine the mean ranking -within the groups across the gene set. The visualization uses -the density function to display the relative position and distribution -of rank. +This function allows the user to examine the mean ranking within groups +across the gene set. The visualization uses the density function to display +the relative position and distribution of rank. } \examples{ gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), diff --git a/man/enrichIt.Rd b/man/enrichIt.Rd index d2cdea0..344389d 100644 --- a/man/enrichIt.Rd +++ b/man/enrichIt.Rd @@ -20,42 +20,60 @@ enrichIt( ) } \arguments{ -\item{input.data}{Either -• a named numeric vector **already ranked**, *or* -• a data.frame/tibble with one row per gene and columns containing - log-fold-change and *p*-value. If the gene ID is not in `rownames(data)`, - supply `gene_col`.} +\item{input.data}{Either: +\itemize{ + \item A named numeric vector \strong{already ranked}, or + \item A data.frame/tibble with one row per gene and columns containing + log-fold-change and p-value. If the gene ID is not in + \code{rownames(data)}, supply \code{gene_col}. +}} -\item{gene.sets}{AA named list of character vectors, the result of -[getGeneSets()], or the built-in data object [escape.gene.sets].} +\item{gene.sets}{A named list of character vectors, the result of +\code{\link{getGeneSets}}, or the built-in data object +\code{\link{escape.gene.sets}}.} -\item{gene_col}{Name of the column holding gene identifiers (ignored when -they are row-names). Default `NULL`.} +\item{gene_col}{Character or \code{NULL}. Name of the column holding gene +identifiers (ignored when they are row-names). Default is \code{NULL}.} -\item{logFC_col, pval_col}{Column names for logFC and *p* (or adj.*p*) -– defaults match Seurat’s `FindMarkers()`.} +\item{logFC_col}{Character. Column name for log-fold-change values. 
Default +is \code{"avg_log2FC"} (matches Seurat's \code{FindMarkers()}).} -\item{ranking_fun}{How to build the ranking: `"signed_log10_p"` (default) -or `"logFC"`.} +\item{pval_col}{Character. Column name for p-values (or adjusted p-values). +Default is \code{c("p_val_adj", "p_val")} (first match is used).} -\item{pval_cutoff, logFC_cutoff}{Filters applied **before** ranking.} +\item{ranking_fun}{Character. How to build the ranking: +\itemize{ + \item \code{"signed_log10_p"} (default): sign(logFC) * -log10(p). + \item \code{"logFC"}: Use log-fold-change values directly. +}} -\item{minSize, maxSize}{Integer. Minimum / maximum pathway size passed to -*fgsea* (default 5 / 500).} +\item{pval_cutoff}{Numeric. Filter genes with p-value above this threshold +\strong{before} ranking. Default is \code{1} (no filtering).} -\item{padjust_method}{Multiple-testing correction; any method accepted by -[stats::p.adjust()] (default `"BH"`).} +\item{logFC_cutoff}{Numeric. Filter genes with absolute log-fold-change +below this threshold \strong{before} ranking. Default is \code{0} (no +filtering).} -\item{nproc}{Passed to **fgsea** (`0` = multithread if OpenMP available).} +\item{minSize}{Integer. Minimum pathway size passed to \pkg{fgsea}. Default +is \code{5}.} + +\item{maxSize}{Integer. Maximum pathway size passed to \pkg{fgsea}. Default +is \code{500}.} + +\item{padjust_method}{Character. Multiple-testing correction method; any +method accepted by \code{\link[stats]{p.adjust}}. Default is \code{"BH"}.} + +\item{nproc}{Integer. Passed to \pkg{fgsea}. Use \code{0} for multithread +if OpenMP is available. Default is \code{0}.} } \value{ `data.frame` with the usual fgsea columns plus a convenient `leadingEdge` character column collapsed with \";\". } \description{ -A convenience front-end to **fgsea** that lets you point at the -`avg_log2FC` and `p_val_adj` columns coming out of Seurat / DESeq2 / -edgeR etc. 
It converts them to a signed -log10(*p*) ranking, filters on +A convenience front-end to \pkg{fgsea} that lets you point at the +\code{avg_log2FC} and \code{p_val_adj} columns coming out of Seurat / DESeq2 +/ edgeR etc. It converts them to a signed -log10(p) ranking, filters on significance / effect size, and then runs fgsea. } \examples{ @@ -74,5 +92,6 @@ gsea <- enrichIt(markers, } \seealso{ -[fgsea::fgsea()], [getGeneSets()], [gseaEnrichment()] +\code{\link[fgsea]{fgsea}}, \code{\link{getGeneSets}}, + \code{\link{gseaEnrichment}} } diff --git a/man/enrichItPlot.Rd b/man/enrichItPlot.Rd index 7b94ea6..2f69a09 100644 --- a/man/enrichItPlot.Rd +++ b/man/enrichItPlot.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/enrichItPlot.R \name{enrichItPlot} \alias{enrichItPlot} -\title{Adaptive visualisation of enrichIt results} +\title{Adaptive Visualisation of enrichIt Results} \usage{ enrichItPlot( res, @@ -16,31 +16,40 @@ enrichItPlot( ) } \arguments{ -\item{res}{`data.frame` returned by [enrichIt()].} +\item{res}{Data frame. Output from \code{\link{enrichIt}}.} -\item{plot.type}{`"bar"`, `"dot"`, or `"cnet"`.} +\item{plot.type}{Character. Visualization type. Options: +\itemize{ + \item \code{"bar"} (default): Horizontal bar plot. + \item \code{"dot"}: Dot plot with size and color encoding. + \item \code{"cnet"}: Concept network plot showing gene-pathway + relationships. +}} -\item{top}{Integer. Keep the top *n* terms **per database** -(ranked by adjusted *p*). Set to `Inf` to keep all.} +\item{top}{Integer. Keep the top \emph{n} terms \strong{per database} +(ranked by adjusted p-value). Set to \code{Inf} to keep all. Default is +\code{20}.} -\item{x.measure}{A column in `res` mapped to the *x*-axis -(ignored for `"cnet"`). Default `"-log10(padj)"`.} +\item{x.measure}{Character. Column in \code{res} mapped to the x-axis +(ignored for \code{"cnet"}). Default is \code{"-log10(padj)"}.} -\item{color.measure}{Column mapped to color (dot plot only). 
-Default same as `x.measure`.} +\item{color.measure}{Character. Column mapped to color (dot plot only). +Default is same as \code{x.measure}.} -\item{show.counts}{Logical. Annotate bar plot with the `Count` (number of genes).} +\item{show.counts}{Logical. If \code{TRUE}, annotate bar plot with the +\code{Count} (number of genes). Default is \code{TRUE}.} -\item{palette}{palette Character. Any palette from \code{\link[grDevices]{hcl.pals}}.} +\item{palette}{Character. Color palette name from +\code{\link[grDevices]{hcl.pals}}. Default is \code{"inferno"}.} -\item{...}{Further arguments passed to **ggplot2** geoms (e.g. -`alpha`, `linewidth`).} +\item{...}{Further arguments passed to \pkg{ggplot2} geoms (e.g., +\code{alpha}, \code{linewidth}).} } \value{ -A **patchwork** object (bar / dot) or **ggraph** object (cnet). +A \pkg{ggplot2} object (bar/dot) or \pkg{ggraph} object (cnet). } \description{ -Adaptive visualisation of enrichIt results +Create bar, dot, or network plots from \code{\link{enrichIt}} results. } \examples{ \dontrun{ diff --git a/man/escape.matrix.Rd b/man/escape.matrix.Rd index 4c213d8..4c7dd08 100644 --- a/man/escape.matrix.Rd +++ b/man/escape.matrix.Rd @@ -19,67 +19,70 @@ escape.matrix( ) } \arguments{ -\item{input.data}{A raw‐counts matrix (`genes × cells`), a +\item{input.data}{A raw-counts matrix (genes x cells), a \link[SeuratObject]{Seurat} object, or a \link[SingleCellExperiment]{SingleCellExperiment}. Gene identifiers must -match those in `gene.sets`.} +match those in \code{gene.sets}.} \item{gene.sets}{A named list of character vectors, the result of -[getGeneSets()], or the built-in data object [escape.gene.sets]. -List names become column names in the result.} +\code{\link{getGeneSets}}, or the built-in data object +\code{\link{escape.gene.sets}}. List names become column names in the +result.} -\item{method}{Scoring algorithm (case-insensitive). One of `"GSVA"`, -`"ssGSEA"`, `"UCell"`, or `"AUCell"`. 
Default **`"ssGSEA"`**.} +\item{method}{Character. Scoring algorithm (case-insensitive). One of +\code{"GSVA"}, \code{"ssGSEA"}, \code{"UCell"}, or \code{"AUCell"}. +Default is \code{"ssGSEA"}.} -\item{groups}{Integer >= 1. Number of cells per processing chunk. -Larger values reduce overhead but increase memory usage. Default **1000**.} +\item{groups}{Integer. Number of cells per processing chunk. Larger values +reduce overhead but increase memory usage. Default is \code{1000}.} -\item{min.size}{Minimum number of genes from a set that must be detected -in the expression matrix for that set to be scored. Default **5**. -Use `NULL` to disable filtering.} +\item{min.size}{Integer or \code{NULL}. Minimum number of genes from a set +that must be detected in the expression matrix for that set to be scored. +Default is \code{5}. Use \code{NULL} to disable filtering.} -\item{normalize}{Logical. If `TRUE`, the score matrix is passed to -[performNormalization()] (drop-out scaling and optional log transform). -Default **FALSE**.} +\item{normalize}{Logical. If \code{TRUE}, the score matrix is passed to +\code{\link{performNormalization}} (drop-out scaling and optional log +transform). Default is \code{FALSE}.} -\item{make.positive}{Logical. If `TRUE` *and* `normalize = TRUE`, shifts -every gene-set column so its global minimum is zero, facilitating -downstream log-ratio analyses. Default **FALSE**.} +\item{make.positive}{Logical. If \code{TRUE} \emph{and} +\code{normalize = TRUE}, shifts every gene-set column so its global +minimum is zero, facilitating downstream log-ratio analyses. Default is +\code{FALSE}.} -\item{min.expr.cells}{Numeric. Gene-expression filter threshold (see -details above). Default **0** (no gene filtering).} +\item{min.expr.cells}{Numeric. Gene-expression filter threshold. Default is +\code{0} (no gene filtering).} -\item{min.filter.by}{Character or `NULL`. 
Column name in `meta.data` -(Seurat) or `colData` (SCE) defining groups within which the -`min.expr.cells` rule is applied. Default **`NULL`**.} +\item{min.filter.by}{Character or \code{NULL}. Column name in +\code{meta.data} (Seurat) or \code{colData} (SCE) defining groups within +which the \code{min.expr.cells} rule is applied. Default is \code{NULL}.} \item{BPPARAM}{A \pkg{BiocParallel} parameter object describing the -parallel backend.} +parallel backend. Default is \code{NULL} (serial execution).} -\item{...}{Extra arguments passed verbatim to the chosen back-end -scoring function (`gsva()`, `ScoreSignatures_UCell()`, or -`AUCell_calcAUC()`).} +\item{...}{Extra arguments passed verbatim to the chosen back-end scoring +function (\code{gsva()}, \code{ScoreSignatures_UCell()}, or +\code{AUCell_calcAUC()}).} } \value{ A numeric matrix with one row per cell and one column per gene set, -ordered as in `gene.sets`. + ordered as in \code{gene.sets}. } \description{ -`escape.matrix()` computes per-cell enrichment for arbitrary gene-set +\code{escape.matrix()} computes per-cell enrichment for arbitrary gene-set collections using one of four scoring back-ends and returns a dense numeric -matrix (cells × gene-sets). The expression matrix is processed in -user-defined *chunks* (`groups`) so that memory use remains predictable; -each chunk is dispatched in parallel via a \pkg{BiocParallel} `BPPARAM` -backend. Heavy engines (\pkg{GSVA}, \pkg{UCell}, \pkg{AUCell}) are loaded -lazily, keeping them in the package’s \strong{Suggests} field. +matrix (cells x gene-sets). The expression matrix is processed in +user-defined chunks (\code{groups}) so that memory use remains predictable; +each chunk is dispatched in parallel via a \pkg{BiocParallel} \code{BPPARAM} +backend. Heavy engines (\pkg{GSVA}, \pkg{UCell}, \pkg{AUCell}) are loaded +lazily, keeping them in the package's \strong{Suggests} field. 
} \section{Supported methods}{ \describe{ - \item{`"GSVA"`}{Gene-set variation analysis (Poisson kernel).} - \item{`"ssGSEA"`}{Single-sample GSEA.} - \item{`"UCell"`}{Rank-based UCell scoring.} - \item{`"AUCell"`}{Area-under-the-curve ranking score.} + \item{\code{"GSVA"}}{Gene-set variation analysis (Poisson kernel).} + \item{\code{"ssGSEA"}}{Single-sample GSEA.} + \item{\code{"UCell"}}{Rank-based UCell scoring.} + \item{\code{"AUCell"}}{Area-under-the-curve ranking score.} } } @@ -96,9 +99,9 @@ es <- escape.matrix(pbmc, } \seealso{ -[runEscape()] to attach scores to a single-cell object; -[getGeneSets()] for MSigDB retrieval; [performNormalization()] for the -optional normalization workflow. +\code{\link{runEscape}} to attach scores to a single-cell object; + \code{\link{getGeneSets}} for MSigDB retrieval; + \code{\link{performNormalization}} for the optional normalization workflow. } \author{ Nick Borcherding, Jared Andrews diff --git a/man/getGeneSets.Rd b/man/getGeneSets.Rd index 19be69a..87ab284 100644 --- a/man/getGeneSets.Rd +++ b/man/getGeneSets.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/getGeneSets.R \name{getGeneSets} \alias{getGeneSets} -\title{Get a collection of gene sets from the msigdb} +\title{Get a Collection of Gene Sets from MSigDB} \usage{ getGeneSets( species = c("Homo sapiens", "Mus musculus"), @@ -14,38 +14,41 @@ getGeneSets( ) } \arguments{ -\item{species}{`"Homo sapiens"` (default) or `"Mus musculus"`.} +\item{species}{Character. Species name. Either \code{"Homo sapiens"} +(default) or \code{"Mus musculus"}.} -\item{library}{Character. Optional vector of main collection codes -(e.g. `"H"`, `"C5"`).} +\item{library}{Character or \code{NULL}. Vector of main collection codes +(e.g., \code{"H"}, \code{"C5"}). Default is \code{NULL} (all collections).} -\item{subcategory}{Character. Optional vector of sub-collection codes -(e.g. `"GO:BP"`).} +\item{subcategory}{Character or \code{NULL}. 
Vector of sub-collection codes +(e.g., \code{"GO:BP"}). Default is \code{NULL} (all subcategories).} -\item{gene.sets}{Character. Optional vector of specific gene-set names.} +\item{gene.sets}{Character or \code{NULL}. Vector of specific gene-set +names. Default is \code{NULL} (all gene sets).} -\item{version}{MSigDB version (character, default `"7.4"`).} +\item{version}{Character. MSigDB version. Default is \code{"7.4"}.} -\item{id}{Identifier type (default `"SYM"` for symbols).} +\item{id}{Character. Identifier type. Default is \code{"SYM"} (gene +symbols).} } \value{ -A named `list` of character vectors (gene IDs). +A named list of character vectors (gene IDs). } \description{ -This function retrieves gene sets from msigdb and caches the downloaded object -for future calls. It allows subsetting by main collection (library), -subcollection, or specific gene sets, and only supports human -("Homo sapiens") and mouse ("Mus musculus"). +This function retrieves gene sets from MSigDB and caches the downloaded +object for future calls. It allows subsetting by main collection (library), +subcollection, or specific gene sets, and only supports human +(\code{"Homo sapiens"}) and mouse (\code{"Mus musculus"}). } \examples{ \dontrun{ # Get all hallmark gene sets from human. -gs <- getGeneSets(species = "Homo sapiens", +gs <- getGeneSets(species = "Homo sapiens", library = "H") # Get a subset based on main collection and subcollection. 
-gs <- getGeneSets(species = "Homo sapiens", - library = c("C2", "C5"), +gs <- getGeneSets(species = "Homo sapiens", + library = c("C2", "C5"), subcategory = "GO:BP") } diff --git a/man/geyserEnrichment.Rd b/man/geyserEnrichment.Rd index bd2f151..3e3588c 100644 --- a/man/geyserEnrichment.Rd +++ b/man/geyserEnrichment.Rd @@ -8,7 +8,7 @@ geyserEnrichment( input.data, assay = NULL, group.by = NULL, - gene.set, + gene.set.use, color.by = "group", order.by = NULL, scale = FALSE, @@ -19,58 +19,67 @@ geyserEnrichment( ) } \arguments{ -\item{input.data}{Output of \code{\link{escape.matrix}} or a single‑cell +\item{input.data}{Output of \code{\link{escape.matrix}} or a single-cell object previously processed by \code{\link{runEscape}}.} -\item{assay}{Name of the assay holding enrichment scores when -`input.data` is a single‑cell object. Ignored otherwise.} +\item{assay}{Character. Name of the assay holding enrichment scores when +\code{input.data} is a single-cell object. Ignored otherwise.} -\item{group.by}{Metadata column plotted on the *x*‑axis. Defaults to the -Seurat/SCE `ident` slot when `NULL`.} +\item{group.by}{Character. Metadata column plotted on the x-axis. Defaults +to the Seurat/SCE \code{ident} slot when \code{NULL}.} -\item{gene.set}{Character(1). Gene‑set to plot (must exist in the -enrichment matrix).} +\item{gene.set.use}{Character. Name of the gene set to display.} -\item{color.by}{Aesthetic mapped to point color. Use either -*"group"* (default = `group.by`) for categorical coloring or the -*name of a gene‑set* (e.g. same as `gene.set`) to obtain a numeric -accepted.} +\item{color.by}{Character. Aesthetic mapped to point color. Options: +\itemize{ + \item \code{"group"} (default): Uses \code{group.by} for categorical + coloring. + \item \emph{gene-set name}: Use the same value as \code{gene.set.use} + to obtain a numeric gradient. + \item Any other metadata column present in the data. 
+}} -\item{order.by}{How to arrange the x‑axis: -*`"mean"`* – groups ordered by decreasing group mean; -*`"group"`* – natural sort of group labels; -*`NULL`* – keep original ordering.} +\item{order.by}{Character or \code{NULL}. How to arrange the x-axis: +\itemize{ + \item \code{"mean"}: Groups ordered by decreasing group mean. + \item \code{"group"}: Natural (alphanumeric) sort of group labels. + \item \code{NULL} (default): Keep original ordering. +}} -\item{scale}{Logical; if `TRUE` scores are centered/scaled (Z‑score) prior -to plotting.} +\item{scale}{Logical. If \code{TRUE}, scores are centered and scaled +(Z-score) prior to plotting. Default is \code{FALSE}.} -\item{facet.by}{Optional metadata column used to facet the plot.} +\item{facet.by}{Character or \code{NULL}. Metadata column used to facet +the plot.} -\item{summarise.by}{Optional metadata column used to summarise data.} +\item{summarise.by}{Character or \code{NULL}. Metadata column used to +summarise data before plotting.} -\item{summary.stat}{Optional method used to summarize expression within each -group defined by \code{summarise.by}. One of: \code{"mean"} (default), -\code{"median"}, \code{"max"}, \code{"sum"}, or \code{"geometric"}.} +\item{summary.stat}{Character. Method used to summarize expression within +each group defined by \code{summarise.by}. One of: \code{"mean"} +(default), \code{"median"}, \code{"max"}, \code{"sum"}, or +\code{"geometric"}.} -\item{palette}{Character. Any palette from \code{\link[grDevices]{hcl.pals}}.} +\item{palette}{Character. Color palette name from +\code{\link[grDevices]{hcl.pals}}. Default is \code{"inferno"}.} } \value{ A \pkg{ggplot2} object. } \description{ -This function allows to the user to examine the distribution of +This function allows the user to examine the distribution of enrichment across groups by generating a geyser plot. 
} \examples{ gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) - + pbmc <- SeuratObject::pbmc_small |> runEscape(gene.sets = gs, min.size = NULL) -geyserEnrichment(pbmc, - assay = "escape", - gene.set = "Tcells") +geyserEnrichment(pbmc, + assay = "escape", + gene.set.use = "Tcells") } diff --git a/man/gseaEnrichment.Rd b/man/gseaEnrichment.Rd index a8ae66a..826e613 100644 --- a/man/gseaEnrichment.Rd +++ b/man/gseaEnrichment.Rd @@ -22,51 +22,58 @@ gseaEnrichment( \item{input.data}{A \link[SeuratObject]{Seurat} object or a \link[SingleCellExperiment]{SingleCellExperiment}.} -\item{gene.set.use}{Character(1). Name of the gene set to display.} +\item{gene.set.use}{Character. Name of the gene set to display.} \item{gene.sets}{A named list of character vectors, the result of -[getGeneSets()], or the built-in data object [escape.gene.sets].} +\code{\link{getGeneSets}}, or the built-in data object +\code{\link{escape.gene.sets}}.} -\item{group.by}{Metadata column. Defaults to the Seurat/SCE `ident` -slot when `NULL`.} +\item{group.by}{Character. Metadata column used for grouping. Defaults to +the Seurat/SCE \code{ident} slot when \code{NULL}.} -\item{summary.fun}{Method used to collapse expression within each -group **before** ranking: one of `"mean"` (default), `"median"`, `"max"`, -`"sum"`, or `"geometric"`.} +\item{summary.fun}{Character. Method used to collapse expression within +each group \strong{before} ranking. One of: \code{"mean"} (default), +\code{"median"}, \code{"max"}, \code{"sum"}, or \code{"geometric"}.} -\item{p}{Weighting exponent in the KS statistic (classical GSEA uses `p = 1`).} +\item{p}{Numeric. Weighting exponent in the KS statistic. Classical GSEA +uses \code{p = 1}. Default is \code{1}.} -\item{nperm}{Integer >= 0. Gene-label permutations per group (default 1000). -`0` value will skip NES/*p* calculation.} +\item{nperm}{Integer. Number of gene-label permutations per group. 
+Default is \code{1000}. Set to \code{0} to skip NES/p-value calculation.} -\item{rug.height}{Vertical spacing of the hit rug as a fraction of the -y-axis (default `0.02`).} +\item{rug.height}{Numeric. Vertical spacing of the hit rug as a fraction +of the y-axis. Default is \code{0.02}.} -\item{digits}{Number of decimal places displayed for ES in the -legend (default `2`).} +\item{digits}{Integer. Number of decimal places displayed for ES in the +legend. Default is \code{2}.} \item{BPPARAM}{A \pkg{BiocParallel} parameter object describing the -parallel backend.} +parallel backend. Default is \code{NULL} (serial execution).} -\item{palette}{Character. Any palette from \code{\link[grDevices]{hcl.pals}}.} +\item{palette}{Character. Color palette name from +\code{\link[grDevices]{hcl.pals}}. Default is \code{"inferno"}.} } \value{ A single `patchwork`/`ggplot2` object } \description{ -Produces the familiar two-panel GSEA graphic—running enrichment score -(RES) plus a “hit” rug—for a **single gene-set** evaluated across -multiple biological groups (clusters, conditions, samples, ...). +Produces the familiar two-panel GSEA graphic: running enrichment score (RES) +plus a "hit" rug for a \strong{single gene-set} evaluated across multiple +biological groups (clusters, conditions, samples, etc.). +} +\section{Algorithm}{ + +Based on Subramanian \emph{et al.}, PNAS 2005: +\enumerate{ + \item Within every group, library-size-normalize counts to CPM. + \item Collapse gene expression with \code{summary.fun} (mean/median/etc.). + \item Rank genes (descending) to obtain one ordered list per group. + \item Compute the weighted Kolmogorov-Smirnov running score + (weight = |stat|^p). + \item ES = maximum signed deviation of the curve. } -\details{ -**Algorithm (Subramanian _et al._, PNAS 2005)** -1. Within every group, library-size-normalise counts to CPM. -2. Collapse gene expression with `summary.fun` (mean/median/…). -3. 
Rank genes (descending) to obtain one ordered list per group. -4. Compute the weighted Kolmogorov–Smirnov running score - (weight = \|stat\|^*p*). -5. ES = maximum signed deviation of the curve. } + \examples{ pbmc_small <- SeuratObject::pbmc_small diff --git a/man/heatmapEnrichment.Rd b/man/heatmapEnrichment.Rd index 722c4f2..4204c35 100644 --- a/man/heatmapEnrichment.Rd +++ b/man/heatmapEnrichment.Rd @@ -18,39 +18,45 @@ heatmapEnrichment( ) } \arguments{ -\item{input.data}{Output of \code{\link{escape.matrix}} or a single‑cell +\item{input.data}{Output of \code{\link{escape.matrix}} or a single-cell object previously processed by \code{\link{runEscape}}.} -\item{assay}{Name of the assay holding enrichment scores when -`input.data` is a single‑cell object. Ignored otherwise.} +\item{assay}{Character. Name of the assay holding enrichment scores when +\code{input.data} is a single-cell object. Ignored otherwise.} -\item{group.by}{Metadata column plotted on the *x*‑axis. Defaults to the -Seurat/SCE `ident` slot when `NULL`.} +\item{group.by}{Character. Metadata column plotted on the x-axis. Defaults +to the Seurat/SCE \code{ident} slot when \code{NULL}.} -\item{gene.set.use}{Vector of gene‑set names to plot, or \code{"all"} -(default) to show every available gene set.} +\item{gene.set.use}{Character vector or \code{"all"}. Gene-set names to +plot. Use \code{"all"} (default) to show every available gene set.} -\item{cluster.rows, cluster.columns}{Logical; if \code{TRUE}, rows/columns -are ordered by Ward‑linkage hierarchical clustering (Euclidean distance).} +\item{cluster.rows}{Logical. If \code{TRUE}, rows are ordered by Ward-linkage +hierarchical clustering (Euclidean distance). Default is \code{FALSE}.} -\item{facet.by}{Optional metadata column used to facet the plot.} +\item{cluster.columns}{Logical. If \code{TRUE}, columns are ordered by +Ward-linkage hierarchical clustering (Euclidean distance). 
Default is +\code{FALSE}.} -\item{scale}{If \code{TRUE}, Z‑transforms each gene‑set column **after** -summarization.} +\item{facet.by}{Character or \code{NULL}. Metadata column used to facet +the plot.} -\item{summary.stat}{Optional method used to summarize expression within each -group. One of: \code{"mean"} (default), \code{"median"}, \code{"max"}, -\code{"sum"}, or \code{"geometric"}.} +\item{scale}{Logical. If \code{TRUE}, Z-transforms each gene-set column +\strong{after} summarization. Default is \code{FALSE}.} -\item{palette}{Character. Any palette from \code{\link[grDevices]{hcl.pals}}.} +\item{summary.stat}{Character. Method used to summarize expression within +each group. One of: \code{"mean"} (default), \code{"median"}, +\code{"max"}, \code{"sum"}, or \code{"geometric"}.} + +\item{palette}{Character. Color palette name from +\code{\link[grDevices]{hcl.pals}}. Default is \code{"inferno"}.} } \value{ A \code{ggplot2} object. } \description{ -This function allows to the user to examine the heatmap with the mean -enrichment values by group. The heatmap will have the gene sets as rows -and columns will be the grouping variable. +This function allows the user to examine a heatmap with the mean enrichment +values by group. The heatmap displays gene sets as rows and the grouping +variable as columns. } \examples{ gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), diff --git a/man/pcaEnrichment.Rd b/man/pcaEnrichment.Rd index 4e1e84c..9edd547 100644 --- a/man/pcaEnrichment.Rd +++ b/man/pcaEnrichment.Rd @@ -18,29 +18,47 @@ pcaEnrichment( ) } \arguments{ -\item{input.data}{Single‑cell object (Seurat / SCE) **or** the raw list -returned by [`performPCA()`].} +\item{input.data}{Single-cell object (Seurat / SCE) \strong{or} the raw list +returned by \code{\link{performPCA}}.} -\item{dimRed}{Name of the dimensional‑reduction slot to pull from a -single‑cell object. Ignored when `input.data` is the list output.} +\item{dimRed}{Character. 
Name of the dimensional-reduction slot to pull from +a single-cell object. Ignored when \code{input.data} is the list output.} -\item{x.axis, y.axis}{Character vectors naming the PCs to display (e.g. "PC1").} +\item{x.axis}{Character. Name of the principal component to display on the +x-axis (e.g., \code{"PC1"}). Default is \code{"PC1"}.} -\item{facet.by}{Metadata column to facet plot.} +\item{y.axis}{Character. Name of the principal component to display on the +y-axis (e.g., \code{"PC2"}). Default is \code{"PC2"}.} -\item{style}{"point" (default) or "hex".} +\item{facet.by}{Character or \code{NULL}. Metadata column used to facet +the plot.} -\item{add.percent.contribution}{Include percent variance explained in axis -labels.} +\item{style}{Character. Plot style. Options: +\itemize{ + \item \code{"point"} (default): Density-aware scatter plot. + \item \code{"hex"}: Hexagonal binning. +}} -\item{display.factors}{Draw arrows for the top gene‑set loadings.} +\item{add.percent.contribution}{Logical. If \code{TRUE}, include percent +variance explained in axis labels. Default is \code{TRUE}.} -\item{number.of.factors}{Integer; how many loadings to display if -`display.factors = TRUE`.} +\item{display.factors}{Logical. If \code{TRUE}, draw arrows for the top +gene-set loadings. Default is \code{FALSE}.} -\item{palette}{Character. Any palette from \code{\link[grDevices]{hcl.pals}}. +\item{number.of.factors}{Integer. Number of loadings to display when +\code{display.factors = TRUE}. Default is \code{10}.} -#' @examples +\item{palette}{Character. Color palette name from +\code{\link[grDevices]{hcl.pals}}. Default is \code{"inferno"}.} +} +\value{ +A \pkg{ggplot2} object. +} +\description{ +This function allows the user to examine the distribution of principal +components computed on the enrichment values.
+} +\examples{ GS <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) pbmc_small <- SeuratObject::pbmc_small @@ -54,12 +72,6 @@ pbmc_small <- performPCA(pbmc_small, pcaEnrichment(pbmc_small, x.axis = "PC1", y.axis = "PC2", - dimRed = "escape.PCA")} -} -\value{ -A **ggplot2** object. -} -\description{ -This function allows to the user to examine the distribution -of principal components run on the enrichment values. + dimRed = "escape.PCA") + } diff --git a/man/performNormalization.Rd b/man/performNormalization.Rd index e784d5d..f1f08b5 100644 --- a/man/performNormalization.Rd +++ b/man/performNormalization.Rd @@ -15,39 +15,43 @@ performNormalization( ) } \arguments{ -\item{input.data}{raw‐counts matrix (`genes × cells`), a -\link[SeuratObject]{Seurat} object, or a +\item{input.data}{A raw-counts matrix (genes x cells), a +\link[SeuratObject]{Seurat} object, or a \link[SingleCellExperiment]{SingleCellExperiment}. Gene identifiers must -match those in `gene.sets`.} +match those in \code{gene.sets}.} -\item{enrichment.data}{Output of \code{\link{escape.matrix}} or a single‑cell -object previously processed by \code{\link{runEscape}}.} +\item{enrichment.data}{Matrix. Output of \code{\link{escape.matrix}} or +\code{NULL} if enrichment scores are already stored in \code{input.data}.} -\item{assay}{Name of the assay holding enrichment scores when -`input.data` is a single‑cell object. Ignored otherwise.} +\item{assay}{Character. Name of the assay holding enrichment scores when +\code{input.data} is a single-cell object. Default is \code{"escape"}. +Ignored otherwise.} \item{gene.sets}{A named list of character vectors, the result of -[getGeneSets()], or the built-in data object [escape.gene.sets]. -List names become column names in the result.} +\code{\link{getGeneSets}}, or the built-in data object +\code{\link{escape.gene.sets}}. 
List names must match column names in the +enrichment matrix.} -\item{make.positive}{Logical; if `TRUE` shifts each column so its minimum is -zero.} +\item{make.positive}{Logical. If \code{TRUE}, shifts each column so its +minimum is zero. Default is \code{FALSE}.} -\item{scale.factor}{Optional numeric vector overriding gene‑count scaling -(length = #cells). Use when you want external per‑cell normalization factors.} +\item{scale.factor}{Numeric vector or \code{NULL}. Optional per-cell scaling +factors (length = number of cells). Use when you want external per-cell +normalization factors. Default is \code{NULL} (compute from gene counts).} -\item{groups}{Integer >= 1. Number of cells per processing chunk. -Larger values reduce overhead but increase memory usage. Default **1000**.} +\item{groups}{Integer or \code{NULL}. Number of cells per processing chunk. +Larger values reduce overhead but increase memory usage. Default is +\code{NULL} (process all cells at once).} } \value{ If `input.data` is an object, the same object with a new assay "_normalized". Otherwise a matrix of normalized scores. } \description{ -Scales each enrichment value by the **number of genes from the set that are -expressed** in that cell (non‑zero counts). Optionally shifts results into a -positive range and/or applies a natural‑log transform for compatibility with -log‑based differential tests. +Scales each enrichment value by the \strong{number of genes from the set +that are expressed} in that cell (non-zero counts). Optionally shifts +results into a positive range and/or applies a natural-log transform for +compatibility with log-based differential tests. 
} \examples{ gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), diff --git a/man/performPCA.Rd b/man/performPCA.Rd index c0d3bcc..970bf8d 100644 --- a/man/performPCA.Rd +++ b/man/performPCA.Rd @@ -14,20 +14,25 @@ performPCA( ) } \arguments{ -\item{input.data}{Output of \code{\link{escape.matrix}} or a single‑cell +\item{input.data}{Output of \code{\link{escape.matrix}} or a single-cell object previously processed by \code{\link{runEscape}}.} -\item{assay}{Name of the assay holding enrichment scores when -`input.data` is a single‑cell object. Ignored otherwise.} +\item{assay}{Character. Name of the assay holding enrichment scores when +\code{input.data} is a single-cell object. Default is \code{"escape"}. +Ignored otherwise.} -\item{scale}{Logical; if `TRUE` standardises each gene-set column -before PCA.} +\item{scale}{Logical. If \code{TRUE}, standardizes each gene-set column +before PCA. Default is \code{TRUE}.} -\item{n.dim}{Integer >= 1 or vector; the **largest** value sets the -number of principal components to compute / keep.} +\item{n.dim}{Integer. The number of principal components to compute and +keep. Default is \code{10}.} -\item{reduction.name, reduction.key}{Names used when writing back to a -Seurat / SCE object.} +\item{reduction.name}{Character. Name used for the dimensional reduction +slot when writing back to a Seurat/SCE object. Default is +\code{"escape.PCA"}.} + +\item{reduction.key}{Character. Key prefix for the dimensional reduction +when writing back to a Seurat/SCE object. Default is \code{"escPC_"}.} } \value{ *If* `input.data` is a single-cell object, the same object with a @@ -35,13 +40,13 @@ Seurat / SCE object.} `PCA`, `eigen_values`, `contribution`, and `rotation`. } \description{ -This function allows users to calculate the principal components -for the gene set enrichment values. For single-cell data, the PCA -will be stored with the dimensional reductions. 
If a matrix is used -as input, the output is a list for further plotting. Alternatively, -users can use functions for PCA calculations based on their desired -workflow in lieu of using \code{\link{performPCA}}, but will not be -compatible with downstream \code{\link{pcaEnrichment}} visualization. +This function allows users to calculate the principal components for the +gene set enrichment values. For single-cell data, the PCA will be stored +with the dimensional reductions. If a matrix is used as input, the output +is a list for further plotting. Alternatively, users can use functions for +PCA calculations based on their desired workflow in lieu of using +\code{\link{performPCA}}, but these will not be compatible with downstream +\code{\link{pcaEnrichment}} visualization. } \examples{ gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), diff --git a/man/ridgeEnrichment.Rd b/man/ridgeEnrichment.Rd index b37cee1..6c01f14 100644 --- a/man/ridgeEnrichment.Rd +++ b/man/ridgeEnrichment.Rd @@ -18,41 +18,50 @@ ridgeEnrichment( ) } \arguments{ -\item{input.data}{Output of \code{\link{escape.matrix}} or a single‑cell +\item{input.data}{Output of \code{\link{escape.matrix}} or a single-cell object previously processed by \code{\link{runEscape}}.} -\item{gene.set.use}{Character(1). Name of the gene set to display.} +\item{gene.set.use}{Character. Name of the gene set to display.} -\item{assay}{Name of the assay holding enrichment scores when -`input.data` is a single‑cell object. Ignored otherwise.} +\item{assay}{Character. Name of the assay holding enrichment scores when +\code{input.data} is a single-cell object. Ignored otherwise.} -\item{group.by}{Metadata column plotted on the *y*‑axis. Defaults to the -Seurat/SCE `ident` slot when `NULL`.} +\item{group.by}{Character. Metadata column plotted on the y-axis. Defaults +to the Seurat/SCE \code{ident} slot when \code{NULL}.} -\item{color.by}{Aesthetic mapped to point color.
Use either -*"group"* (default = `group.by`) for categorical coloring or the -*name of a gene‑set* (e.g. same as `gene.set`) to obtain a numeric -accepted.} +\item{color.by}{Character. Aesthetic mapped to fill color. Options: +\itemize{ + \item \code{"group"} (default): Uses \code{group.by} for categorical + coloring. + \item \emph{gene-set name}: Use the same value as \code{gene.set.use} + to obtain a numeric gradient. + \item Any other metadata column present in the data. +}} -\item{order.by}{How to arrange the x‑axis: -*`"mean"`* – groups ordered by decreasing group mean; -*`"group"`* – natural sort of group labels; -*`NULL`* – keep original ordering.} +\item{order.by}{Character or \code{NULL}. How to arrange the y-axis: +\itemize{ + \item \code{"mean"}: Groups ordered by decreasing group mean. + \item \code{"group"}: Natural (alphanumeric) sort of group labels. + \item \code{NULL} (default): Keep original ordering. +}} -\item{scale}{Logical; if `TRUE` scores are centred/scaled (Z‑score) prior -to plotting.} +\item{scale}{Logical. If \code{TRUE}, scores are centered and scaled +(Z-score) prior to plotting. Default is \code{FALSE}.} -\item{facet.by}{Optional metadata column used to facet the plot.} +\item{facet.by}{Character or \code{NULL}. Metadata column used to facet +the plot.} -\item{add.rug}{Logical. Draw per-cell tick marks underneath each ridge.} +\item{add.rug}{Logical. If \code{TRUE}, draw per-cell tick marks underneath +each ridge. Default is \code{FALSE}.} -\item{palette}{Character. Any palette from \code{\link[grDevices]{hcl.pals}}.} +\item{palette}{Character. Color palette name from +\code{\link[grDevices]{hcl.pals}}. Default is \code{"inferno"}.} } \value{ A [ggplot2] object. } \description{ -This function allows to the user to examine the distribution of +This function allows the user to examine the distribution of enrichment across groups by generating a ridge plot. 
} \examples{ diff --git a/man/runEscape.Rd b/man/runEscape.Rd index 7c1be5c..fabf090 100644 --- a/man/runEscape.Rd +++ b/man/runEscape.Rd @@ -20,61 +20,64 @@ runEscape( ) } \arguments{ -\item{input.data}{A raw‐counts matrix (`genes × cells`), a +\item{input.data}{A raw-counts matrix (genes x cells), a \link[SeuratObject]{Seurat} object, or a \link[SingleCellExperiment]{SingleCellExperiment}. Gene identifiers must -match those in `gene.sets`.} +match those in \code{gene.sets}.} \item{gene.sets}{A named list of character vectors, the result of -[getGeneSets()], or the built-in data object [escape.gene.sets]. -List names become column names in the result.} +\code{\link{getGeneSets}}, or the built-in data object +\code{\link{escape.gene.sets}}. List names become column names in the +result.} -\item{method}{Scoring algorithm (case-insensitive). One of `"GSVA"`, -`"ssGSEA"`, `"UCell"`, or `"AUCell"`. Default **`"ssGSEA"`**.} +\item{method}{Character. Scoring algorithm (case-insensitive). One of +\code{"GSVA"}, \code{"ssGSEA"}, \code{"UCell"}, or \code{"AUCell"}. +Default is \code{"ssGSEA"}.} -\item{groups}{Integer >= 1. Number of cells per processing chunk. -Larger values reduce overhead but increase memory usage. Default **1000**.} +\item{groups}{Integer. Number of cells per processing chunk. Larger values +reduce overhead but increase memory usage. Default is \code{1000}.} -\item{min.size}{Minimum number of genes from a set that must be detected -in the expression matrix for that set to be scored. Default **5**. -Use `NULL` to disable filtering.} +\item{min.size}{Integer or \code{NULL}. Minimum number of genes from a set +that must be detected in the expression matrix for that set to be scored. +Default is \code{5}. Use \code{NULL} to disable filtering.} -\item{normalize}{Logical. If `TRUE`, the score matrix is passed to -[performNormalization()] (drop-out scaling and optional log transform). -Default **FALSE**.} +\item{normalize}{Logical. 
If \code{TRUE}, the score matrix is passed to +\code{\link{performNormalization}} (drop-out scaling and optional log +transform). Default is \code{FALSE}.} -\item{make.positive}{Logical. If `TRUE` *and* `normalize = TRUE`, shifts -every gene-set column so its global minimum is zero, facilitating -downstream log-ratio analyses. Default **FALSE**.} +\item{make.positive}{Logical. If \code{TRUE} \emph{and} +\code{normalize = TRUE}, shifts every gene-set column so its global +minimum is zero, facilitating downstream log-ratio analyses. Default is +\code{FALSE}.} \item{new.assay.name}{Character. Name for the assay that will store the -enrichment matrix in the returned object. Default **"escape"**.} +enrichment matrix in the returned object. Default is \code{"escape"}.} -\item{min.expr.cells}{Numeric. Gene-expression filter threshold (see -details above). Default **0** (no gene filtering).} +\item{min.expr.cells}{Numeric. Gene-expression filter threshold. Default is +\code{0} (no gene filtering).} -\item{min.filter.by}{Character or `NULL`. Column name in `meta.data` -(Seurat) or `colData` (SCE) defining groups within which the -`min.expr.cells` rule is applied. Default **`NULL`**.} +\item{min.filter.by}{Character or \code{NULL}. Column name in +\code{meta.data} (Seurat) or \code{colData} (SCE) defining groups within +which the \code{min.expr.cells} rule is applied. Default is \code{NULL}.} \item{BPPARAM}{A \pkg{BiocParallel} parameter object describing the -parallel backend.} +parallel backend. Default is \code{NULL} (serial execution).} -\item{...}{Extra arguments passed verbatim to the chosen back-end -scoring function (`gsva()`, `ScoreSignatures_UCell()`, or -`AUCell_calcAUC()`).} +\item{...}{Extra arguments passed verbatim to the chosen back-end scoring +function (\code{gsva()}, \code{ScoreSignatures_UCell()}, or +\code{AUCell_calcAUC()}).} } \value{ The input single-cell object with an additional assay containing the -enrichment scores (`cells × gene-sets`). 
Matrix orientation follows -standard single-cell conventions (gene-sets as rows inside the assay). + enrichment scores (cells x gene-sets). Matrix orientation follows standard + single-cell conventions (gene-sets as rows inside the assay). } \description{ -`runEscape()` is a convenience wrapper around [escape.matrix()] that -computes enrichment scores and inserts them as a new assay (default -`"escape"`) in a \pkg{Seurat} or \pkg{SingleCellExperiment} object. All -arguments (except `new.assay.name`) map directly to their counterparts in -`escape.matrix()`. +\code{runEscape()} is a convenience wrapper around \code{\link{escape.matrix}} +that computes enrichment scores and inserts them as a new assay (default +\code{"escape"}) in a \pkg{Seurat} or \pkg{SingleCellExperiment} object. All +arguments (except \code{new.assay.name}) map directly to their counterparts +in \code{escape.matrix()}. } \examples{ gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), @@ -90,9 +93,10 @@ sce <- runEscape(sce, } \seealso{ -[escape.matrix()] for the underlying computation, -[performNormalization()] to add normalized scores, [heatmapEnrichment()], -[ridgeEnrichment()] and related plotting helpers for visualization. +\code{\link{escape.matrix}} for the underlying computation; + \code{\link{performNormalization}} to add normalized scores; + \code{\link{heatmapEnrichment}}, \code{\link{ridgeEnrichment}}, and + related plotting helpers for visualization. 
} \author{ Nick Borcherding, Jared Andrews diff --git a/man/scatterEnrichment.Rd b/man/scatterEnrichment.Rd index d7983f0..b43e484 100644 --- a/man/scatterEnrichment.Rd +++ b/man/scatterEnrichment.Rd @@ -22,45 +22,63 @@ scatterEnrichment( ) } \arguments{ -\item{input.data}{Output of \code{\link{escape.matrix}} or a single‑cell +\item{input.data}{Output of \code{\link{escape.matrix}} or a single-cell object previously processed by \code{\link{runEscape}}.} -\item{assay}{Name of the assay holding enrichment scores when -`input.data` is a single‑cell object. Ignored otherwise.} +\item{assay}{Character. Name of the assay holding enrichment scores when +\code{input.data} is a single-cell object. Ignored otherwise.} -\item{x.axis, y.axis}{Gene-set names to plot on the *x* and *y* axes.} +\item{x.axis}{Character. Gene-set name to plot on the x-axis.} -\item{facet.by}{Optional metadata column used to facet the plot.} +\item{y.axis}{Character. Gene-set name to plot on the y-axis.} -\item{group.by}{Metadata column plotted. Defaults to the -Seurat/SCE `ident` slot when `NULL`.} +\item{facet.by}{Character or \code{NULL}. Metadata column used to facet +the plot.} -\item{color.by}{Aesthetic mapped to point color. Use -`"density"` (default), `"group"`, `"x"`, or `"y"`. The latter two apply a -continuous gradient to the corresponding axis.} +\item{group.by}{Character. Metadata column used when \code{color.by = "group"}. +Defaults to the Seurat/SCE \code{ident} slot when \code{NULL}.} -\item{style}{`"point"` (density-aware points) or `"hex"` (hex-bin).} +\item{color.by}{Character. Aesthetic mapped to point color. Options: +\itemize{ + \item \code{"density"} (default): Shade points by local 2-D density. + \item \code{"group"}: Color by the \code{group.by} metadata column. + \item \code{"x"}: Apply a continuous gradient based on the x-axis values. + \item \code{"y"}: Apply a continuous gradient based on the y-axis values. 
+}} -\item{scale}{Logical; if `TRUE` scores are centered/scaled (Z‑score) prior -to plotting.} +\item{style}{Character. Plot style. Options: +\itemize{ + \item \code{"point"} (default): Density-aware scatter plot. + \item \code{"hex"}: Hexagonal binning. +}} -\item{bins}{Number of hex bins along each axis when `style = "hex"`.} +\item{scale}{Logical. If \code{TRUE}, scores are centered and scaled +(Z-score) prior to plotting. Default is \code{FALSE}.} -\item{point.size, alpha}{Aesthetic tweaks for `style = "point"`.} +\item{bins}{Integer. Number of hex bins along each axis when +\code{style = "hex"}. Default is \code{40}.} -\item{palette}{Character. Any palette from \code{\link[grDevices]{hcl.pals}}.} +\item{point.size}{Numeric. Point size for \code{style = "point"}. +Default is \code{1.2}.} -\item{add.corr}{Logical. Add Pearson and Spearman correlation -coefficients (top-left corner of the first facet).} +\item{alpha}{Numeric. Transparency for points or hexbins. +Default is \code{0.8}.} + +\item{palette}{Character. Color palette name from +\code{\link[grDevices]{hcl.pals}}. Default is \code{"inferno"}.} + +\item{add.corr}{Logical. If \code{TRUE}, add Pearson and Spearman +correlation coefficients to the plot (top-left corner). Default is +\code{FALSE}.} } \value{ A \pkg{ggplot2} object. } \description{ -Visualize the relationship between *two* enrichment scores at single-cell -resolution. By default points are shaded by local 2-D density -(`color.by = "density"`), but users can instead color by a metadata column -(discrete) or by the raw gene-set scores themselves (continuous). +Visualize the relationship between two enrichment scores at single-cell +resolution. By default, points are shaded by local 2-D density +(\code{color.by = "density"}), but users can instead color by a metadata +column (discrete) or by the raw gene-set scores themselves (continuous). 
} \examples{ gs <- list( diff --git a/man/splitEnrichment.Rd b/man/splitEnrichment.Rd index 614a584..b235a5e 100644 --- a/man/splitEnrichment.Rd +++ b/man/splitEnrichment.Rd @@ -17,40 +17,46 @@ splitEnrichment( ) } \arguments{ -\item{input.data}{Output of \code{\link{escape.matrix}} or a single‑cell +\item{input.data}{Output of \code{\link{escape.matrix}} or a single-cell object previously processed by \code{\link{runEscape}}.} -\item{assay}{Name of the assay holding enrichment scores when -`input.data` is a single‑cell object. Ignored otherwise.} +\item{assay}{Character. Name of the assay holding enrichment scores when +\code{input.data} is a single-cell object. Ignored otherwise.} -\item{split.by}{A metadata column used to split or color violins. Must contain -at least two levels. If it contains more than two, dodged violins are used.} +\item{split.by}{Character. Metadata column used to split or color violins. +Must contain at least two levels. If more than two levels are present, +dodged violins are used instead of split violins.} -\item{group.by}{Metadata column plotted on the *x*‑axis. Defaults to the -Seurat/SCE `ident` slot when `NULL`.} +\item{group.by}{Character. Metadata column plotted on the x-axis. Defaults +to the Seurat/SCE \code{ident} slot when \code{NULL}.} -\item{gene.set.use}{Character(1). Name of the gene set to display.} +\item{gene.set.use}{Character. Name of the gene set to display.} -\item{order.by}{How to arrange the x‑axis: -*`"mean"`* – groups ordered by decreasing group mean; -*`"group"`* – natural sort of group labels; -*`NULL`* – keep original ordering.} +\item{order.by}{Character or \code{NULL}. How to arrange the x-axis: +\itemize{ + \item \code{"mean"}: Groups ordered by decreasing group mean. + \item \code{"group"}: Natural (alphanumeric) sort of group labels. + \item \code{NULL} (default): Keep original ordering. +}} -\item{facet.by}{Optional metadata column used to facet the plot.} +\item{facet.by}{Character or \code{NULL}. 
Metadata column used to facet +the plot.} -\item{scale}{Logical; if `TRUE` scores are centred/scaled (Z‑score) prior -to plotting.} +\item{scale}{Logical. If \code{TRUE}, scores are centered and scaled +(Z-score) prior to plotting. Default is \code{TRUE}.} -\item{palette}{Character. Any palette from \code{\link[grDevices]{hcl.pals}}.} +\item{palette}{Character. Color palette name from +\code{\link[grDevices]{hcl.pals}}. Default is \code{"inferno"}.} } \value{ A [ggplot2] object. } \description{ Visualize the distribution of gene set enrichment scores across groups using -violin plots. When `split.by` contains exactly two levels, the function draws -split violins for easy group comparison within each `group.by` category. If -`split.by` has more than two levels, standard dodged violins are drawn instead. +violin plots. When \code{split.by} contains exactly two levels, the function +draws split violins for easy group comparison within each \code{group.by} +category. If \code{split.by} has more than two levels, standard dodged +violins are drawn instead. } \examples{ gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), diff --git a/vignettes/escape.Rmd b/vignettes/escape.Rmd index cc954e5..4350d5b 100644 --- a/vignettes/escape.Rmd +++ b/vignettes/escape.Rmd @@ -332,54 +332,54 @@ heatmapEnrichment(sce.pbmc, We can also focus on individual gene sets - one approach is to use ```geyserEnrichment()```. Here individual cells are plotted along the Y-axis with graphical summary where the central dot refers to the median enrichment value and the thicker/thinner lines demonstrate the interval summaries referring to the 66% and 95%. 
```{r tidy=FALSE} -geyserEnrichment(pbmc_small, +geyserEnrichment(pbmc_small, assay = "escape.ssGSEA", - gene.set = "T1-Interferon") + gene.set.use = "T1-Interferon") ``` To show the additional parameters that appear in visualizations of individual enrichment gene sets - we can reorder the groups by the mean of the gene set using **order.by** = "mean". ```{r tidy=FALSE} -geyserEnrichment(pbmc_small, +geyserEnrichment(pbmc_small, assay = "escape.ssGSEA", - gene.set = "T1-Interferon", + gene.set.use = "T1-Interferon", order.by = "mean") ``` -What if we had 2 separate samples or groups within the data? Another parameter we can use is **facet.by** to allow for direct visualization of an additional variable. +What if we had 2 separate samples or groups within the data? Another parameter we can use is **facet.by** to allow for direct visualization of an additional variable. ```{r tidy=FALSE} -geyserEnrichment(pbmc_small, +geyserEnrichment(pbmc_small, assay = "escape.ssGSEA", - gene.set = "T1-Interferon", + gene.set.use = "T1-Interferon", facet.by = "groups") ``` Lastly, we can select the way the color is applied to the plot using the **color.by** parameter. Here we can set it to the gene set of interest *"HALLMARK-INTERFERON-GAMMA-RESPONSE"*. ```{r tidy=FALSE} -geyserEnrichment(pbmc_small, +geyserEnrichment(pbmc_small, assay = "escape.ssGSEA", - gene.set = "T1-Interferon", + gene.set.use = "T1-Interferon", color.by = "T1-Interferon") ``` ## ridgeEnrichment -Similar to the ```geyserEnrichment()``` the ```ridgeEnrichment()``` can display the distribution of enrichment values across the selected gene set. The central line is at the median value for the respective grouping. +Similar to the ```geyserEnrichment()``` the ```ridgeEnrichment()``` can display the distribution of enrichment values across the selected gene set. The central line is at the median value for the respective grouping. 
```{r tidy=FALSE} -ridgeEnrichment(sce.pbmc, +ridgeEnrichment(sce.pbmc, assay = "escape.UCell", - gene.set = "T2_Interferon") + gene.set.use = "T2_Interferon") ``` We can get the relative position of individual cells along the x-axis using the **add.rug** parameter. ```{r tidy=FALSE} -ridgeEnrichment(sce.pbmc, +ridgeEnrichment(sce.pbmc, assay = "escape.UCell", - gene.set = "T2_Interferon", + gene.set.use = "T2_Interferon", add.rug = TRUE, scale = TRUE) ``` @@ -389,19 +389,19 @@ ridgeEnrichment(sce.pbmc, Another distribution visualization is a violin plot, which we separate and directly compare using a binary classification. Like ```ridgeEnrichment()```, this allows for greater use of categorical variables. For ```splitEnrichment()```, the output will be two halves of a violin plot based on the **split.by** parameter with a central boxplot with the relative distribution across all samples. ```{r tidy=FALSE} -splitEnrichment(pbmc_small, +splitEnrichment(pbmc_small, assay = "escape.ssGSEA", - gene.set = "Lipid-mediators", + gene.set.use = "Lipid-mediators", split.by = "groups") ``` If selecting a **split.by** variable with more than 2 levels, ```splitEnrichment()``` will convert the violin plots to dodge. ```{r tidy=FALSE} -splitEnrichment(pbmc_small, +splitEnrichment(pbmc_small, assay = "escape.ssGSEA", - gene.set = "Lipid-mediators", - split.by = "ident", + gene.set.use = "Lipid-mediators", + split.by = "ident", group.by = "groups") ```