6 changes: 6 additions & 0 deletions .changeset/famous-hounds-dress.md
@@ -0,0 +1,6 @@
+---
+'@platforma-open/milaboratories.dimensionality-reduction.workflow': minor
+'@platforma-open/milaboratories.dimensionality-reduction': minor
+---
+
+Enable block deduplication and improve trace label
4 changes: 2 additions & 2 deletions block/package.json
@@ -26,9 +26,9 @@
   "title": "Dimensionality Reduction",
   "logo": "file:../logos/block-logo.png",
   "url": "https://github.com/platforma-open/dimensionality-reduction",
-  "docs": "https://github.com/platforma-open/dimensionality-reduction/docs",
+  "docs": "https://docs.platforma.bio/guides/sc-rna-seq-analysis/dimensionality-reduction/",
   "support": "mailto:support@milaboratories.com",
-  "description": "Takes single cell count matrix as input, performs dimensionality reduction using UMAP and tSNE methods.",
+  "description": "Performs dimensionality reduction on single-cell RNA-seq count matrices using PCA, t-SNE, and UMAP, with optional Harmony batch correction.",
   "longDescription": "file:../docs/description.md",
   "changelog": "file:../CHANGELOG.md",
   "tags": [
18 changes: 17 additions & 1 deletion docs/description.md
@@ -1,3 +1,19 @@
 # Overview
 
-Takes high-dimensional gene expression data and transforms it into a lower-dimensional space while preserving the most important biological variation. The block takes the output from any scRNA-seq preprocessing block (e.g., Cell Ranger) as input. It generates plots with tSNE and UMAP projections to aid dataset exploration and outputs dimension values to be used by downstream blocks (e.g. Cell Browser).
+Takes high-dimensional gene expression data from scRNA-seq preprocessing blocks (e.g., Cell Ranger) and transforms it into a lower-dimensional space while preserving biological variation using three complementary methods. Principal Component Analysis (PCA) reduces the data to a configurable number of principal components capturing major sources of variation. t-distributed Stochastic Neighbor Embedding (t-SNE) creates a two-dimensional embedding optimized for local structure preservation, while Uniform Manifold Approximation and Projection (UMAP) generates a three-dimensional embedding balancing local and global structure. Both t-SNE and UMAP use adaptive parameters that adjust based on dataset size, and all methods operate on the PCA space for computational efficiency.
+
+When metadata covariates are provided, the block optionally performs batch correction using Harmony, which integrates cells across batches while preserving biological variation. Harmony correction is applied to the PCA space, and both UMAP and t-SNE embeddings are recomputed using the corrected principal components.
+
+The resulting dimension values are used by downstream blocks: PCA components for clustering and pseudotime inference, and UMAP/t-SNE embeddings for visualization. The block provides both standard and batch-corrected embeddings for comparison.
+
+The block uses scanpy v1.10.1 for dimensionality reduction algorithms and preprocessing. When using this block in your research, cite the scanpy publication (Wolf et al. 2018) listed below.
+
+The following publications describe the methodologies used:
+
+> Wolf, F. A., Angerer, P., & Theis, F. J. (2018). SCANPY: large-scale single-cell gene expression data analysis. _Genome Biology_ **19**, 15 (2018). [https://doi.org/10.1186/s13059-017-1382-0](https://doi.org/10.1186/s13059-017-1382-0)
+
+> McInnes, L., Healy, J., & Melville, J. (2018). UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction. _Journal of Open Source Software_ **3**(29), 861. [https://doi.org/10.21105/joss.00861](https://doi.org/10.21105/joss.00861)
+
+> van der Maaten, L., & Hinton, G. (2008). Visualizing data using t-SNE. _Journal of Machine Learning Research_ **9**, 2579-2605. [http://www.jmlr.org/papers/v9/vandermaaten08a.html](http://www.jmlr.org/papers/v9/vandermaaten08a.html)
+
+> Korsunsky, I., Millard, N., Fan, J. et al. (2019). Fast, sensitive and accurate integration of single-cell data with Harmony. _Nature Methods_ **16**, 1289–1296 (2019). [https://doi.org/10.1038/s41592-019-0619-0](https://doi.org/10.1038/s41592-019-0619-0)
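
For orientation, the pipeline the new overview describes maps onto a few scanpy calls. The sketch below is illustrative only — the block's actual Python scripts are not part of this diff, and the file name, preprocessing steps, and the size-dependent neighbors heuristic are assumptions:

    # Minimal sketch (assumed file names and parameters; scanpy v1.10 APIs)
    import scanpy as sc

    adata = sc.read_csv("rawCounts.csv")          # cells x genes count matrix
    sc.pp.normalize_total(adata, target_sum=1e4)  # basic preprocessing
    sc.pp.log1p(adata)

    sc.tl.pca(adata, n_comps=50)                  # configurable number of PCs

    # t-SNE and UMAP both operate on the PCA space; a size-dependent
    # neighborhood is one way to implement the "adaptive parameters" above
    n_neighbors = max(15, min(100, adata.n_obs // 500))
    sc.pp.neighbors(adata, n_neighbors=n_neighbors, use_rep="X_pca")
    sc.tl.umap(adata, n_components=3)             # three-dimensional embedding
    sc.tl.tsne(adata, use_rep="X_pca")            # two-dimensional embedding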
661 changes: 361 additions & 300 deletions pnpm-lock.yaml

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions pnpm-workspace.yaml
@@ -7,12 +7,12 @@ packages:
   - block
 
 catalog:
-  "@platforma-sdk/model": ^1.45.0
-  "@platforma-sdk/ui-vue": ^1.45.5
-  "@platforma-sdk/workflow-tengo": ^5.5.9
+  "@platforma-sdk/model": ^1.45.23
+  "@platforma-sdk/ui-vue": ^1.45.23
+  "@platforma-sdk/workflow-tengo": ^5.5.16
   "@platforma-sdk/block-tools": ^2.6.16
-  "@platforma-sdk/test": ^1.45.11
-  "@platforma-sdk/tengo-builder": ^2.3.3
+  "@platforma-sdk/test": ^1.45.25
+  "@platforma-sdk/tengo-builder": ^2.3.4
   "@platforma-sdk/package-builder": ^3.10.7
   "@platforma-sdk/blocks-deps-updater": ^2.0.0
   "@milaboratories/graph-maker": ^1.1.175
41 changes: 41 additions & 0 deletions workflow/src/batch-correction.tpl.tengo
@@ -0,0 +1,41 @@
+self := import("@platforma-sdk/workflow-tengo:tpl")
+exec := import("@platforma-sdk/workflow-tengo:exec")
+assets := import("@platforma-sdk/workflow-tengo:assets")
+
+self.defineOutputs("umapDimensionsCsv", "tsneDimensionsCsv", "harmonyResultsCsv")
+
+self.body(func(args) {
+  // Input parameters
+  csvCounts := args.csvCounts
+  csvCovariates := args.csvCovariates
+  mem := args.mem
+  cpu := args.cpu
+
+  // Batch correction software execution
+  batchCorrection := exec.builder().
+    software(assets.importSoftware("@platforma-open/milaboratories.dimensionality-reduction.software:calculate-batchCorrection")).
+    mem(mem).
+    cpu(cpu).
+    addFile("rawCounts.csv", csvCounts).
+    addFile("metadata.csv", csvCovariates).
+    arg("--counts").arg("rawCounts.csv").
+    arg("--metadata").arg("metadata.csv").
+    arg("--output").arg(".").
+    saveFile("umap_dimensions.csv").
+    saveFile("tsne_dimensions.csv").
+    saveFile("harmony_results.csv").
+    cache(24 * 60 * 60 * 1000).
+
[Review comment — medium]

The cache duration is hardcoded as a magic number. To improve readability and maintainability, it's better to define this value as a named constant at the top of the file. For example:

    cacheDuration := 24 * 60 * 60 * 1000 // 24 hours

Then, you can use this constant here: cache(cacheDuration).
+    run()
+
+  // Get result files
+  umapDimensionsCsv := batchCorrection.getFile("umap_dimensions.csv")
+  tsneDimensionsCsv := batchCorrection.getFile("tsne_dimensions.csv")
+  harmonyResultsCsv := batchCorrection.getFile("harmony_results.csv")
+
+  return {
+    umapDimensionsCsv: umapDimensionsCsv,
+    tsneDimensionsCsv: tsneDimensionsCsv,
+    harmonyResultsCsv: harmonyResultsCsv
+  }
+})
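
The template above only stages files and arguments; per the new docs/description.md, the calculate-batchCorrection tool applies Harmony to the PCA space and recomputes UMAP and t-SNE from the corrected components. A hedged sketch of that flow, assuming scanpy's harmonypy wrapper and a single assumed "batch" covariate column (the real script is not in this diff):

    import pandas as pd
    import scanpy as sc

    adata = sc.read_csv("rawCounts.csv")                     # cells x genes counts
    meta = pd.read_csv("metadata.csv", index_col=0)
    adata.obs["batch"] = meta.loc[adata.obs_names, "batch"]  # assumed column name

    sc.pp.normalize_total(adata)
    sc.pp.log1p(adata)
    sc.tl.pca(adata, n_comps=50)

    # Harmony writes the corrected embedding to obsm["X_pca_harmony"]
    sc.external.pp.harmony_integrate(adata, key="batch")

    # UMAP/t-SNE are recomputed from the corrected principal components
    sc.pp.neighbors(adata, use_rep="X_pca_harmony")
    sc.tl.umap(adata, n_components=3)
    sc.tl.tsne(adata, use_rep="X_pca_harmony")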

42 changes: 42 additions & 0 deletions workflow/src/dim-reduction-calculation.tpl.tengo
@@ -0,0 +1,42 @@
+self := import("@platforma-sdk/workflow-tengo:tpl")
+exec := import("@platforma-sdk/workflow-tengo:exec")
+assets := import("@platforma-sdk/workflow-tengo:assets")
+
+self.defineOutputs("umapResultsCsv", "tsneResultsCsv", "pcaResultsCsv")
+
+self.body(func(args) {
+  // Input parameters
+  csvCounts := args.csvCounts
+  nPCs := args.nPCs
+  nNeighbors := args.nNeighbors
+  mem := args.mem
+  cpu := args.cpu
+
+  // Dimensionality reduction software execution
+  dimReduction := exec.builder().
+    software(assets.importSoftware("@platforma-open/milaboratories.dimensionality-reduction.software:calculate-dimRed")).
+    mem(mem).
+    cpu(cpu).
+    addFile("rawCounts.csv", csvCounts).
+    arg("--file_path").arg("rawCounts.csv").
+    arg("--output_dir").arg(".").
+    arg("--n_pcs").arg(string(nPCs)).
+    arg("--n_neighbors").arg(string(nNeighbors)).
+    saveFile("umap_results.csv").
+    saveFile("tsne_results.csv").
+    saveFile("pca_results.csv").
+    cache(24 * 60 * 60 * 1000).
+
[Review comment — medium]

The cache duration is hardcoded as a magic number. To improve readability and maintainability, it's better to define this value as a named constant at the top of the file. For example:

    cacheDuration := 24 * 60 * 60 * 1000 // 24 hours

Then, you can use this constant here: cache(cacheDuration).
+    run()
+
+  // Get result files
+  umapResultsCsv := dimReduction.getFile("umap_results.csv")
+  tsneResultsCsv := dimReduction.getFile("tsne_results.csv")
+  pcaResultsCsv := dimReduction.getFile("pca_results.csv")
+
+  return {
+    umapResultsCsv: umapResultsCsv,
+    tsneResultsCsv: tsneResultsCsv,
+    pcaResultsCsv: pcaResultsCsv
+  }
+})
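
The arg chain above implies a small CLI contract for the calculate-dimRed tool. A sketch of how such an entry point might parse it (the tool's real interface is not in this diff; defaults and help strings are assumptions):

    import argparse

    parser = argparse.ArgumentParser(description="PCA/t-SNE/UMAP on a count matrix")
    parser.add_argument("--file_path", required=True, help="input counts CSV")
    parser.add_argument("--output_dir", required=True, help="where *_results.csv files go")
    parser.add_argument("--n_pcs", type=int, default=50, help="number of principal components")
    parser.add_argument("--n_neighbors", type=int, default=15, help="neighborhood size for UMAP")
    args = parser.parse_args()
    # ...compute embeddings, then write umap_results.csv, tsne_results.csv and
    # pca_results.csv into args.output_dir (matching the template's saveFile calls)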

78 changes: 25 additions & 53 deletions workflow/src/main.tpl.tengo
@@ -1,6 +1,6 @@
 wf := import("@platforma-sdk/workflow-tengo:workflow")
-exec := import("@platforma-sdk/workflow-tengo:exec")
 assets:= import("@platforma-sdk/workflow-tengo:assets")
+render := import("@platforma-sdk/workflow-tengo:render")
 xsv := import("@platforma-sdk/workflow-tengo:pframes.xsv")
 pframes := import("@platforma-sdk/workflow-tengo:pframes")
 pSpec := import("@platforma-sdk/workflow-tengo:pframes.spec")
@@ -15,6 +15,9 @@ pfPCABatchConv := import(":pf-pca-batch-conv")
 // pfRawCountsConv := import(":pf-counts-conv")
 // pfNormCountsConv := import(":pf-norm-counts-conv")
 
+dimReductionTpl := assets.importTemplate(":dim-reduction-calculation")
+batchCorrectionTpl := assets.importTemplate(":batch-correction")
+
 wf.prepare(func(args){
   metaRefs := {}
   i := 0
@@ -46,31 +49,22 @@ wf.body(func(args) {
   csvCounts := xsv.exportFrame([rawCounts], "csv", { mem: defaultConvMem, cpu: defaultConvCpu })
 
   // Always run regular dimensionality reduction
-  dimReduction := exec.builder().
-    software(assets.importSoftware("@platforma-open/milaboratories.dimensionality-reduction.software:calculate-dimRed")).
-    mem("32GiB").
-    cpu(16).
-    addFile("rawCounts.csv", csvCounts).
-    arg("--file_path").arg("rawCounts.csv").
-    arg("--output_dir").arg(".").
-    arg("--n_pcs").arg(string(nPCs)).
-    arg("--n_neighbors").arg(string(nNeighbors)).
-    saveFile("umap_results.csv").
-    saveFile("tsne_results.csv").
-    saveFile("pca_results.csv").
-    printErrStreamToStdout().
-    saveStdoutContent().
-    cache(24 * 60 * 60 * 1000).
-    run()
+  dimReductionRender := render.create(dimReductionTpl, {
+    csvCounts: csvCounts,
+    nPCs: nPCs,
+    nNeighbors: nNeighbors,
+    mem: "32GiB",
+    cpu: 16
[Review comment on lines +56 to +57 — medium]

The memory (32GiB) and CPU (16) resources are hardcoded here and also in the batchCorrectionRender block. To improve maintainability and avoid duplication, consider defining these as constants at the top of the file and reusing them in both places. For example:

    // At the top of the file
    defaultExecMem := "32GiB"
    defaultExecCpu := 16

    // ... then use them in the render blocks
    mem: defaultExecMem,
    cpu: defaultExecCpu
+  })
 
   UMAPDimImportParams := pfUMAPConv.getColumns(blockId, inputSpec)
-  UMAPPf := xsv.importFile(dimReduction.getFile("umap_results.csv"), "csv", UMAPDimImportParams, {splitDataAndSpec: true, mem: defaultConvMem, cpu: defaultConvCpu})
+  UMAPPf := xsv.importFile(dimReductionRender.output("umapResultsCsv"), "csv", UMAPDimImportParams, {splitDataAndSpec: true, mem: defaultConvMem, cpu: defaultConvCpu})
 
   tSNEDimImportParams := pfTSNEConv.getColumns(blockId, inputSpec)
-  tSNEPf := xsv.importFile(dimReduction.getFile("tsne_results.csv"), "csv", tSNEDimImportParams, {splitDataAndSpec: true, mem: defaultConvMem, cpu: defaultConvCpu})
+  tSNEPf := xsv.importFile(dimReductionRender.output("tsneResultsCsv"), "csv", tSNEDimImportParams, {splitDataAndSpec: true, mem: defaultConvMem, cpu: defaultConvCpu})
 
   PCADimImportParams := pfPCAConv.getColumns(blockId, inputSpec)
-  PCAPf := xsv.importFile(dimReduction.getFile("pca_results.csv"), "csv", PCADimImportParams, {splitDataAndSpec: true, mem: defaultConvMem, cpu: defaultConvCpu})
+  PCAPf := xsv.importFile(dimReductionRender.output("pcaResultsCsv"), "csv", PCADimImportParams, {splitDataAndSpec: true, mem: defaultConvMem, cpu: defaultConvCpu})
 
   // Conditionally run batch correction if covariates are provided
   UMAPHarmonyPf := undefined
@@ -85,55 +79,38 @@

     csvCovariates := xsv.exportFrame(covariates, "csv", {})
 
-    batchCorrection := exec.builder().
-      software(assets.importSoftware("@platforma-open/milaboratories.dimensionality-reduction.software:calculate-batchCorrection")).
-      mem("32GiB").
-      cpu(16).
-      addFile("rawCounts.csv", csvCounts).
-      addFile("metadata.csv", csvCovariates).
-      arg("--counts").arg("rawCounts.csv").
-      arg("--metadata").arg("metadata.csv").
-      arg("--output").arg(".").
-      saveFile("umap_dimensions.csv").
-      saveFile("tsne_dimensions.csv").
-      // saveFile("batch_corrected_counts.csv").
-      // saveFile("batch_corrected_normalized_counts.csv").
-      saveFile("harmony_results.csv").
-      printErrStreamToStdout().
-      saveStdoutContent().
-      cache(24 * 60 * 60 * 1000).
-      run()
+    batchCorrectionRender := render.create(batchCorrectionTpl, {
+      csvCounts: csvCounts,
+      csvCovariates: csvCovariates,
+      mem: "32GiB",
+      cpu: 16
+    })
 
     // Process harmony-corrected results
     UMAPHarmonyDimImportParams := pfUMAPBatchConv.getColumns(blockId, inputSpec)
-    UMAPHarmonyPf = xsv.importFile(batchCorrection.getFile("umap_dimensions.csv"), "csv", UMAPHarmonyDimImportParams, {splitDataAndSpec: true, mem: defaultConvMem, cpu: defaultConvCpu})
+    UMAPHarmonyPf = xsv.importFile(batchCorrectionRender.output("umapDimensionsCsv"), "csv", UMAPHarmonyDimImportParams, {splitDataAndSpec: true, mem: defaultConvMem, cpu: defaultConvCpu})
 
     tSNEHarmonyDimImportParams := pfTSNEBatchConv.getColumns(blockId, inputSpec)
-    tSNEHarmonyPf = xsv.importFile(batchCorrection.getFile("tsne_dimensions.csv"), "csv", tSNEHarmonyDimImportParams, {splitDataAndSpec: true, mem: defaultConvMem, cpu: defaultConvCpu})
+    tSNEHarmonyPf = xsv.importFile(batchCorrectionRender.output("tsneDimensionsCsv"), "csv", tSNEHarmonyDimImportParams, {splitDataAndSpec: true, mem: defaultConvMem, cpu: defaultConvCpu})
 
     // batchCorrectedCountsImportParams := pfRawCountsConv.getColumns(blockId, inputSpec, species)
     // batchCorrectedCountsPf := xsv.importFile(batchCorrection.getFile("batch_corrected_counts.csv"), "csv", batchCorrectedCountsImportParams)
 
     PCAHarmonyDimImportParams := pfPCABatchConv.getColumns(blockId, inputSpec)
-    PCAHarmonyPf = xsv.importFile(batchCorrection.getFile("harmony_results.csv"), "csv", PCAHarmonyDimImportParams, {splitDataAndSpec: true, mem: defaultConvMem, cpu: defaultConvCpu})
+    PCAHarmonyPf = xsv.importFile(batchCorrectionRender.output("harmonyResultsCsv"), "csv", PCAHarmonyDimImportParams, {splitDataAndSpec: true, mem: defaultConvMem, cpu: defaultConvCpu})
 
     // batchCorrectedNormalizedCountsImportParams := pfNormCountsConv.getColumns(blockId, inputSpec, species)
    // batchCorrectedNormalizedCountsPf := xsv.importFile(batchCorrection.getFile("batch_corrected_normalized_counts.csv"), "csv", batchCorrectedNormalizedCountsImportParams)
   }
 
   // Make trace with informative label
-  traceLabel := "(nPCs:" + string(nPCs) + ", nNeighbors:" + string(nNeighbors) + ")"
+  traceLabel := "Dimensionality Reduction (nPCs:" + string(nPCs) + ", nNeighbors:" + string(nNeighbors) + ")"
 
   // Make trace
   trace := pSpec.makeTrace(inputSpec,
     {
       type: "milaboratories.dimensionality-reduction",
       id: blockId, importance: 35,
-      label: "Dimensionality Reduction"
-    },
-    {
-      type: "milaboratories.dimensionality-reduction",
-      id: blockId, importance: 30,
       label: traceLabel
     }
   )
@@ -142,12 +119,7 @@
     {
       type: "milaboratories.dimensionality-reduction",
       id: blockId, importance: 35,
-      label: "Batch Corrected Dimensionality Reduction"
-    },
-    {
-      type: "milaboratories.dimensionality-reduction",
-      id: blockId, importance: 30,
-      label: traceLabel
+      label: "Batch Corrected " + traceLabel
     }
   )
 
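As a worked example of the label change: with, say, nPCs = 50 and nNeighbors = 15, the standard trace now reads "Dimensionality Reduction (nPCs:50, nNeighbors:15)" and the batch-corrected one "Batch Corrected Dimensionality Reduction (nPCs:50, nNeighbors:15)", whereas before the parameter string "(nPCs:50, nNeighbors:15)" lived in a separate, lower-importance trace entry.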