diff --git a/.BBSoptions b/.BBSoptions
new file mode 100644
index 0000000..c51a3fc
--- /dev/null
+++ b/.BBSoptions
@@ -0,0 +1 @@
+UnsupportedPlatforms: win32
\ No newline at end of file
diff --git a/.Rbuildignore b/.Rbuildignore
index 989c4b9..d809f26 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -9,3 +9,9 @@
^__autograph_generated_filezt06eymn\.py$
^\.github$
^LICENSE\.md$
+^\.devcontainer
+^\.trigger_build$
+^Dockerfile$
+^dev
+^\.BBSoptions$
+^\.vscode
\ No newline at end of file
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 0000000..37091c3
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,13 @@
+{
+ "name": "Ibex devcontainer",
+ "shutdownAction": "none",
+ "build": {
+ "context": "..",
+ "dockerfile": "../Dockerfile"
+ },
+ "runArgs": [
+ // to use GPUs in container uncomment next line
+ // "--gpus=all",
+ "--name=ibex-devbox"
+ ]
+}
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index 32c3e56..ccce637 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -2,9 +2,9 @@
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
- branches: [main, master]
+ branches: [main, devel]
pull_request:
- branches: [main, master]
+ branches: [main, devel]
name: R-CMD-check
diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
index 9ee1f3d..d0a0796 100644
--- a/.github/workflows/test-coverage.yaml
+++ b/.github/workflows/test-coverage.yaml
@@ -2,9 +2,9 @@
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
- branches: [main, master]
+ branches: [main, devel]
pull_request:
- branches: [main, master]
+ branches: [main, devel]
name: test-coverage
diff --git a/.gitignore b/.gitignore
index c67ac69..df54527 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
docs
.DS_Store
dev/
+.vscode/
diff --git a/.markdownlint.json b/.markdownlint.json
new file mode 100644
index 0000000..cc51b65
--- /dev/null
+++ b/.markdownlint.json
@@ -0,0 +1,7 @@
+{
+ "MD007" : { "indent": 4 },
+ "MD013": false,
+ "MD024": false,
+ "MD025": false,
+ "MD033": false
+}
diff --git a/DESCRIPTION b/DESCRIPTION
index 8db19a6..ae66fcd 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,13 +1,14 @@
Package: Ibex
Title: Methods for BCR single-cell embedding
-Version: 0.99.33
+Version: 1.0.0.9000
Authors@R: c(
- person(given = "Nick", family = "Borcherding", role = c("aut", "cre"), email = "ncborch@gmail.com"))
+ person(given = "Nick", family = "Borcherding", role = c("aut", "cre", "cph"), email = "ncborch@gmail.com"),
+ person(given = "Qile", family = "Yang", role = "ctb", email = "qile.yang@berkeley.edu", comment = c(ORCID = "0009-0005-0148-2499")))
Description: Implementation of the Ibex algorithm for single-cell embedding based on BCR sequences. The package includes a standalone function to encode BCR sequence information by amino acid properties or sequence order using tensorflow-based autoencoder. In addition, the package interacts with SingleCellExperiment or Seurat data objects.
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: false
-RoxygenNote: 7.3.2
+RoxygenNote: 7.3.3
biocViews: Software, ImmunoOncology, SingleCell, Classification, Annotation, Sequencing
Depends:
R (>= 4.5.0)
@@ -17,7 +18,6 @@ Imports:
methods,
Matrix,
reticulate (>= 1.43.0),
- rlang,
SeuratObject,
scRepertoire,
SingleCellExperiment,
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..ff09f8c
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,8 @@
+FROM rocker/verse:4.5.1
+# Install every system library in the same layer as `apt-get update` so a stale cached package index is never reused.
+RUN apt-get update && apt-get install -y texlive-fonts-extra libgsl-dev
+WORKDIR /Ibex
+COPY . .
+RUN Rscript -e "install.packages('Seurat')"
+RUN Rscript -e "remotes::install_deps(dependencies = TRUE)"
+RUN Rscript -e "devtools::test()"
diff --git a/NAMESPACE b/NAMESPACE
index 9bf1cfd..a7da0e9 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -4,7 +4,6 @@ export(CoNGAfy)
export(Ibex_matrix)
export(combineExpandedBCR)
export(runIbex)
-import(basilisk)
importFrom(Matrix,colSums)
importFrom(Matrix,sparse.model.matrix)
importFrom(SeuratObject,CreateAssayObject)
@@ -30,7 +29,6 @@ importFrom(immApex,onehotEncoder)
importFrom(immApex,propertyEncoder)
importFrom(methods,is)
importFrom(methods,slot)
-importFrom(rlang,"%||%")
importFrom(scRepertoire,combineBCR)
importFrom(stats,as.formula)
importFrom(stats,dist)
diff --git a/NEWS b/NEWS
index 82536fc..5a20b98 100644
--- a/NEWS
+++ b/NEWS
@@ -1,124 +1,173 @@
-CHANGES IN VERSION 0.99.31
-------------------------
-* ibex_ensure_basilisk_external_dir no longer importsFrom basilisk.utils directly
-
-CHANGES IN VERSION 0.99.30
-------------------------
-* Moved data processing script out of vignette to inst/scripts
-* Added ibex_ensure_basilisk_external_dir with basilisk.utils
-
-CHANGES IN VERSION 0.99.29
-------------------------
-* Adding internal .OnLoad() function to handle basilisk lock dir issue
-
-CHANGES IN VERSION 0.99.28
-------------------------
-* Deprecated `quietBCRgenes()`
-* Converted `Ibex.matrix()` to `Ibex_matrix()`
-* Added Install Instructions for BioCondcutor on README and Vignette
-* Removed references to Keras3 Installation
-* Removed LazyData TRUE statement
-
-CHANGES IN VERSION 0.99.10
-------------------------
-* Added information to example data
-
-CHANGES IN VERSION 0.99.9
-------------------------
-* Examples now check if python is installed and running
-
-CHANGES IN VERSION 0.99.8
-------------------------
-* Updated example data to 2k HEL BEAM-Ab from 10x
-* Converted ibex_example into SCE object for compliance
-* Large revision of vignette to fit new data/format
-* Added species argument to runIbex
-* Updated CoNGA handling of assay for Seurat and Single-Cell Objects.
-
-CHANGES IN VERSION 0.99.7
-------------------------
-* Integration of Ibex with immApex
-* Updated Seurat object to v5
-* Updated support for SCE format for ```runIbex()```
-* Update ```CoNGAfy()``` to function with all versions of Seurat
-* Updated ```quietBCRgenes()``` to use VariableFeatures() call for SeuratV5 and backward compatibility.
-* Add ```getHumanIgPseudoGenes()``` to return a list of human Immunoglobulin Pseudo genes that are kept by ```quietBCRgenes()```
+# Ibex Development Version
+
+- removed rlang from Imports
+- Reformatted `NEWS` and `README.md`
+- As per `basilisk` documentation:
+ - Add `.BBSoptions` with `UnsupportedPlatforms: win32`
+ - Add `configure` and `configure.win` scripts
+- Add Docker infrastructure with `Dockerfile` and `.devcontainer/devcontainer.json`
+- Add `.markdownlint.json` to sourcecode
+- Improved testthat compatibility across platforms
+
+# Ibex 1.0.0
+
+## Major Underlying Changes
+
+- Integration of Ibex with immApex
+- Updated Seurat object to v5
+- Runs using basilisk instead of reticulate - no installation of python packages
+
+## Feature Changes
+
+- Renamed `Ibex.matrix()` to `Ibex_matrix()`
+- Updated support for SCE format for `runIbex()`
+- Update `CoNGAfy()` to function with all versions of Seurat
+- Updated `quietBCRgenes()` to use `VariableFeatures()` call for SeuratV5 and backward compatibility.
+- Add `getHumanIgPseudoGenes()` to return a list of human Immunoglobulin Pseudo genes that are kept by `quietBCRgenes()`
+
+## New Models
+
+- Added New Light and Heavy Chain Models
+- Encoding methods now accepted: "OHE", "atchleyFactors", "crucianiProperties", "kideraFactors", "MSWHIM","tScales", "zScales"
+- Sequence input:
+ - Human Heavy: 10000000
+ - Human Light: 5000000
+ - Human Heavy-Expanded: 5000000
+ - Human Light-Expanded: 2500000
+ - Mouse Heavy: 5000000
+ - Mouse Heavy-Expanded: 5000000
+- Trained convolutional and variational autoencoders for Heavy/Light chains
+ - Architecture: 512-256-128-256-512
+ - Parameters:
+ - Batch Size = 128
+ - Latent Dimensions = 128
+ - Epochs = 100
+ - Loss = Mean Squared Error (CNN) & KL Divergence (VAE)
+ - Activation = relu
+ - Learning rate = 1e-6
+ - Optimizers: Adam
+    - Early stopping was set to a patience of 10 for minimal validation loss and restoration of best weights
+ - CNN autoencoders have batch normalization layers between the dense layers.
+
+# Ibex 0.99.31
+
+- ibex_ensure_basilisk_external_dir no longer importsFrom basilisk.utils directly
+
+# Ibex 0.99.30
+
+- Moved data processing script out of vignette to inst/scripts
+- Added ibex_ensure_basilisk_external_dir with basilisk.utils
+
+# Ibex 0.99.29
+
+- Adding internal .OnLoad() function to handle basilisk lock dir issue
+
+# Ibex 0.99.28
+
+- Deprecated `quietBCRgenes()`
+- Converted `Ibex.matrix()` to `Ibex_matrix()`
+- Added Install Instructions for Bioconductor on README and Vignette
+- Removed references to Keras3 Installation
+- Removed LazyData TRUE statement
+
+# Ibex 0.99.10
+
+- Added information to example data
+
+# Ibex 0.99.9
+
+- Examples now check if python is installed and running
+
+# Ibex 0.99.8
+
+- Updated example data to 2k HEL BEAM-Ab from 10x
+- Converted ibex_example into SCE object for compliance
+- Large revision of vignette to fit new data/format
+- Added species argument to runIbex
+- Updated CoNGA handling of assay for Seurat and Single-Cell Objects.
+
+# Ibex 0.99.7
+
+- Integration of Ibex with immApex
+- Updated Seurat object to v5
+- Updated support for SCE format for ```runIbex()```
+- Update ```CoNGAfy()``` to function with all versions of Seurat
+- Updated ```quietBCRgenes()``` to use VariableFeatures() call for SeuratV5 and backward compatibility.
+- Add ```getHumanIgPseudoGenes()``` to return a list of human Immunoglobulin Pseudo genes that are kept by ```quietBCRgenes()```
## New Models
-* Added New Light and Heavy Chain Models
-* Encoding methods now accepted: "OHE", "atchleyFactors", "crucianiProperties", "kideraFactors", "MSWHIM","tScales", "zScales"
-* Sequence input:
- - Human Heavy: 10000000
- - Human Light: 5000000
- - Human Heavy-Expanded: 5000000
- - Human Light-Expanded: 2500000
- - Mouse Heavy: 5000000
- - Mouse Heavy-Expanded: 5000000
-* Trained convolutional and variational autoencoders for Heavy/Light chains
- - Architecture: 512-256-128-256-512
- - Parameters:
- Batch Size = 128
- Latent Dimensions = 128
- Epochs = 100
- Loss = Mean Squared Error (CNN) & KL Divergence (VAE)
- Activation = relu
- Learning rate = 1e-6
- - Optimizers: Adam
- - Early stopping was set to patients of 10 for minimal validation loss and restoration of best weights
- - CNN autoencoders have batch normalization layers between the dense layers.
-
-CHANGES IN VERSION 0.99.6
-------------------------
-* Implementing GitHub action workflows
-* Adding testthat framework
-* Deprecating clonalCommunity
-
-CHANGES IN VERSION 0.99.5
-------------------------
-* Added geometric encoding using the BLOSUM62 matrix
-* Trained classical and variational autoencoders for light/heavy chains with 1.5 million cdr sequences
- - Architecture: 256-128-30-128-256
- - Parameters:
- Batch Size = 64
- Latent Dimensions = 30
- Epochs = 100
- Loss = Mean Squared Error
- - Optimizers: Adam
- - Early stopping was set to patients of 10 for minimal validation loss and restoration of best weights
- - learn rate varied by models
- - classical auto encoders have batch normalization layers between the dense layers.
-
-CHANGES IN VERSION 0.99.4
-------------------------
-* Added chain.checker() function to allow for uncapitlized chain calls
-
-CHANGES IN VERSION 0.99.3
-------------------------
-* Updated models for manuscript revision
- - Architecture: 256-128-30-128-256
- - Parameters:
- Batch Size = 64
- Learning Rate = 0.001
- Latent Dimensions = 30
- Epochs = 50
- Loss = Mean Squared Error
- - Optimizers: RAdam (for amino acid properties) and RMSprop (for OHE)
- - Early stopping was set to patients of 10 for minimal validation loss and restoration of best weights
-
-
-CHANGES IN VERSION 0.99.2
-------------------------
-* Updated models to include radam optimization, early stop for min 10 epochs, and all trained on 800,000 unique cdr3s
-* quietBCRgenes() now does not remove human Ig pseudogenes
-
-
-CHANGES IN VERSION 0.99.1
-------------------------
-* Added detection of chain length to function call
-* Added support for direct output of combineBCR()
-* Modified quietBCR() to include constant regions and J-chains
-
-
-CHANGES IN VERSION 0.99.0
-------------------------
-* Initial commit
\ No newline at end of file
+
+- Added New Light and Heavy Chain Models
+- Encoding methods now accepted: "OHE", "atchleyFactors", "crucianiProperties", "kideraFactors", "MSWHIM","tScales", "zScales"
+- Sequence input:
+ - Human Heavy: 10000000
+ - Human Light: 5000000
+ - Human Heavy-Expanded: 5000000
+ - Human Light-Expanded: 2500000
+ - Mouse Heavy: 5000000
+ - Mouse Heavy-Expanded: 5000000
+- Trained convolutional and variational autoencoders for Heavy/Light chains
+ - Architecture: 512-256-128-256-512
+ - Parameters:
+ - Batch Size = 128
+ - Latent Dimensions = 128
+ - Epochs = 100
+ - Loss = Mean Squared Error (CNN) & KL Divergence (VAE)
+ - Activation = relu
+ - Learning rate = 1e-6
+ - Optimizers: Adam
+    - Early stopping was set to a patience of 10 for minimal validation loss and restoration of best weights
+ - CNN autoencoders have batch normalization layers between the dense layers.
+
+# Ibex 0.99.6
+
+- Implementing GitHub action workflows
+- Adding testthat framework
+- Deprecating clonalCommunity
+
+# Ibex 0.99.5
+
+- Added geometric encoding using the BLOSUM62 matrix
+- Trained classical and variational autoencoders for light/heavy chains with 1.5 million cdr sequences
+ - Architecture: 256-128-30-128-256
+ - Parameters:
+ - Batch Size = 64
+ - Latent Dimensions = 30
+ - Epochs = 100
+ - Loss = Mean Squared Error
+ - Optimizers: Adam
+    - Early stopping was set to a patience of 10 for minimal validation loss and restoration of best weights
+ - learn rate varied by models
+ - classical auto encoders have batch normalization layers between the dense layers.
+
+# Ibex 0.99.4
+
+- Added chain.checker() function to allow for uncapitalized chain calls
+
+# Ibex 0.99.3
+
+- Updated models for manuscript revision
+ - Architecture: 256-128-30-128-256
+ - Parameters:
+        - Batch Size = 64
+        - Learning Rate = 0.001
+        - Latent Dimensions = 30
+        - Epochs = 50
+        - Loss = Mean Squared Error
+ - Optimizers: RAdam (for amino acid properties) and RMSprop (for OHE)
+    - Early stopping was set to a patience of 10 for minimal validation loss and restoration of best weights
+
+# Ibex 0.99.2
+
+- Updated models to include radam optimization, early stop for min 10 epochs, and all trained on 800,000 unique cdr3s
+- quietBCRgenes() now does not remove human Ig pseudogenes
+
+# Ibex 0.99.1
+
+- Added detection of chain length to function call
+- Added support for direct output of combineBCR()
+- Modified quietBCR() to include constant regions and J-chains
+
+# Ibex 0.99.0
+
+- Initial commit
diff --git a/R/CoNGAfy.R b/R/CoNGAfy.R
index 2bee708..7b737fb 100644
--- a/R/CoNGAfy.R
+++ b/R/CoNGAfy.R
@@ -192,7 +192,6 @@ grabAssay <- function(input.data, assay) {
}
# Calculate mean across individual clones
-#' @importFrom rlang %||%
#' @importFrom Matrix sparse.model.matrix colSums
#' @importFrom SummarizedExperiment assay
#' @importFrom SeuratObject GetAssayData
diff --git a/R/Ibex-package.R b/R/Ibex-package.R
index 6e1a392..6ad30a1 100644
--- a/R/Ibex-package.R
+++ b/R/Ibex-package.R
@@ -37,3 +37,8 @@
#'
#' @keywords package
"_PACKAGE"
+
+
+## usethis namespace: start
+## usethis namespace: end
+NULL
diff --git a/R/basiliskEnv.R b/R/basiliskEnv.R
index 5d63f7c..dd5d892 100644
--- a/R/basiliskEnv.R
+++ b/R/basiliskEnv.R
@@ -1,5 +1,4 @@
-#' @import basilisk
-IbexEnv <- BasiliskEnvironment(
+IbexEnv <- basilisk::BasiliskEnvironment(
envname = "IbexEnv",
pkgname = "Ibex",
packages = c(
@@ -9,4 +8,4 @@ IbexEnv <- BasiliskEnvironment(
"h5py=3.13",
"numpy=1.26"
)
-)
\ No newline at end of file
+)
diff --git a/R/utils.R b/R/utils.R
index f9d50e6..b1ddac7 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -3,7 +3,6 @@
amino.acids <- c("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V")
# Add to meta data some of the metrics calculated
-#' @importFrom rlang %||%
#' @importFrom SingleCellExperiment colData
add.meta.data <- function(sc, meta, header) {
if (inherits(x=sc, what ="Seurat")) {
diff --git a/README.md b/README.md
index a67bb8a..a65d47d 100644
--- a/README.md
+++ b/README.md
@@ -1,23 +1,28 @@
- Ibex
+# Ibex
+
Using BCR sequences for graph embedding
+
[](https://github.com/BorchLab/Ibex/actions/workflows/R-CMD-check.yaml)
[](https://app.codecov.io/gh/BorchLab/Ibex?branch=master)
[](https://www.borch.dev/uploads/screpertoire/articles/ibex)
+
## Introduction
+
Single-cell sequencing is an integral tool in immunology and oncology, enabling researchers to measure gene expression and immune cell receptor profiling at the level of individual cells. We developed the [scRepertoire](https://github.com/BorchLab/scRepertoire) R package to facilitate the integration of immune receptor and gene expression data. However, leveraging clonal indices for more complex analyses—such as using clonality in cell embedding—remains challenging.
**Ibex** addresses this need by using deep learning to vectorize BCR sequences based on amino acid properties or their underlying order. Ibex is the sister package to [Trex](https://github.com/BorchLab/Trex), which focuses on TCR sequence data.
-# System Requirements
+## System Requirements
+
Ibex has been tested on R versions >= 4.0. For details on required R packages, refer to the package’s DESCRIPTION file. It is designed to work with single-cell objects containing BCR data generated using [scRepertoire](https://github.com/BorchLab/scRepertoire). Ibex has been tested on macOS and Linux.
-# Installation
+## Installation
-Ibex relies on the [immApex](https://github.com/BorchLab/immApex) API can be installed directly from GitHub:
+Ibex relies on the [immApex](https://github.com/BorchLab/immApex) API, which can be installed directly from GitHub:
```r
devtools::install_github("BorchLab/immApex")
@@ -32,13 +37,13 @@ if (!require("BiocManager", quietly = TRUE))
BiocManager::install("immApex")
```
-After immApex installation, you can install Ibex with:
+After immApex installation, you can install Ibex with:
```r
devtools::install_github("BorchLab/Ibex")
```
-Or via Bioconductor
+Or via Bioconductor:
```r
BiocManager::install("Ibex")
@@ -46,28 +51,28 @@ BiocManager::install("Ibex")
The main version of Ibex is submitted to Bioconductor (installation instructions will be updated after review). By default, Ibex will automatically pull deep learning models from a [Zenodo repository](https://zenodo.org/records/14919286) and cache them locally.
-# Usage/Demos
+## Usage/Demos
Ibex integrates smoothly into most popular R-based single-cell workflows, including **Seurat** and **Bioconductor/SingleCellExperiment.**
-## Quick Start
+### Quick Start
-See the [vignette](https://www.borch.dev/uploads/screpertoire/articles/ibex) for a step-by-step tutorial.
+See the [vignette](https://www.borch.dev/uploads/screpertoire/articles/ibex) for a step-by-step tutorial.
-## Autoencoded Matrix
+### Autoencoded Matrix
-The Ibex algorithm allows users to select BCR-based metrics to return autoencoded values to be used in dimensional reduction. If single-cell objects are not filtered for B cells with BCR, `Ibex_matrix()` will still return values, however IBEX_1 will be based on the disparity of BCR-containing and BCR-non-containing cells based on the Ibex algorithm.
+The Ibex algorithm allows users to select BCR-based metrics to return autoencoded values to be used in dimensional reduction. If single-cell objects are not filtered for B cells with BCR, `Ibex_matrix()` will still return values, however IBEX_1 will be based on the disparity of BCR-containing and BCR-non-containing cells based on the Ibex algorithm.
```r
library(Ibex)
my_ibex <- Ibex_matrix(singleObject)
```
-## Seurat or Single-Cell Experiment
+### Seurat or Single-Cell Experiment
-You can run Ibex within your Seurat or Single-Cell Experiemt workflow. **Importantly** `runIbex()` will automatically filter single-cells that do not contain BCR information in the meta data of the single-cell object.
+You can run Ibex within your Seurat or Single-Cell Experiment workflow. **Importantly** `runIbex()` will automatically filter single-cells that do not contain BCR information in the meta data of the single-cell object.
```r
seuratObj_Bonly <- runIbex(seuratObj, #The single cell object
@@ -78,11 +83,11 @@ seuratObj_Bonly <- runIbex(seuratObj, #The single cell object
"kideraFactors", "MSWHIM", "tScales", "OHE"), # Method of Encoding
geometric.theta = pi/3, # theta for Geometric Encoding
species = "Human") # "Mouse" or "Human"
-
+
seuratObj_Bonly <- runIbex(seuratObj, reduction.name = "Ibex")
```
-## After Running Ibex
+### After Running Ibex
Once the Ibex embeddings are part of your Seurat object, you can use these embeddings to generate a t-SNE or UMAP:
@@ -93,13 +98,15 @@ seuratObj <- RunUMAP(seuratObj, reduction = "Ibex", reduction.key = "Ibex_")
If using Seurat package, the Ibex embedding information and gene expression PCA can be used to find the [Weighted Nearest Neighbors](https://pubmed.ncbi.nlm.nih.gov/34062119/). Before applying the WNN approach, best practice would be to remove the BCR-related genes from the list of variable genes and rerunning the PCA analysis.
-### Recalculate PCA without BCR genes with quietBCRgenes() function in Ibex.
+#### Recalculate PCA without BCR genes with quietBCRgenes() function in Ibex.
+
```r
seuratObj <- quietBCRgenes(seuratObj)
seuratObj <- RunPCA(seuratObj)
```
-### Running WNN approach
+#### Running WNN approach
+
```r
seuratObj <- FindMultiModalNeighbors(seuratObj,
reduction.list = list("pca", "Ibex"),
@@ -111,17 +118,19 @@ seuratObj <- RunUMAP(seuratObj,
reduction.name = "wnn.umap",
reduction.key = "wnnUMAP_")
```
+
## Bug Reports/New Features
-#### If you run into any issues or bugs please submit a [GitHub issue](https://github.com/BorchLab/Ibex/issues) with details of the issue.
+### If you run into any issues or bugs please submit a [GitHub issue](https://github.com/BorchLab/Ibex/issues) with details of the issue.
- If possible please include a [reproducible example](https://reprex.tidyverse.org/).
Alternatively, an example with the internal **ibex_example** would
be extremely helpful.
-#### Any requests for new features or enhancements can also be submitted as [GitHub issues](https://github.com/BorchLab/Ibex/issues).
+### Any requests for new features or enhancements can also be submitted as [GitHub issues](https://github.com/BorchLab/Ibex/issues).
-#### [Pull Requests](https://github.com/BorchLab/Ibex/pulls) are welcome for bug fixes, new features, or enhancements.
+### [Pull Requests](https://github.com/BorchLab/Ibex/pulls) are welcome for bug fixes, new features, or enhancements.
## Citation
-More information on Ibex is available at our [Biorxiv preprint](https://www.biorxiv.org/content/10.1101/2022.11.09.515787v2).
+
+More information on Ibex is available at our [Biorxiv preprint](https://www.biorxiv.org/content/10.1101/2022.11.09.515787v2).
diff --git a/configure b/configure
new file mode 100755
index 0000000..c5ad505
--- /dev/null
+++ b/configure
@@ -0,0 +1,2 @@
+#!/bin/sh
+${R_HOME}/bin/Rscript -e "basilisk::configureBasiliskEnv(src = 'R/basiliskEnv.R')"
diff --git a/configure.win b/configure.win
new file mode 100755
index 0000000..dd3e17d
--- /dev/null
+++ b/configure.win
@@ -0,0 +1,2 @@
+#!/bin/sh
+${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe -e "basilisk::configureBasiliskEnv(src = 'R/basiliskEnv.R')"
diff --git a/data/ibex_vdj.rda b/data/ibex_vdj.rda
index 521de08..7c05d7d 100644
Binary files a/data/ibex_vdj.rda and b/data/ibex_vdj.rda differ
diff --git a/inst/WORDLIST b/inst/WORDLIST
index e6ff5b4..e536e44 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -5,7 +5,6 @@ Autoencoder
Autoencoders
BCR
BLOSUM
-Bioconductor's
Biorxiv
CDR
CMD
@@ -23,11 +22,14 @@ HEL
IGH
IGK
Ig
+Interoperate
Kidera
Lysozyme
MSWHIM
MultiUMAP
OHE
+ORCID
+Pretrained
SNE
Schattgen
SingleCellExperiment
@@ -55,21 +57,19 @@ combineBCR
contig
crucianiProperties
csv
-customizable
embeddings
gp
hydrophobicity
ident
-identOriginal
immApex
+interoperates
interpretability
-keras
kideraFactors
nCount
nFeature
physicochemical
preprint
-pseudogenes
+pretrained
pv
quietBCRgenes
runIbex
diff --git a/man/Ibex-package.Rd b/man/Ibex-package.Rd
index 7759772..1e11fea 100644
--- a/man/Ibex-package.Rd
+++ b/man/Ibex-package.Rd
@@ -42,7 +42,12 @@ runtime; no manual setup is usually required.
\url{https://github.com/BorchLab/Ibex/issues}
}
\author{
-\strong{Maintainer}: Nick Borcherding \email{ncborch@gmail.com}
+\strong{Maintainer}: Nick Borcherding \email{ncborch@gmail.com} [copyright holder]
+
+Other contributors:
+\itemize{
+ \item Qile Yang \email{qile.yang@berkeley.edu} (\href{https://orcid.org/0009-0005-0148-2499}{ORCID}) [contributor]
+}
}
\keyword{package}
diff --git a/tests/testthat/helper-testingFunctions.R b/tests/testthat/helper-testingFunctions.R
index 316f0da..714d981 100644
--- a/tests/testthat/helper-testingFunctions.R
+++ b/tests/testthat/helper-testingFunctions.R
@@ -1,3 +1,25 @@
getdata <- function(dir, name) {
- readRDS(paste("testdata/", dir, "/", name, ".rds", sep = "")) # could move testdata 1 dir lvl up nstead
-}
\ No newline at end of file
+ readRDS(paste("testdata/", dir, "/", name, ".rds", sep = ""))
+}
+
+skip_if_py_not_installed <- function(python_packages) {
+    # Skip the calling test when any module in `python_packages` is unavailable inside the basilisk-run IbexEnv.
+    missing_packages <- basilisk::basiliskRun(
+        env = IbexEnv,
+        fun = function(packages) {
+            packages[!vapply(packages, reticulate::py_module_available, logical(1))]
+        },
+        packages = python_packages
+    )
+    # vapply() keeps the mask logical even for empty input; the message below names every missing module.
+    if (length(missing_packages) > 0) {
+        testthat::skip(paste0(
+            "Required Python Module",
+            if (length(missing_packages) > 1) "s" else "",
+            " `",
+            paste(missing_packages, collapse = "`, `"),
+            "` not available."
+        ))
+    }
+
+}
diff --git a/tests/testthat/test-CoNGAfy.R b/tests/testthat/test-CoNGAfy.R
index 2ba7305..72b4a2f 100644
--- a/tests/testthat/test-CoNGAfy.R
+++ b/tests/testthat/test-CoNGAfy.R
@@ -25,6 +25,9 @@ test_that("CoNGAfy filters cells correctly", {
})
test_that("CoNGAfy stops if amino acid sequences are missing", {
+
+ local_reproducible_output(unicode = FALSE)
+
sc_example <- suppressWarnings(CreateSeuratObject(counts = matrix(rnorm(1000),
nrow = 10,
ncol = 100)))
diff --git a/tests/testthat/test-Ibex_matrix.R b/tests/testthat/test-Ibex_matrix.R
index e0a85c1..53eca53 100644
--- a/tests/testthat/test-Ibex_matrix.R
+++ b/tests/testthat/test-Ibex_matrix.R
@@ -1,8 +1,10 @@
# test script for Ibex_matrix.R - testcases are NOT comprehensive!
-library(Ibex)
ibex_example <- get(data("ibex_example"))
test_that("Ibex_matrix handles incorrect inputs gracefully", {
+
+ local_reproducible_output(unicode = FALSE)
+
expect_error(Ibex_matrix(input.data = ibex_example, chain = "Middle", method = "encoder"),
"'arg' should be one of \"Heavy\", \"Light\"")
expect_error(Ibex_matrix(input.data = ibex_example, chain = "Heavy", method = "xyz"),
@@ -15,68 +17,61 @@ test_that("Ibex_matrix handles incorrect inputs gracefully", {
"non-numeric argument to mathematical function")
})
-keras_installed <- reticulate::py_module_available("keras")
-numpy_installed <- reticulate::py_module_available("numpy")
+test_that("Ibex_matrix returns expected output format", {
+ skip_if_py_not_installed(c("keras", "numpy"))
+ result <- Ibex_matrix(input.data = ibex_example,
+ chain = "Heavy",
+ method = "encoder",
+ encoder.model = "VAE",
+ encoder.input = "atchleyFactors",
+ verbose = FALSE)
+ expect_true(is.data.frame(result))
+ expect_true(all(grepl("^Ibex_", colnames(result))))
+ expect_gt(nrow(result), 0)
+ expect_gt(ncol(result), 0)
+})
+
+test_that("Ibex_matrix works with encoder method", {
+ skip_if_py_not_installed(c("keras", "numpy"))
+ result <- Ibex_matrix(input.data = ibex_example,
+ chain = "Light",
+ method = "encoder",
+ encoder.model = "CNN",
+ encoder.input = "OHE",
+ verbose = FALSE)
+ expect_true(is.data.frame(result))
+ expect_true(all(grepl("^Ibex_", colnames(result))))
+})
+
+test_that("Ibex_matrix works with geometric method", {
+ skip_if_py_not_installed(c("keras", "numpy"))
+ result <- Ibex_matrix(input.data = ibex_example,
+ chain = "Heavy",
+ method = "geometric",
+ geometric.theta = pi / 4,
+ verbose = FALSE)
+ expect_true(is.data.frame(result))
+ expect_true(all(grepl("^Ibex_", colnames(result))))
+})
-# 2. If not installed, skip everything:
-if (!keras_installed || !numpy_installed) {
- test_that("Skipping Ibex_matrix tests", {
- skip("Required Python modules (Keras, NumPy) are not available.")
- })
-} else {
-
- test_that("Ibex_matrix returns expected output format", {
- result <- Ibex_matrix(input.data = ibex_example,
+test_that("Ibex_matrix handles different species options", {
+ skip_if_py_not_installed(c("keras", "numpy"))
+ result1 <- Ibex_matrix(input.data = ibex_example,
chain = "Heavy",
method = "encoder",
encoder.model = "VAE",
encoder.input = "atchleyFactors",
+ species = "Human",
verbose = FALSE)
- expect_true(is.data.frame(result))
- expect_true(all(grepl("^Ibex_", colnames(result))))
- expect_gt(nrow(result), 0)
- expect_gt(ncol(result), 0)
- })
-
- test_that("Ibex_matrix works with encoder method", {
- result <- Ibex_matrix(input.data = ibex_example,
- chain = "Light",
- method = "encoder",
- encoder.model = "CNN",
- encoder.input = "OHE",
- verbose = FALSE)
- expect_true(is.data.frame(result))
- expect_true(all(grepl("^Ibex_", colnames(result))))
- })
-
- test_that("Ibex_matrix works with geometric method", {
- result <- Ibex_matrix(input.data = ibex_example,
+ result2 <- Ibex_matrix(input.data = ibex_example,
chain = "Heavy",
- method = "geometric",
- geometric.theta = pi / 4,
+ method = "encoder",
+ encoder.model = "VAE",
+ encoder.input = "atchleyFactors",
+ species = "Mouse",
verbose = FALSE)
- expect_true(is.data.frame(result))
- expect_true(all(grepl("^Ibex_", colnames(result))))
- })
-
- test_that("Ibex_matrix handles different species options", {
- result1 <- Ibex_matrix(input.data = ibex_example,
- chain = "Heavy",
- method = "encoder",
- encoder.model = "VAE",
- encoder.input = "atchleyFactors",
- species = "Human",
- verbose = FALSE)
- result2 <- Ibex_matrix(input.data = ibex_example,
- chain = "Heavy",
- method = "encoder",
- encoder.model = "VAE",
- encoder.input = "atchleyFactors",
- species = "Mouse",
- verbose = FALSE)
- expect_true(is.data.frame(result1))
- expect_true(is.data.frame(result2))
- expect_true(all(grepl("^Ibex_", colnames(result1))))
- expect_true(all(grepl("^Ibex_", colnames(result2))))
- })
-}
+ expect_true(is.data.frame(result1))
+ expect_true(is.data.frame(result2))
+ expect_true(all(grepl("^Ibex_", colnames(result1))))
+ expect_true(all(grepl("^Ibex_", colnames(result2))))
+})
diff --git a/tests/testthat/test-runIbex.R b/tests/testthat/test-runIbex.R
index 6b6d856..aa3dc68 100644
--- a/tests/testthat/test-runIbex.R
+++ b/tests/testthat/test-runIbex.R
@@ -1,8 +1,10 @@
# test script for runIbex.R - testcases are NOT comprehensive!
-library(Ibex)
ibex_example <- get(data("ibex_example"))
test_that("runIbex handles incorrect inputs gracefully", {
+
+ local_reproducible_output(unicode = FALSE)
+
expect_error(runIbex(sc.data = ibex_example, chain = "Middle", method = "encoder"),
"'arg' should be one of \"Heavy\", \"Light\"")
expect_error(runIbex(sc.data = ibex_example, chain = "Heavy", method = "xyz"),
@@ -15,97 +17,94 @@ test_that("runIbex handles incorrect inputs gracefully", {
"non-numeric argument to mathematical function")
})
-keras_installed <- reticulate::py_module_available("keras")
-numpy_installed <- reticulate::py_module_available("numpy")
+test_that("runIbex works with Seurat object", {
+  skip_if_py_not_installed(c("keras", "numpy"))  # presumably skips when these Python modules are missing
+  sc_example <- suppressWarnings(SeuratObject::CreateSeuratObject(counts = matrix(rnorm(1000), nrow = 10, ncol = 100)))
+  sc_example[["CTaa"]] <- sample(c("CASSL", "CASST", NA, "NA_IGHV1", "None_IGHV2"), 100, replace = TRUE)
+  sc_example[["CTgene"]] <- sample(c("NA_IGHV1.IGD1.IGJ1.IGM", "NA_IGHV1.IGD1.IGJ1.IGM", NA, "NA_IGHV1.IGD1.IGJ1.IGM", "None_IGHV1.IGD1.IGJ1.IGM"), 100, replace = TRUE)
+  # Run runIbex with the VAE encoder on the heavy chain, writing to a custom reduction name.
+  result <- runIbex(sc_example,
+                    chain = "Heavy",
+                    method = "encoder",
+                    encoder.model = "VAE",
+                    encoder.input = "atchleyFactors",
+                    reduction.name = "IbexTest",
+                    verbose = FALSE)
+  # The reduction must be registered and the return value must still be a Seurat (S4) object.
+  expect_true("IbexTest" %in% names(result@reductions))
+  expect_s4_class(result, "Seurat")
+})
+
+test_that("runIbex works with geometric method", {
+  skip_if_py_not_installed(c("keras", "numpy"))
+  sc_example <- suppressWarnings(SeuratObject::CreateSeuratObject(counts = matrix(rnorm(1000), nrow = 10, ncol = 100)))
+  sc_example[["CTaa"]] <- sample(c("CASSL", "CASST", NA, "NA_IGHV1", "None_IGHV2"), 100, replace = TRUE)
+  sc_example[["CTgene"]] <- sample(c("NA_IGHV1.IGD1.IGJ1.IGM", "NA_IGHV1.IGD1.IGJ1.IGM", NA, "NA_IGHV1.IGD1.IGJ1.IGM", "None_IGHV1.IGD1.IGJ1.IGM"), 100, replace = TRUE)
+  # NOTE(review): method = "geometric" is used here; confirm the keras/numpy skip above is actually required.
+  result <- runIbex(sc_example,
+                    chain = "Heavy",
+                    method = "geometric",
+                    geometric.theta = pi / 4,
+                    reduction.name = "IbexGeo",
+                    verbose = FALSE)
+  # The reduction must be registered and the return value must still be a Seurat (S4) object.
+  expect_true("IbexGeo" %in% names(result@reductions))
+  expect_s4_class(result, "Seurat")
+})
+
+test_that("runIbex filters cells correctly", {
+  skip_if_py_not_installed(c("keras", "numpy"))
+  sc_example <- suppressWarnings(SeuratObject::CreateSeuratObject(counts = matrix(rnorm(1000), nrow = 10, ncol = 100)))
+  sc_example[["CTaa"]] <- c(rep("CASSL", 50), rep(NA, 50))  # last 50 cells carry no amino acid sequence
+  sc_example[["CTgene"]] <- sample(c("NA_IGHV1.IGD1.IGJ1.IGM", "NA_IGHV1.IGD1.IGJ1.IGM", NA, "NA_IGHV1.IGD1.IGJ1.IGM", "None_IGHV1.IGD1.IGJ1.IGM"), 100, replace = TRUE)
+  result <- runIbex(sc_example,
+                    chain = "Heavy",
+                    method = "encoder",
+                    encoder.model = "VAE",
+                    encoder.input = "atchleyFactors",
+                    reduction.name = "IbexFiltered",
+                    verbose = FALSE)
+  # The input had 100 cells; the returned object must have the reduction and fewer columns.
+  expect_true("IbexFiltered" %in% names(result@reductions))
+  expect_lt(ncol(result), 100) # Ensures some cells were filtered out
+})
-# 2. If not installed, skip everything:
-if (!keras_installed || !numpy_installed) {
- test_that("Skipping runIbex tests", {
- skip("Required Python modules (Keras, NumPy) are not available.")
- })
-} else {
-
- test_that("runIbex works with Seurat object", {
- suppressWarnings(sc_example <- CreateSeuratObject(counts = matrix(rnorm(1000), nrow = 10, ncol = 100)))
- sc_example[["CTaa"]] <- sample(c("CASSL", "CASST", NA, "NA_IGHV1", "None_IGHV2"), 100, replace = TRUE)
- sc_example[["CTgene"]] <- sample(c("NA_IGHV1.IGD1.IGJ1.IGM", "NA_IGHV1.IGD1.IGJ1.IGM", NA, "NA_IGHV1.IGD1.IGJ1.IGM", "None_IGHV1.IGD1.IGJ1.IGM"), 100, replace = TRUE)
-
- result <- runIbex(sc_example,
+test_that("runIbex stops if amino acid sequences are missing", {
+  # Standardise testthat output (unicode off) before matching the error text below.
+  skip_if_py_not_installed(c("keras", "numpy"))
+  local_reproducible_output(unicode = FALSE)
+  # Bare Seurat object: no CTaa/CTgene metadata is attached on purpose.
+  sc_example <- suppressWarnings(SeuratObject::CreateSeuratObject(counts = matrix(rnorm(1000), nrow = 10, ncol = 100)))
+  # fixed = TRUE matches the message literally, so the trailing "." is not a regex wildcard.
+  expect_error(runIbex(sc_example,
+                       chain = "Heavy",
+                       method = "encoder",
+                       encoder.model = "VAE",
+                       encoder.input = "atchleyFactors",
+                       verbose = FALSE),
+               "Amino acid sequences are not added to the single-cell object correctly.", fixed = TRUE)
+})
+
+test_that("runIbex works with different reduction names", {
+ skip_if_py_not_installed(c("keras", "numpy"))
+ sc_example <- suppressWarnings(SeuratObject::CreateSeuratObject(counts = matrix(rnorm(1000), nrow = 10, ncol = 100)))
+ sc_example[["CTaa"]] <- sample(c("CASSL", "CASST", NA, "NA_IGHV1", "None_IGHV2"), 100, replace = TRUE)
+ sc_example[["CTgene"]] <- sample(c("NA_IGHV1.IGD1.IGJ1.IGM", "NA_IGHV1.IGD1.IGJ1.IGM", NA, "NA_IGHV1.IGD1.IGJ1.IGM", "None_IGHV1.IGD1.IGJ1.IGM"), 100, replace = TRUE)
+ result1 <- runIbex(sc_example,
chain = "Heavy",
method = "encoder",
encoder.model = "VAE",
encoder.input = "atchleyFactors",
- reduction.name = "IbexTest",
+ reduction.name = "Ibex1",
verbose = FALSE)
-
- expect_true("IbexTest" %in% names(result@reductions))
- expect_true(inherits(result, "Seurat"))
- })
- test_that("runIbex works with geometric method", {
- sc_example <- suppressWarnings(SeuratObject::CreateSeuratObject(counts = matrix(rnorm(1000), nrow = 10, ncol = 100)))
- sc_example[["CTaa"]] <- sample(c("CASSL", "CASST", NA, "NA_IGHV1", "None_IGHV2"), 100, replace = TRUE)
- sc_example[["CTgene"]] <- sample(c("NA_IGHV1.IGD1.IGJ1.IGM", "NA_IGHV1.IGD1.IGJ1.IGM", NA, "NA_IGHV1.IGD1.IGJ1.IGM", "None_IGHV1.IGD1.IGJ1.IGM"), 100, replace = TRUE)
-
- result <- runIbex(sc_example,
- chain = "Heavy",
- method = "geometric",
- geometric.theta = pi / 4,
- reduction.name = "IbexGeo",
- verbose = FALSE)
-
- expect_true("IbexGeo" %in% names(result@reductions))
- expect_true(inherits(result, "Seurat"))
- })
-
- test_that("runIbex filters cells correctly", {
- sc_example <- suppressWarnings(CreateSeuratObject(counts = matrix(rnorm(1000), nrow = 10, ncol = 100)))
- sc_example[["CTaa"]] <- c(rep("CASSL", 50), rep(NA, 50))
- sc_example[["CTgene"]] <- sample(c("NA_IGHV1.IGD1.IGJ1.IGM", "NA_IGHV1.IGD1.IGJ1.IGM", NA, "NA_IGHV1.IGD1.IGJ1.IGM", "None_IGHV1.IGD1.IGJ1.IGM"), 100, replace = TRUE)
- result <- runIbex(sc_example,
- chain = "Heavy",
+ result2 <- runIbex(sc_example, chain = "Heavy",
method = "encoder",
encoder.model = "VAE",
encoder.input = "atchleyFactors",
- reduction.name = "IbexFiltered",
+ reduction.name = "Ibex2",
verbose = FALSE)
-
- expect_true("IbexFiltered" %in% names(result@reductions))
- expect_lt(ncol(result), 100) # Ensures some cells were filtered out
- })
- test_that("runIbex stops if amino acid sequences are missing", {
- sc_example <- suppressWarnings(SeuratObject::CreateSeuratObject(counts = matrix(rnorm(1000), nrow = 10, ncol = 100)))
-
- expect_error(runIbex(sc_example,
- chain = "Heavy",
- method = "encoder",
- encoder.model = "VAE",
- encoder.input = "atchleyFactors",
- verbose = FALSE),
- "Amino acid sequences are not added to the single-cell object correctly.")
- })
-
- test_that("runIbex works with different reduction names", {
- sc_example <- suppressWarnings(SeuratObject::CreateSeuratObject(counts = matrix(rnorm(1000), nrow = 10, ncol = 100)))
- sc_example[["CTaa"]] <- sample(c("CASSL", "CASST", NA, "NA_IGHV1", "None_IGHV2"), 100, replace = TRUE)
- sc_example[["CTgene"]] <- sample(c("NA_IGHV1.IGD1.IGJ1.IGM", "NA_IGHV1.IGD1.IGJ1.IGM", NA, "NA_IGHV1.IGD1.IGJ1.IGM", "None_IGHV1.IGD1.IGJ1.IGM"), 100, replace = TRUE)
- result1 <- runIbex(sc_example,
- chain = "Heavy",
- method = "encoder",
- encoder.model = "VAE",
- encoder.input = "atchleyFactors",
- reduction.name = "Ibex1",
- verbose = FALSE)
-
- result2 <- runIbex(sc_example, chain = "Heavy",
- method = "encoder",
- encoder.model = "VAE",
- encoder.input = "atchleyFactors",
- reduction.name = "Ibex2",
- verbose = FALSE)
-
- expect_true("Ibex1" %in% names(result1@reductions))
- expect_true("Ibex2" %in% names(result2@reductions))
- })
-}
+ expect_true("Ibex1" %in% names(result1@reductions))
+ expect_true("Ibex2" %in% names(result2@reductions))
+})