Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: Ibex
Title: Methods for BCR single-cell embedding
Version: 1.0.0.9000
Version: 1.0.0.9001
Authors@R: c(
person(given = "Nick", family = "Borcherding", role = c("aut", "cre", "cph"), email = "ncborch@gmail.com"),
person(given = "Qile", family = "Yang", role = "ctb", email = "qile.yang@berkeley.edu", comment = c(ORCID = "0009-0005-0148-2499")))
Expand Down Expand Up @@ -33,6 +33,7 @@ Suggests:
ggplot2,
kableExtra,
knitr,
lifecycle,
markdown,
mumosa,
patchwork,
Expand Down
4 changes: 3 additions & 1 deletion NEWS
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
# Ibex Development Version

- removed rlang from Imports
- `Ibex_matrix()` now accepts character vectors of amino acid sequences directly
- removed rlang from Imports, added lifecycle
- Reformatted `NEWS` and `README.md`
- As per `basilisk` documentation:
- Add `.BBSoptions` with `UnsupportedPlatforms: win32`
- Add `configure` and `configure.win` scripts
- Add Docker infrastructure with `Dockerfile` and `.devcontainer/devcontainer.json`
- Add `.markdownlint.json` to the source code
- Improved testthat compatibility across platforms
- Improve adherence to verbosity arguments

# Ibex 1.0.0

Expand Down
22 changes: 18 additions & 4 deletions R/Ibex_matrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,21 @@
#' chain = "Heavy",
#' method = "geometric",
#' geometric.theta = pi)
#'
#' # Using a character vector of amino acid sequences
#' sequences <- c("CARDYW", "CARDSSGYW", "CARDTGYW")
#' ibex_values <- Ibex_matrix(sequences,
#' chain = "Heavy",
#' method = "geometric")
#'
#' @param input.data Input data, which can be:
#' - A Single Cell Object in Seurat or SingleCellExperiment format
#' - The output of [scRepertoire::combineBCR()] or [combineExpandedBCR()]
#' - `r lifecycle::badge("experimental")` A character vector of amino acid
#' sequences. The `chain` parameter specifies whether these are heavy or
#' light chain sequences. For expanded models (CNN.EXP/VAE.EXP), sequences
#' should be formatted as CDR1-CDR2-CDR3 separated by hyphens. If the vector
#' is named, the names will be used as row names in the output.
#' @param chain Character. Specifies which chain to analyze:
#' - "Heavy" for the heavy chain
#' - "Light" for the light chain
Expand Down Expand Up @@ -76,6 +87,11 @@ Ibex_matrix <- function(input.data,
expanded.sequences <- FALSE
}

# Handle character vector input - convert to data.frame format for getIR()
if (is.character(input.data)) {
input.data <- .convert_aa_vector_to_bcr_df(input.data, chain)
}

# Define loci based on chain selection
loci <- if (chain == "Heavy") "IGH" else c("IGK", "IGL")

Expand Down Expand Up @@ -134,7 +150,7 @@ Ibex_matrix <- function(input.data,

tryCatch({
model <- keras$models$load_model(mpath)
pred <- model$predict(xmat)
pred <- model$predict(xmat, verbose = if (verbose) "auto" else 0)
as.array(pred) # This will be the return value
}, finally = {
rm(pred)
Expand All @@ -151,13 +167,11 @@ Ibex_matrix <- function(input.data,
} else if (method == "geometric") {
if (verbose) print("Performing geometric transformation...")
BCR[,"cdr3_aa"] <- gsub("-", "", BCR[,"cdr3_aa"])
reduction <- suppressMessages(geometricEncoder(BCR[,"cdr3_aa"], theta = geometric.theta))[[3]]
reduction <- geometricEncoder(BCR[,"cdr3_aa"], theta = geometric.theta, verbose = FALSE)[[3]]
}
reduction <- as.data.frame(reduction)
barcodes <- BCR[,"barcode"]
rownames(reduction) <- barcodes
colnames(reduction) <- paste0("Ibex_", seq_len(ncol(reduction)))
return(reduction)
}


40 changes: 40 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,46 @@

amino.acids <- c("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V")

# Convert a character vector of amino acid sequences into the minimal
# BCR data.frame (columns `barcode`, `CTaa`, `CTgene`) that getIR() can process.
#
# Arguments:
#   sequences  Character vector of amino acid sequences. Hyphens are permitted
#              (CDR1-CDR2-CDR3 format for expanded models). Matching against
#              `amino.acids` is case-insensitive; sequences are returned
#              upper-cased so the emitted values are exactly what was
#              validated. If the vector is named, the names become barcodes;
#              otherwise barcodes are the positions "1", "2", ...
#   chain      "Heavy" places each sequence in the first (heavy) position of
#              CTaa; any other value places it in the second (light)
#              position, after a "None_" placeholder.
#
# Returns:
#   A data.frame with one row per input sequence (zero rows for empty input)
#   and character columns `barcode`, `CTaa` and `CTgene`.
#
# Errors:
#   Calls stop() when a sequence is NA or contains a character that is neither
#   a hyphen nor a member of `amino.acids`. The first offending sequence is
#   reported by position.
.convert_aa_vector_to_bcr_df <- function(sequences, chain) {
  n <- length(sequences)
  # Capture names up front; the string transformations below need not keep them.
  labels <- names(sequences)

  # NA would otherwise surface as the misleading "Invalid character(s) 'NA'".
  missing_idx <- which(is.na(sequences))
  if (length(missing_idx) > 0) {
    stop(
      "Missing (NA) value found in sequence ", missing_idx[[1]],
      ". Only standard amino acids",
      " (and hyphens for expanded CDR1-CDR2-CDR3 format) are allowed."
    )
  }

  # Normalise case once so validation and output agree.
  sequences <- toupper(unname(sequences))

  # Validate all sequences in one pass; hyphens are stripped first because
  # they are allowed separators in the CDR1-CDR2-CDR3 format.
  residues <- strsplit(gsub("-", "", sequences, fixed = TRUE), "")
  offending <- lapply(residues, function(aa) unique(aa[!(aa %in% amino.acids)]))
  bad_idx <- which(lengths(offending) > 0)
  if (length(bad_idx) > 0) {
    first_bad <- bad_idx[[1]]
    stop(
      "Invalid character(s) '", paste(offending[[first_bad]], collapse = "', '"),
      "' found in sequence ", first_bad, ". Only standard amino acids",
      " (and hyphens for expanded CDR1-CDR2-CDR3 format) are allowed."
    )
  }

  # getIR() expects Heavy_Light format, where Light is in position 2 after "_"
  if (chain == "Heavy") {
    # Heavy chain is in position 1 (no underscore needed)
    ctaa <- sequences
    ctgene <- "NA.VH.NA.NA"
  } else {
    # Light chain needs to be in position 2: "None_SEQUENCE".
    # paste0() on a zero-length vector would yield a single "None_", so guard.
    ctaa <- if (n > 0L) paste0("None_", sequences) else character(0)
    ctgene <- "None_NA.VL.NA.NA"
  }

  # Build data.frame compatible with getIR(). `ctgene` is repeated explicitly
  # because data.frame() cannot recycle a length-one column to zero rows.
  data.frame(
    row.names = NULL,
    barcode = if (!is.null(labels)) labels else as.character(seq_len(n)),
    CTaa = ctaa,
    CTgene = rep(ctgene, n),
    stringsAsFactors = FALSE
  )
}

# Add to meta data some of the metrics calculated
#' @importFrom SingleCellExperiment colData
add.meta.data <- function(sc, meta, header) {
Expand Down
11 changes: 11 additions & 0 deletions man/Ibex_matrix.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

65 changes: 60 additions & 5 deletions tests/testthat/test-Ibex_matrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@ test_that("Ibex_matrix handles incorrect inputs gracefully", {

local_reproducible_output(unicode = FALSE)

expect_error(Ibex_matrix(input.data = ibex_example, chain = "Middle", method = "encoder"),
expect_error(Ibex_matrix(input.data = ibex_example, chain = "Middle", method = "encoder", verbose = FALSE),
"'arg' should be one of \"Heavy\", \"Light\"")
expect_error(Ibex_matrix(input.data = ibex_example, chain = "Heavy", method = "xyz"),
expect_error(Ibex_matrix(input.data = ibex_example, chain = "Heavy", method = "xyz", verbose = FALSE),
"'arg' should be one of \"encoder\", \"geometric\"")
expect_error(Ibex_matrix(input.data = ibex_example, chain = "Heavy", method = "encoder", encoder.model = "ABC"),
expect_error(Ibex_matrix(input.data = ibex_example, chain = "Heavy", method = "encoder", encoder.model = "ABC", verbose = FALSE),
"'arg' should be one of \"CNN\", \"VAE\", \"CNN.EXP\", \"VAE.EXP\"")
expect_error(Ibex_matrix(input.data = ibex_example, chain = "Heavy", method = "encoder", encoder.input = "XYZ"),
expect_error(Ibex_matrix(input.data = ibex_example, chain = "Heavy", method = "encoder", encoder.input = "XYZ", verbose = FALSE),
"arg' should be one of \"atchleyFactors\", \"crucianiProperties\", \"kideraFactors\", \"MSWHIM\", \"tScales\", \"OHE\"")
expect_error(Ibex_matrix(input.data = ibex_example, chain = "Heavy", method = "geometric", geometric.theta = "not_numeric"),
expect_error(Ibex_matrix(input.data = ibex_example, chain = "Heavy", method = "geometric", geometric.theta = "not_numeric", verbose = FALSE),
"non-numeric argument to mathematical function")
})

Expand Down Expand Up @@ -75,3 +75,58 @@ test_that("Ibex_matrix handles different species options", {
expect_true(all(grepl("^Ibex_", colnames(result1))))
expect_true(all(grepl("^Ibex_", colnames(result2))))
})

test_that("Ibex_matrix works with character vector input", {
  # Unnamed input: the assertions below expect positional row names.
  unnamed_aa <- c(
    "CARDYWGQGTLVTVSS",
    "CARDSSGYWGQGTLVTVSS",
    "CARDTGYWGQGTLVTVSS"
  )
  unnamed_out <- Ibex_matrix(
    input.data = unnamed_aa,
    chain = "Heavy",
    method = "geometric",
    verbose = FALSE
  )
  expect_true(is.data.frame(unnamed_out))
  expect_equal(nrow(unnamed_out), 3)
  expect_equal(rownames(unnamed_out), c("1", "2", "3"))
  expect_true(all(grepl("^Ibex_", colnames(unnamed_out))))

  # Named input: the vector names are expected back as row names.
  named_aa <- c(
    cell1 = "CARDYWGQGTLVTVSS",
    cell2 = "CARDSSGYWGQGTLVTVSS"
  )
  named_out <- Ibex_matrix(
    input.data = named_aa,
    chain = "Heavy",
    method = "geometric",
    verbose = FALSE
  )
  expect_equal(rownames(named_out), c("cell1", "cell2"))
})

test_that("Ibex_matrix character input validates amino acids", {
  # The first sequence carries digits, which are not amino acid codes.
  with_digits <- c("CARDYW123", "CARDSSGYW")
  expect_error(
    Ibex_matrix(
      input.data = with_digits,
      chain = "Heavy",
      method = "geometric",
      verbose = FALSE
    ),
    "Invalid character"
  )
})

test_that("Ibex_matrix character input works with light chain", {
  # Two light-chain sequences should give a two-row data.frame.
  light_aa <- c("CQQYNSYPLTFG", "CQQSYSTPLTFG")
  light_out <- Ibex_matrix(
    input.data = light_aa,
    chain = "Light",
    method = "geometric",
    verbose = FALSE
  )
  expect_true(is.data.frame(light_out))
  expect_equal(nrow(light_out), 2)
})

test_that("Ibex_matrix character input works with encoder method", {
  # The encoder path is only exercised when the Python modules are available.
  skip_if_py_not_installed(c("keras", "numpy"))

  heavy_aa <- c("CARDYWGQGTLVTVSS", "CARDSSGYWGQGTLVTVSS")
  encoded <- Ibex_matrix(
    input.data = heavy_aa,
    chain = "Heavy",
    method = "encoder",
    encoder.model = "VAE",
    encoder.input = "atchleyFactors",
    species = "Human",
    verbose = FALSE
  )

  expect_true(is.data.frame(encoded))
  expect_equal(nrow(encoded), 2)
  expect_true(all(grepl("^Ibex_", colnames(encoded))))
})