From f6664efcc194b12ae423de88c20c9874fda8c939 Mon Sep 17 00:00:00 2001 From: Qile0317 Date: Tue, 9 Dec 2025 01:32:45 -0800 Subject: [PATCH 1/4] fix verbosity --- R/Ibex_matrix.R | 4 ++-- tests/testthat/test-Ibex_matrix.R | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/R/Ibex_matrix.R b/R/Ibex_matrix.R index d14ccbf..f29bfe6 100644 --- a/R/Ibex_matrix.R +++ b/R/Ibex_matrix.R @@ -134,7 +134,7 @@ Ibex_matrix <- function(input.data, tryCatch({ model <- keras$models$load_model(mpath) - pred <- model$predict(xmat) + pred <- model$predict(xmat, verbose = if (verbose) "auto" else 0) as.array(pred) # This will be the return value }, finally = { rm(pred) @@ -151,7 +151,7 @@ Ibex_matrix <- function(input.data, } else if (method == "geometric") { if (verbose) print("Performing geometric transformation...") BCR[,"cdr3_aa"] <- gsub("-", "", BCR[,"cdr3_aa"]) - reduction <- suppressMessages(geometricEncoder(BCR[,"cdr3_aa"], theta = geometric.theta))[[3]] + reduction <- geometricEncoder(BCR[,"cdr3_aa"], theta = geometric.theta, verbose = FALSE)[[3]] } reduction <- as.data.frame(reduction) barcodes <- BCR[,"barcode"] diff --git a/tests/testthat/test-Ibex_matrix.R b/tests/testthat/test-Ibex_matrix.R index 53eca53..afedd3d 100644 --- a/tests/testthat/test-Ibex_matrix.R +++ b/tests/testthat/test-Ibex_matrix.R @@ -5,15 +5,15 @@ test_that("Ibex_matrix handles incorrect inputs gracefully", { local_reproducible_output(unicode = FALSE) - expect_error(Ibex_matrix(input.data = ibex_example, chain = "Middle", method = "encoder"), + expect_error(Ibex_matrix(input.data = ibex_example, chain = "Middle", method = "encoder", verbose = FALSE), "'arg' should be one of \"Heavy\", \"Light\"") - expect_error(Ibex_matrix(input.data = ibex_example, chain = "Heavy", method = "xyz"), + expect_error(Ibex_matrix(input.data = ibex_example, chain = "Heavy", method = "xyz", verbose = FALSE), "'arg' should be one of \"encoder\", \"geometric\"") - expect_error(Ibex_matrix(input.data = 
ibex_example, chain = "Heavy", method = "encoder", encoder.model = "ABC"), + expect_error(Ibex_matrix(input.data = ibex_example, chain = "Heavy", method = "encoder", encoder.model = "ABC", verbose = FALSE), "'arg' should be one of \"CNN\", \"VAE\", \"CNN.EXP\", \"VAE.EXP\"") - expect_error(Ibex_matrix(input.data = ibex_example, chain = "Heavy", method = "encoder", encoder.input = "XYZ"), + expect_error(Ibex_matrix(input.data = ibex_example, chain = "Heavy", method = "encoder", encoder.input = "XYZ", verbose = FALSE), "arg' should be one of \"atchleyFactors\", \"crucianiProperties\", \"kideraFactors\", \"MSWHIM\", \"tScales\", \"OHE\"") - expect_error(Ibex_matrix(input.data = ibex_example, chain = "Heavy", method = "geometric", geometric.theta = "not_numeric"), + expect_error(Ibex_matrix(input.data = ibex_example, chain = "Heavy", method = "geometric", geometric.theta = "not_numeric", verbose = FALSE), "non-numeric argument to mathematical function") }) From fdea76e95a9c0580036a943ffe7dd84101be9710 Mon Sep 17 00:00:00 2001 From: Qile0317 Date: Tue, 9 Dec 2025 01:34:22 -0800 Subject: [PATCH 2/4] sync news --- NEWS | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS b/NEWS index 5a20b98..b1e752a 100644 --- a/NEWS +++ b/NEWS @@ -8,6 +8,7 @@ - Add Docker infrastructure with `Dockerfile` and `.devcontainer/devcontainer.json` - Add `.markdownlint.json` to sourcecode - Improved testthat compatibility across platforms +- Improve adherence to verbosity arguments # Ibex 1.0.0 From 19eb6674e7199600c62b11ef3e31e983052d62b4 Mon Sep 17 00:00:00 2001 From: Qile0317 Date: Tue, 9 Dec 2025 01:52:14 -0800 Subject: [PATCH 3/4] make ibex matrix accept vectors --- DESCRIPTION | 1 + NEWS | 3 +- R/Ibex_matrix.R | 18 ++++++++-- R/utils.R | 40 ++++++++++++++++++++++ man/Ibex_matrix.Rd | 11 +++++++ tests/testthat/test-Ibex_matrix.R | 55 +++++++++++++++++++++++++++++++ 6 files changed, 125 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ae66fcd..6dacdeb 100644 --- 
a/DESCRIPTION +++ b/DESCRIPTION @@ -33,6 +33,7 @@ Suggests: ggplot2, kableExtra, knitr, + lifecycle, markdown, mumosa, patchwork, diff --git a/NEWS b/NEWS index b1e752a..87f2fb2 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,7 @@ # Ibex Development Version -- removed rlang from Imports +- `Ibex_matrix()` now accepts character vectors of amino acid sequences directly +- removed rlang from Imports, added lifecycle to Suggests - Reformatted `NEWS` and `README.md` - As per `basilisk` documentation: - Add `.BBSoptions` with `UnsupportedPlatforms: win32` diff --git a/R/Ibex_matrix.R b/R/Ibex_matrix.R index f29bfe6..3b23b7d 100644 --- a/R/Ibex_matrix.R +++ b/R/Ibex_matrix.R @@ -21,10 +21,21 @@ #' chain = "Heavy", #' method = "geometric", #' geometric.theta = pi) +#' +#' # Using a character vector of amino acid sequences +#' sequences <- c("CARDYW", "CARDSSGYW", "CARDTGYW") +#' ibex_values <- Ibex_matrix(sequences, +#' chain = "Heavy", +#' method = "geometric") #' #' @param input.data Input data, which can be: #' - A Single Cell Object in Seurat or SingleCellExperiment format #' - The output of [scRepertoire::combineBCR()] or [combineExpandedBCR()] +#' - `r lifecycle::badge("experimental")` A character vector of amino acid +#' sequences. The `chain` parameter specifies whether these are heavy or +#' light chain sequences. For expanded models (CNN.EXP/VAE.EXP), sequences +#' should be formatted as CDR1-CDR2-CDR3 separated by hyphens. If the vector +#' is named, the names will be used as row names in the output. #' @param chain Character. 
Specifies which chain to analyze: #' - "Heavy" for the heavy chain #' - "Light" for the light chain @@ -76,6 +87,11 @@ Ibex_matrix <- function(input.data, expanded.sequences <- FALSE } + # Handle character vector input - convert to data.frame format for getIR() + if (is.character(input.data)) { + input.data <- .convert_aa_vector_to_bcr_df(input.data, chain) + } + # Define loci based on chain selection loci <- if (chain == "Heavy") "IGH" else c("IGK", "IGL") @@ -159,5 +175,3 @@ Ibex_matrix <- function(input.data, colnames(reduction) <- paste0("Ibex_", seq_len(ncol(reduction))) return(reduction) } - - diff --git a/R/utils.R b/R/utils.R index b1ddac7..a85dd45 100644 --- a/R/utils.R +++ b/R/utils.R @@ -2,6 +2,46 @@ amino.acids <- c("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V") +# Convert a character vector of amino acid sequences to a BCR data.frame +# that getIR() can process +.convert_aa_vector_to_bcr_df <- function(sequences, chain) { + # Validate input contains only valid amino acids (and hyphens for expanded) + for (i in seq_along(sequences)) { + seq <- sequences[i] + # Remove hyphens for validation (allowed for CDR1-CDR2-CDR3 format) + aas <- toupper(strsplit(gsub("-", "", seq), "")[[1]]) + invalid <- aas[!(aas %in% amino.acids)] + if (length(invalid) > 0) { + stop( + "Invalid character(s) '", paste(unique(invalid), collapse = "', '"), + "' found in sequence ", i, ". Only standard amino acids", + " (and hyphens for expanded CDR1-CDR2-CDR3 format) are allowed." 
+ ) + } + } + + # Format CTaa and CTgene based on chain + + # getIR() expects Heavy_Light format, where Light is in position 2 after "_" + if (chain == "Heavy") { + ctaa <- sequences # Heavy chain is in position 1 (no underscore needed) + ctgene <- "NA.VH.NA.NA" + } else { + # Light chain needs to be in position 2: "None_SEQUENCE" + ctaa <- paste0("None_", sequences) + ctgene <- "None_NA.VL.NA.NA" + } + + # Build data.frame compatible with getIR() + data.frame( + row.names = NULL, + barcode = if (!is.null(names(sequences))) names(sequences) + else as.character(seq_along(sequences)), + CTaa = ctaa, + CTgene = ctgene + ) +} + # Add to meta data some of the metrics calculated #' @importFrom SingleCellExperiment colData add.meta.data <- function(sc, meta, header) { diff --git a/man/Ibex_matrix.Rd b/man/Ibex_matrix.Rd index 97610bb..1d7a1a4 100644 --- a/man/Ibex_matrix.Rd +++ b/man/Ibex_matrix.Rd @@ -21,6 +21,11 @@ Ibex_matrix( \itemize{ \item A Single Cell Object in Seurat or SingleCellExperiment format \item The output of \code{\link[scRepertoire:combineBCR]{scRepertoire::combineBCR()}} or \code{\link[=combineExpandedBCR]{combineExpandedBCR()}} +\item \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} A character vector of amino acid +sequences. The \code{chain} parameter specifies whether these are heavy or +light chain sequences. For expanded models (CNN.EXP/VAE.EXP), sequences +should be formatted as CDR1-CDR2-CDR3 separated by hyphens. If the vector +is named, the names will be used as row names in the output. }} \item{chain}{Character. 
Specifies which chain to analyze: @@ -82,6 +87,12 @@ ibex_values <- Ibex_matrix(ibex_example, method = "geometric", geometric.theta = pi) +# Using a character vector of amino acid sequences +sequences <- c("CARDYW", "CARDSSGYW", "CARDTGYW") +ibex_values <- Ibex_matrix(sequences, + chain = "Heavy", + method = "geometric") + } \seealso{ \code{\link[immApex:sequenceEncoder]{immApex::propertyEncoder()}}, diff --git a/tests/testthat/test-Ibex_matrix.R b/tests/testthat/test-Ibex_matrix.R index afedd3d..37aafdb 100644 --- a/tests/testthat/test-Ibex_matrix.R +++ b/tests/testthat/test-Ibex_matrix.R @@ -75,3 +75,58 @@ test_that("Ibex_matrix handles different species options", { expect_true(all(grepl("^Ibex_", colnames(result1)))) expect_true(all(grepl("^Ibex_", colnames(result2)))) }) + +test_that("Ibex_matrix works with character vector input", { + # Test with unnamed vector + sequences <- c("CARDYWGQGTLVTVSS", "CARDSSGYWGQGTLVTVSS", "CARDTGYWGQGTLVTVSS") + result <- Ibex_matrix(input.data = sequences, + chain = "Heavy", + method = "geometric", + verbose = FALSE) + expect_true(is.data.frame(result)) + expect_equal(nrow(result), 3) + expect_equal(rownames(result), c("1", "2", "3")) + expect_true(all(grepl("^Ibex_", colnames(result)))) + + # Test with named vector + named_sequences <- c(cell1 = "CARDYWGQGTLVTVSS", cell2 = "CARDSSGYWGQGTLVTVSS") + result_named <- Ibex_matrix(input.data = named_sequences, + chain = "Heavy", + method = "geometric", + verbose = FALSE) + expect_equal(rownames(result_named), c("cell1", "cell2")) +}) + +test_that("Ibex_matrix character input validates amino acids", { + # Test with invalid characters + bad_sequences <- c("CARDYW123", "CARDSSGYW") + expect_error( + Ibex_matrix(input.data = bad_sequences, chain = "Heavy", method = "geometric", verbose = FALSE), + "Invalid character" + ) +}) + +test_that("Ibex_matrix character input works with light chain", { + sequences <- c("CQQYNSYPLTFG", "CQQSYSTPLTFG") + result <- Ibex_matrix(input.data = 
sequences, + chain = "Light", + method = "geometric", + verbose = FALSE) + expect_true(is.data.frame(result)) + expect_equal(nrow(result), 2) +}) + +test_that("Ibex_matrix character input works with encoder method", { + skip_if_py_not_installed(c("keras", "numpy")) + sequences <- c("CARDYWGQGTLVTVSS", "CARDSSGYWGQGTLVTVSS") + result <- Ibex_matrix(input.data = sequences, + chain = "Heavy", + method = "encoder", + encoder.model = "VAE", + encoder.input = "atchleyFactors", + species = "Human", + verbose = FALSE) + expect_true(is.data.frame(result)) + expect_equal(nrow(result), 2) + expect_true(all(grepl("^Ibex_", colnames(result)))) +}) From 02fff1f7c87cfc08a9e134f13142e57e72f3ba06 Mon Sep 17 00:00:00 2001 From: Qile0317 Date: Tue, 9 Dec 2025 01:53:30 -0800 Subject: [PATCH 4/4] increment dev version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6dacdeb..1c0804a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Ibex Title: Methods for BCR single-cell embedding -Version: 1.0.0.9000 +Version: 1.0.0.9001 Authors@R: c( person(given = "Nick", family = "Borcherding", role = c("aut", "cre", "cph"), email = "ncborch@gmail.com"), person(given = "Qile", family = "Yang", role = "ctb", email = "qile.yang@berkeley.edu", comment = c(ORCID = "0009-0005-0148-2499")))