From 8e3df266b1618a8f9a24097c8592ecf4d39dd29c Mon Sep 17 00:00:00 2001 From: VisruthSK <67435125+VisruthSK@users.noreply.github.com> Date: Thu, 25 Dec 2025 19:18:53 -0800 Subject: [PATCH 1/6] Add qs2 fast option for saving models objects --- DESCRIPTION | 1 + R/fit.R | 21 ++++++++++++++++++--- man/fit-method-save_object.Rd | 12 ++++++++++-- tests/testthat/test-fit-shared.R | 10 ++++++++++ vignettes/cmdstanr.Rmd | 26 ++++++++------------------ 5 files changed, 47 insertions(+), 23 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 78bc2fa3a..3280baa04 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -52,6 +52,7 @@ Suggests: ggplot2, knitr (>= 1.37), loo (>= 2.0.0), + qs2, rmarkdown, testthat (>= 2.1.0), Rcpp diff --git a/R/fit.R b/R/fit.R index 58005ca63..a00c49bfd 100644 --- a/R/fit.R +++ b/R/fit.R @@ -108,12 +108,19 @@ CmdStanFit <- R6::R6Class( #' read into R lazily (i.e., as needed), the `$save_object()` method is the #' safest way to guarantee that everything has been read in before saving. #' +#' If you have a big object to save, use `format = "qs2"` to save using the +#' **qs2** package (with its fast preset). +#' #' See the "Saving fitted model objects" section of the #' [_Getting started with CmdStanR_](https://mc-stan.org/cmdstanr/articles/cmdstanr.html) #' vignette for some suggestions on faster model saving for large models. #' #' @param file (string) Path where the file should be saved. -#' @param ... Other arguments to pass to [base::saveRDS()] besides `object` and `file`. +#' @param format (string) Serialization format for the object. The default is +#' `"rds"`. The `"qs2"` format uses `qs2::qsave()` with the `"fast"` preset and +#' requires the **qs2** package. +#' @param ... Other arguments to pass to [base::saveRDS()] (for `format = "rds"`) +#' or `qs2::qsave()` (for `format = "qs2"`). #' #' @seealso [`CmdStanMCMC`], [`CmdStanMLE`], [`CmdStanVB`], [`CmdStanGQ`] #' @@ -129,12 +136,20 @@ CmdStanFit <- R6::R6Class( #' fit$summary() #' } #' -save_object <- function(file, ...) { +save_object <- function(file, format = c("rds", "qs2"), ...) { self$draws() try(self$sampler_diagnostics(), silent = TRUE) try(self$init(), silent = TRUE) try(self$profiles(), silent = TRUE) - saveRDS(self, file = file, ...) + format <- match.arg(format) + if (format == "rds") { + saveRDS(self, file = file, ...) + } else { + if (!requireNamespace("qs2", quietly = TRUE)) { + stop("The 'qs2' package is required for format = \"qs2\".", call. = FALSE) + } + qs2::qsave(x = self, file = file, preset = "fast", ...) + } invisible(self) } CmdStanFit$set("public", name = "save_object", value = save_object) diff --git a/man/fit-method-save_object.Rd b/man/fit-method-save_object.Rd index 6b1999fb9..faaf08c4d 100644 --- a/man/fit-method-save_object.Rd +++ b/man/fit-method-save_object.Rd @@ -5,12 +5,17 @@ \alias{save_object} \title{Save fitted model object to a file} \usage{ -save_object(file, ...) +save_object(file, format = c("rds", "qs2"), ...) } \arguments{ \item{file}{(string) Path where the file should be saved.} -\item{...}{Other arguments to pass to \code{\link[base:readRDS]{base::saveRDS()}} besides \code{object} and \code{file}.} +\item{format}{(string) Serialization format for the object. The default is +\code{"rds"}. The \code{"qs2"} format uses \code{qs2::qsave()} with the \code{"fast"} preset and +requires the \strong{qs2} package.} + +\item{...}{Other arguments to pass to \code{\link[base:readRDS]{base::saveRDS()}} (for \code{format = "rds"}) +or \code{qs2::qsave()} (for \code{format = "qs2"}).} } \description{ This method is a wrapper around \code{\link[base:readRDS]{base::saveRDS()}} that ensures @@ -19,6 +24,9 @@ model object. Because the contents of the CmdStan output CSV files are only read into R lazily (i.e., as needed), the \verb{$save_object()} method is the safest way to guarantee that everything has been read in before saving. +If you have a big object to save, use \code{format = "qs2"} to save using the +\strong{qs2} package (with its fast preset). + See the "Saving fitted model objects" section of the \href{https://mc-stan.org/cmdstanr/articles/cmdstanr.html}{\emph{Getting started with CmdStanR}} vignette for some suggestions on faster model saving for large models. diff --git a/tests/testthat/test-fit-shared.R b/tests/testthat/test-fit-shared.R index 0f98e64ac..024ef3a8a 100644 --- a/tests/testthat/test-fit-shared.R +++ b/tests/testthat/test-fit-shared.R @@ -178,6 +178,16 @@ test_that("save_object() method works", { expect_identical(fit$summary(), s) }) +test_that("save_object() method works with qs2 format", { + skip_if_not_installed("qs2") + fit <- fits[["sample"]] + temp_qs_file <- tempfile(fileext = ".qs2") + fit$save_object(temp_qs_file, format = "qs2") + fit2 <- qs2::qread(temp_qs_file) + expect_identical(fit2$summary(), fit$summary()) + expect_identical(fit2$return_codes(), fit$return_codes()) +}) + test_that("save_object() method works with profiles", { mod <- testing_model("logistic_profiling") utils::capture.output( diff --git a/vignettes/cmdstanr.Rmd b/vignettes/cmdstanr.Rmd index 8d9e30468..03c2fe02d 100644 --- a/vignettes/cmdstanr.Rmd +++ b/vignettes/cmdstanr.Rmd @@ -481,26 +481,16 @@ fit2 <- readRDS("fit.RDS") But if your model object is large, then [`$save_object()`](http://mc-stan.org/cmdstanr/reference/fit-method-save_object.html) -could take a long time. -[`$save_object()`](http://mc-stan.org/cmdstanr/reference/fit-method-save_object.html) -reads the CmdStan results files into memory, stores them in the model object, -and saves the object with `saveRDS()`. To speed up the process, you can emulate -[`$save_object()`](http://mc-stan.org/cmdstanr/reference/fit-method-save_object.html) -and replace `saveRDS` with the much faster `qsave()` function from the -[`qs`](https://github.com/traversc/qs) package. +could take a long time when saving in the default RDS format. For large objects, +use the much faster [`qs2`](https://github.com/traversc/qs2) format, which uses +the fast preset. ```{r save_object_qs_full, eval = FALSE} -# Load CmdStan output files into the fitted model object. -fit$draws() # Load posterior draws into the object. -try(fit$sampler_diagnostics(), silent = TRUE) # Load sampler diagnostics. -try(fit$init(), silent = TRUE) # Load user-defined initial values. -try(fit$profiles(), silent = TRUE) # Load profiling samples. - -# Save the object to a file. -qs::qsave(x = fit, file = "fit.qs") +# Save the object to a file using qs2. +fit$save_object(file = "fit.qs2", format = "qs2") # Read the object. -fit2 <- qs::qread("fit.qs") +fit2 <- qs2::qread("fit.qs2") ``` Storage is even faster if you discard results you do not need to save. @@ -512,10 +502,10 @@ sampler diagnostics, user-specified initial values, and profiling data. fit$draws() # Save the object to a file. -qs::qsave(x = fit, file = "fit.qs") +qs2::qsave(x = fit, file = "fit.qs2", preset = "fast") # Read the object. -fit2 <- qs::qread("fit.qs") +fit2 <- qs2::qread("fit.qs2") ``` See the vignette [_How does CmdStanR work?_](http://mc-stan.org/cmdstanr/articles/cmdstanr-internals.html) From 7540ab0bdbcd70ef1e063637393ae0e31ba64e38 Mon Sep 17 00:00:00 2001 From: VisruthSK <67435125+VisruthSK@users.noreply.github.com> Date: Thu, 25 Dec 2025 20:23:36 -0800 Subject: [PATCH 2/6] Fixed typo --- R/fit.R | 6 +++--- man/fit-method-save_object.Rd | 4 ++-- vignettes/cmdstanr.Rmd | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/fit.R b/R/fit.R index a00c49bfd..ce2c51a4a 100644 --- a/R/fit.R +++ b/R/fit.R @@ -117,10 +117,10 @@ CmdStanFit <- R6::R6Class( #' #' @param file (string) Path where the file should be saved. #' @param format (string) Serialization format for the object. The default is -#' `"rds"`. The `"qs2"` format uses `qs2::qsave()` with the `"fast"` preset and +#' `"rds"`. The `"qs2"` format uses `qs2::qs_save()` with the `"fast"` preset and #' requires the **qs2** package. #' @param ... Other arguments to pass to [base::saveRDS()] (for `format = "rds"`) -#' or `qs2::qsave()` (for `format = "qs2"`). +#' or `qs2::qs_save()` (for `format = "qs2"`). #' #' @seealso [`CmdStanMCMC`], [`CmdStanMLE`], [`CmdStanVB`], [`CmdStanGQ`] #' @@ -148,7 +148,7 @@ save_object <- function(file, format = c("rds", "qs2"), ...) { if (!requireNamespace("qs2", quietly = TRUE)) { stop("The 'qs2' package is required for format = \"qs2\".", call. = FALSE) } - qs2::qsave(x = self, file = file, preset = "fast", ...) + qs2::qs_save(x = self, file = file, preset = "fast", ...) } invisible(self) } diff --git a/man/fit-method-save_object.Rd b/man/fit-method-save_object.Rd index faaf08c4d..0fbfe7e6d 100644 --- a/man/fit-method-save_object.Rd +++ b/man/fit-method-save_object.Rd @@ -11,11 +11,11 @@ save_object(file, format = c("rds", "qs2"), ...) \item{file}{(string) Path where the file should be saved.} \item{format}{(string) Serialization format for the object. The default is -\code{"rds"}. The \code{"qs2"} format uses \code{qs2::qsave()} with the \code{"fast"} preset and +\code{"rds"}. The \code{"qs2"} format uses \code{qs2::qs_save()} with the \code{"fast"} preset and requires the \strong{qs2} package.} \item{...}{Other arguments to pass to \code{\link[base:readRDS]{base::saveRDS()}} (for \code{format = "rds"}) -or \code{qs2::qsave()} (for \code{format = "qs2"}).} +or \code{qs2::qs_save()} (for \code{format = "qs2"}).} } \description{ This method is a wrapper around \code{\link[base:readRDS]{base::saveRDS()}} that ensures diff --git a/vignettes/cmdstanr.Rmd b/vignettes/cmdstanr.Rmd index 03c2fe02d..a6775849a 100644 --- a/vignettes/cmdstanr.Rmd +++ b/vignettes/cmdstanr.Rmd @@ -490,7 +490,7 @@ the fast preset. fit$save_object(file = "fit.qs2", format = "qs2") # Read the object. -fit2 <- qs2::qread("fit.qs2") +fit2 <- qs2::qs_read("fit.qs2") ``` Storage is even faster if you discard results you do not need to save. @@ -502,10 +502,10 @@ sampler diagnostics, user-specified initial values, and profiling data. fit$draws() # Save the object to a file. -qs2::qsave(x = fit, file = "fit.qs2", preset = "fast") +qs2::qs_save(x = fit, file = "fit.qs2", preset = "fast") # Read the object. -fit2 <- qs2::qread("fit.qs2") +fit2 <- qs2::qs_read("fit.qs2") ``` See the vignette [_How does CmdStanR work?_](http://mc-stan.org/cmdstanr/articles/cmdstanr-internals.html) From 79423a2ecdffe6b1dcba569d19949bce438ee593 Mon Sep 17 00:00:00 2001 From: VisruthSK <67435125+VisruthSK@users.noreply.github.com> Date: Thu, 25 Dec 2025 22:06:36 -0800 Subject: [PATCH 3/6] Fixed hallucination --- R/fit.R | 8 ++++---- man/fit-method-save_object.Rd | 6 +++--- vignettes/cmdstanr.Rmd | 5 ++--- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/R/fit.R b/R/fit.R index ce2c51a4a..fe8f2af7b 100644 --- a/R/fit.R +++ b/R/fit.R @@ -109,7 +109,7 @@ CmdStanFit <- R6::R6Class( #' safest way to guarantee that everything has been read in before saving. #' #' If you have a big object to save, use `format = "qs2"` to save using the -#' **qs2** package (with its fast preset). +#' **qs2** package. #' #' See the "Saving fitted model objects" section of the #' [_Getting started with CmdStanR_](https://mc-stan.org/cmdstanr/articles/cmdstanr.html) @@ -117,8 +117,8 @@ CmdStanFit <- R6::R6Class( #' #' @param file (string) Path where the file should be saved. #' @param format (string) Serialization format for the object. The default is -#' `"rds"`. The `"qs2"` format uses `qs2::qs_save()` with the `"fast"` preset and -#' requires the **qs2** package. +#' `"rds"`. The `"qs2"` format uses `qs2::qs_save()` and requires the **qs2** +#' package. #' @param ... Other arguments to pass to [base::saveRDS()] (for `format = "rds"`) #' or `qs2::qs_save()` (for `format = "qs2"`). #' @@ -148,7 +148,7 @@ save_object <- function(file, format = c("rds", "qs2"), ...) { if (!requireNamespace("qs2", quietly = TRUE)) { stop("The 'qs2' package is required for format = \"qs2\".", call. = FALSE) } - qs2::qs_save(x = self, file = file, preset = "fast", ...) + qs2::qs_save(x = self, file = file, ...) } invisible(self) } diff --git a/man/fit-method-save_object.Rd b/man/fit-method-save_object.Rd index 0fbfe7e6d..080f46eb6 100644 --- a/man/fit-method-save_object.Rd +++ b/man/fit-method-save_object.Rd @@ -11,8 +11,8 @@ save_object(file, format = c("rds", "qs2"), ...) \item{file}{(string) Path where the file should be saved.} \item{format}{(string) Serialization format for the object. The default is -\code{"rds"}. The \code{"qs2"} format uses \code{qs2::qs_save()} with the \code{"fast"} preset and -requires the \strong{qs2} package.} +\code{"rds"}. The \code{"qs2"} format uses \code{qs2::qs_save()} and requires the \strong{qs2} +package.} \item{...}{Other arguments to pass to \code{\link[base:readRDS]{base::saveRDS()}} (for \code{format = "rds"}) or \code{qs2::qs_save()} (for \code{format = "qs2"}).} @@ -25,7 +25,7 @@ read into R lazily (i.e., as needed), the \verb{$save_object()} method is the safest way to guarantee that everything has been read in before saving. If you have a big object to save, use \code{format = "qs2"} to save using the -\strong{qs2} package (with its fast preset). +\strong{qs2} package. See the "Saving fitted model objects" section of the \href{https://mc-stan.org/cmdstanr/articles/cmdstanr.html}{\emph{Getting started with CmdStanR}} diff --git a/vignettes/cmdstanr.Rmd b/vignettes/cmdstanr.Rmd index a6775849a..a5a4fe42e 100644 --- a/vignettes/cmdstanr.Rmd +++ b/vignettes/cmdstanr.Rmd @@ -482,8 +482,7 @@ fit2 <- readRDS("fit.RDS") But if your model object is large, then [`$save_object()`](http://mc-stan.org/cmdstanr/reference/fit-method-save_object.html) could take a long time when saving in the default RDS format. For large objects, -use the much faster [`qs2`](https://github.com/traversc/qs2) format, which uses -the fast preset. +use the much faster [`qs2`](https://github.com/traversc/qs2) format. ```{r save_object_qs_full, eval = FALSE} # Save the object to a file using qs2. @@ -502,7 +501,7 @@ sampler diagnostics, user-specified initial values, and profiling data. fit$draws() # Save the object to a file. -qs2::qs_save(x = fit, file = "fit.qs2", preset = "fast") +qs2::qs_save(x = fit, file = "fit.qs2") # Read the object. fit2 <- qs2::qs_read("fit.qs2") From e33f4692cb304d520d89f6dfc0920a71fe5c16f6 Mon Sep 17 00:00:00 2001 From: VisruthSK <67435125+VisruthSK@users.noreply.github.com> Date: Thu, 25 Dec 2025 22:50:45 -0800 Subject: [PATCH 4/6] Fixed two more typos --- R/fit.R | 2 +- vignettes/cmdstanr.Rmd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/fit.R b/R/fit.R index fe8f2af7b..703bc5ef9 100644 --- a/R/fit.R +++ b/R/fit.R @@ -148,7 +148,7 @@ save_object <- function(file, format = c("rds", "qs2"), ...) { if (!requireNamespace("qs2", quietly = TRUE)) { stop("The 'qs2' package is required for format = \"qs2\".", call. = FALSE) } - qs2::qs_save(x = self, file = file, ...) + qs2::qs_save(self, file = file, ...) } invisible(self) } diff --git a/vignettes/cmdstanr.Rmd b/vignettes/cmdstanr.Rmd index a5a4fe42e..c59b52df7 100644 --- a/vignettes/cmdstanr.Rmd +++ b/vignettes/cmdstanr.Rmd @@ -501,7 +501,7 @@ sampler diagnostics, user-specified initial values, and profiling data. fit$draws() # Save the object to a file. -qs2::qs_save(x = fit, file = "fit.qs2") +qs2::qs_save(fit, file = "fit.qs2") # Read the object. fit2 <- qs2::qs_read("fit.qs2") From 223e85dfe055167261a5cf5509f23799aae6461e Mon Sep 17 00:00:00 2001 From: VisruthSK <67435125+VisruthSK@users.noreply.github.com> Date: Fri, 26 Dec 2025 21:11:35 -0800 Subject: [PATCH 5/6] Fixing more hallucinations --- tests/testthat/test-fit-shared.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-fit-shared.R b/tests/testthat/test-fit-shared.R index 024ef3a8a..47538ca0b 100644 --- a/tests/testthat/test-fit-shared.R +++ b/tests/testthat/test-fit-shared.R @@ -183,7 +183,7 @@ test_that("save_object() method works with qs2 format", { fit <- fits[["sample"]] temp_qs_file <- tempfile(fileext = ".qs2") fit$save_object(temp_qs_file, format = "qs2") - fit2 <- qs2::qread(temp_qs_file) + fit2 <- qs2::qs_read(temp_qs_file) expect_identical(fit2$summary(), fit$summary()) expect_identical(fit2$return_codes(), fit$return_codes()) }) From 1d4bb91d3f83f11e2ddc11f3b0634da28f2d43ea Mon Sep 17 00:00:00 2001 From: jgabry Date: Fri, 2 Jan 2026 10:45:36 -0700 Subject: [PATCH 6/6] remove outdated section of vignette --- vignettes/cmdstanr.Rmd | 46 ++++++++++++++---------------------------- 1 file changed, 15 insertions(+), 31 deletions(-) diff --git a/vignettes/cmdstanr.Rmd b/vignettes/cmdstanr.Rmd index c59b52df7..c52765578 100644 --- a/vignettes/cmdstanr.Rmd +++ b/vignettes/cmdstanr.Rmd @@ -432,9 +432,9 @@ fit_pf$print("theta") Let's extract the draws, make the same plot we made after running the other -algorithms, and compare them all. approximation, and compare them all. In this -simple example the distributions are quite similar, but this will not always be -the case for more challenging problems. +algorithms, and compare them all. In this simple example the distributions are +quite similar, but this will not always be the case for more challenging +problems. ```{r plot-compare-pf, message = FALSE} mcmc_hist(fit_pf$draws("theta"), binwidth = 0.025) + @@ -469,46 +469,29 @@ For more details on the `$optimize()`, `$laplace()`, `$variational()`, and ## Saving fitted model objects The [`$save_object()`](http://mc-stan.org/cmdstanr/reference/fit-method-save_object.html) -method provided by CmdStanR is the most convenient way to save a fitted model object -to disk and ensure that all of the contents are available when reading the object back into R. +method provided by CmdStanR is the most convenient way to save a fitted model +object to disk and ensure that all of the contents are available when reading +the object back into R. By default, `fit$save_object()` will use the `RDS` +format to save the object. The saved object can then be read back into R using +`readRDS()`. ```{r save_object, eval=FALSE} fit$save_object(file = "fit.RDS") -# can be read back in using readRDS fit2 <- readRDS("fit.RDS") ``` -But if your model object is large, then -[`$save_object()`](http://mc-stan.org/cmdstanr/reference/fit-method-save_object.html) -could take a long time when saving in the default RDS format. For large objects, -use the much faster [`qs2`](https://github.com/traversc/qs2) format. +But if your model object is large, then `fit$save_object()` can take a long time +if saving in the default RDS format. For large objects, we recommend using the +much faster [`qs2`](https://github.com/traversc/qs2) format. The saved object +can then be read back into R using `qs2::qs_read()`. ```{r save_object_qs_full, eval = FALSE} -# Save the object to a file using qs2. fit$save_object(file = "fit.qs2", format = "qs2") -# Read the object. fit2 <- qs2::qs_read("fit.qs2") ``` -Storage is even faster if you discard results you do not need to save. -The following example saves only posterior draws and discards -sampler diagnostics, user-specified initial values, and profiling data. - -```{r save_object_qs_small, eval = FALSE} -# Load posterior draws into the fitted model object and omit other output. -fit$draws() - -# Save the object to a file. -qs2::qs_save(fit, file = "fit.qs2") - -# Read the object. -fit2 <- qs2::qs_read("fit.qs2") -``` - -See the vignette [_How does CmdStanR work?_](http://mc-stan.org/cmdstanr/articles/cmdstanr-internals.html) -for more information about the composition of CmdStanR objects. ## Comparison with RStan @@ -526,7 +509,8 @@ To ask a question please post on the Stan forums: * https://discourse.mc-stan.org/ -To report a bug, suggest a feature (including additions to these vignettes), or to start contributing to CmdStanR -development (new contributors welcome!) please open an issue on GitHub: +To report a bug, suggest a feature (including additions to these vignettes), or +to start contributing to CmdStanR development (new contributors welcome!) please +open an issue on GitHub: * https://github.com/stan-dev/cmdstanr/issues