diff --git a/DESCRIPTION b/DESCRIPTION
index 78bc2fa3a..3280baa04 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -52,6 +52,7 @@ Suggests:
     ggplot2,
     knitr (>= 1.37),
     loo (>= 2.0.0),
+    qs2,
     rmarkdown,
     testthat (>= 2.1.0),
     Rcpp
diff --git a/R/fit.R b/R/fit.R
index 58005ca63..703bc5ef9 100644
--- a/R/fit.R
+++ b/R/fit.R
@@ -108,12 +108,19 @@ CmdStanFit <- R6::R6Class(
 #' read into R lazily (i.e., as needed), the `$save_object()` method is the
 #' safest way to guarantee that everything has been read in before saving.
 #'
+#' If you have a big object to save, use `format = "qs2"` to save using the
+#' **qs2** package.
+#'
 #' See the "Saving fitted model objects" section of the
 #' [_Getting started with CmdStanR_](https://mc-stan.org/cmdstanr/articles/cmdstanr.html)
 #' vignette for some suggestions on faster model saving for large models.
 #'
 #' @param file (string) Path where the file should be saved.
-#' @param ... Other arguments to pass to [base::saveRDS()] besides `object` and `file`.
+#' @param format (string) Serialization format for the object. The default is
+#' `"rds"`. The `"qs2"` format uses `qs2::qs_save()` and requires the **qs2**
+#' package.
+#' @param ... Other arguments to pass to [base::saveRDS()] (for `format = "rds"`)
+#' or `qs2::qs_save()` (for `format = "qs2"`).
 #'
 #' @seealso [`CmdStanMCMC`], [`CmdStanMLE`], [`CmdStanVB`], [`CmdStanGQ`]
 #'
@@ -129,12 +136,22 @@ CmdStanFit <- R6::R6Class(
 #' fit$summary()
 #' }
 #'
-save_object <- function(file, ...) {
+save_object <- function(file, format = c("rds", "qs2"), ...) {
+  # Validate `format` and the optional qs2 dependency up front so that a bad
+  # call fails before the CSV contents are read into the object below.
+  format <- match.arg(format)
+  if (format == "qs2" && !requireNamespace("qs2", quietly = TRUE)) {
+    stop("The 'qs2' package is required for format = \"qs2\".", call. = FALSE)
+  }
   self$draws()
   try(self$sampler_diagnostics(), silent = TRUE)
   try(self$init(), silent = TRUE)
   try(self$profiles(), silent = TRUE)
-  saveRDS(self, file = file, ...)
+  if (format == "rds") {
+    saveRDS(self, file = file, ...)
+  } else {
+    qs2::qs_save(self, file = file, ...)
+  }
   invisible(self)
 }
 CmdStanFit$set("public", name = "save_object", value = save_object)
diff --git a/man/fit-method-save_object.Rd b/man/fit-method-save_object.Rd
index 6b1999fb9..080f46eb6 100644
--- a/man/fit-method-save_object.Rd
+++ b/man/fit-method-save_object.Rd
@@ -5,12 +5,17 @@
 \alias{save_object}
 \title{Save fitted model object to a file}
 \usage{
-save_object(file, ...)
+save_object(file, format = c("rds", "qs2"), ...)
 }
 \arguments{
 \item{file}{(string) Path where the file should be saved.}
 
-\item{...}{Other arguments to pass to \code{\link[base:readRDS]{base::saveRDS()}} besides \code{object} and \code{file}.}
+\item{format}{(string) Serialization format for the object. The default is
+\code{"rds"}. The \code{"qs2"} format uses \code{qs2::qs_save()} and requires the \strong{qs2}
+package.}
+
+\item{...}{Other arguments to pass to \code{\link[base:readRDS]{base::saveRDS()}} (for \code{format = "rds"})
+or \code{qs2::qs_save()} (for \code{format = "qs2"}).}
 }
 \description{
 This method is a wrapper around \code{\link[base:readRDS]{base::saveRDS()}} that ensures
@@ -19,6 +24,9 @@ model object. Because the contents of the CmdStan output CSV files are only
 read into R lazily (i.e., as needed), the \verb{$save_object()} method is the
 safest way to guarantee that everything has been read in before saving.
 
+If you have a big object to save, use \code{format = "qs2"} to save using the
+\strong{qs2} package.
+
 See the "Saving fitted model objects" section of the
 \href{https://mc-stan.org/cmdstanr/articles/cmdstanr.html}{\emph{Getting started with CmdStanR}}
 vignette for some suggestions on faster model saving for large models.
diff --git a/tests/testthat/test-fit-shared.R b/tests/testthat/test-fit-shared.R
index 0f98e64ac..47538ca0b 100644
--- a/tests/testthat/test-fit-shared.R
+++ b/tests/testthat/test-fit-shared.R
@@ -178,6 +178,18 @@ test_that("save_object() method works", {
   expect_identical(fit$summary(), s)
 })
 
+test_that("save_object() method works with qs2 format", {
+  skip_if_not_installed("qs2")
+  original <- fits[["sample"]]
+  qs2_path <- tempfile(fileext = ".qs2")
+  original$save_object(qs2_path, format = "qs2")
+
+  # Read the file back with qs2 directly and compare against the live object.
+  restored <- qs2::qs_read(qs2_path)
+  expect_identical(restored$summary(), original$summary())
+  expect_identical(restored$return_codes(), original$return_codes())
+})
+
 test_that("save_object() method works with profiles", {
   mod <- testing_model("logistic_profiling")
   utils::capture.output(
diff --git a/vignettes/cmdstanr.Rmd b/vignettes/cmdstanr.Rmd
index 8d9e30468..c52765578 100644
--- a/vignettes/cmdstanr.Rmd
+++ b/vignettes/cmdstanr.Rmd
@@ -432,9 +432,9 @@ fit_pf$print("theta")
 
 
 Let's extract the draws, make the same plot we made after running the other
-algorithms, and compare them all. approximation, and compare them all. In this
-simple example the distributions are quite similar, but this will not always be
-the case for more challenging problems.
+algorithms, and compare them all. In this simple example the distributions are
+quite similar, but this will not always be the case for more challenging
+problems.
 
 ```{r plot-compare-pf, message = FALSE}
 mcmc_hist(fit_pf$draws("theta"), binwidth = 0.025) +
@@ -469,57 +469,29 @@ For more details on the `$optimize()`, `$laplace()`, `$variational()`, and
 ## Saving fitted model objects
 
 The [`$save_object()`](http://mc-stan.org/cmdstanr/reference/fit-method-save_object.html)
-method provided by CmdStanR is the most convenient way to save a fitted model object
-to disk and ensure that all of the contents are available when reading the object back into R.
+method provided by CmdStanR is the most convenient way to save a fitted model
+object to disk and ensure that all of the contents are available when reading
+the object back into R. By default, `fit$save_object()` will use the `RDS`
+format to save the object. The saved object can then be read back into R using
+`readRDS()`.
 
 ```{r save_object, eval=FALSE}
 fit$save_object(file = "fit.RDS")
 
-# can be read back in using readRDS
 fit2 <- readRDS("fit.RDS")
 ```
 
-But if your model object is large, then
-[`$save_object()`](http://mc-stan.org/cmdstanr/reference/fit-method-save_object.html)
-could take a long time.
-[`$save_object()`](http://mc-stan.org/cmdstanr/reference/fit-method-save_object.html)
-reads the CmdStan results files into memory, stores them in the model object,
-and saves the object with `saveRDS()`. To speed up the process, you can emulate
-[`$save_object()`](http://mc-stan.org/cmdstanr/reference/fit-method-save_object.html)
-and replace `saveRDS` with the much faster `qsave()` function from the
-[`qs`](https://github.com/traversc/qs) package.
+But if your model object is large, then `fit$save_object()` can take a long time
+when saving in the default RDS format. For large objects, we recommend the much
+faster [`qs2`](https://github.com/traversc/qs2) format (this requires the **qs2**
+package). The saved object can then be read back into R using `qs2::qs_read()`.
 
 ```{r save_object_qs_full, eval = FALSE}
-# Load CmdStan output files into the fitted model object.
-fit$draws() # Load posterior draws into the object.
-try(fit$sampler_diagnostics(), silent = TRUE) # Load sampler diagnostics.
-try(fit$init(), silent = TRUE) # Load user-defined initial values.
-try(fit$profiles(), silent = TRUE) # Load profiling samples.
+fit$save_object(file = "fit.qs2", format = "qs2")
 
-# Save the object to a file.
-qs::qsave(x = fit, file = "fit.qs")
-
-# Read the object.
-fit2 <- qs::qread("fit.qs")
-```
-
-Storage is even faster if you discard results you do not need to save.
-The following example saves only posterior draws and discards
-sampler diagnostics, user-specified initial values, and profiling data.
-
-```{r save_object_qs_small, eval = FALSE}
-# Load posterior draws into the fitted model object and omit other output.
-fit$draws()
-
-# Save the object to a file.
-qs::qsave(x = fit, file = "fit.qs")
-
-# Read the object.
-fit2 <- qs::qread("fit.qs")
+fit2 <- qs2::qs_read("fit.qs2")
 ```
 
-See the vignette [_How does CmdStanR work?_](http://mc-stan.org/cmdstanr/articles/cmdstanr-internals.html)
-for more information about the composition of CmdStanR objects.
 
 ## Comparison with RStan
 
@@ -537,7 +509,8 @@ To ask a question please post on the Stan forums:
 
 * https://discourse.mc-stan.org/
 
-To report a bug, suggest a feature (including additions to these vignettes), or to start contributing to CmdStanR
-development (new contributors welcome!) please open an issue on GitHub:
+To report a bug, suggest a feature (including additions to these vignettes), or
+to start contributing to CmdStanR development (new contributors welcome!),
+please open an issue on GitHub:
 
 * https://github.com/stan-dev/cmdstanr/issues