From d7ebe5fdd3374834ab10b1ffa9833643a1c67e1e Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Tue, 10 Jun 2025 12:20:26 -0700 Subject: [PATCH 1/4] irwls weights now always sum to nobs --- DESCRIPTION | 2 +- R/sgl_irwls.R | 2 +- R/sparsegl.R | 3 ++- man/cv.sparsegl.Rd | 3 ++- man/sparsegl-package.Rd | 2 +- man/sparsegl.Rd | 3 ++- 6 files changed, 9 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7a9bc01..04655e7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,7 +19,7 @@ Authors@R: c( person("James", "Yang", role = "ctb") ) Description: Efficient implementation of sparse group lasso with optional - bound constraints on the coefficients; see + bound constraints on the coefficients; see Liang, et al., (2024) <doi:10.18637/jss.v110.i06>. It supports the use of a sparse design matrix as well as returning coefficient estimates in a sparse matrix. Furthermore, it correctly calculates the degrees of freedom to allow diff --git a/R/sgl_irwls.R b/R/sgl_irwls.R index b410986..5104468 100644 --- a/R/sgl_irwls.R +++ b/R/sgl_irwls.R @@ -52,7 +52,7 @@ sgl_irwls <- function( if (is.null(weights)) { weights <- rep(1, nobs) } else { - weights <- weights / sum(weights) + weights <- weights / sum(weights) * nobs } etastart <- 0 mustart <- NULL diff --git a/R/sparsegl.R b/R/sparsegl.R index 36e6878..f60d3dc 100644 --- a/R/sparsegl.R +++ b/R/sparsegl.R @@ -84,7 +84,8 @@ #' or of length the number of groups. Must be non-negative numbers only. #' Default value for each entry is `Inf`. #' @param weights Double vector. Optional observation weights. These can -#' only be used with a [stats::family()] object. +#' only be used with a [stats::family()] object. Internally coerced to sum +#' to the number of observations. #' @param offset Double vector. Optional offset (constant predictor without a #' corresponding coefficient). These can only be used with a #' [stats::family()] object.
diff --git a/man/cv.sparsegl.Rd b/man/cv.sparsegl.Rd index 35df03d..0ced9f8 100644 --- a/man/cv.sparsegl.Rd +++ b/man/cv.sparsegl.Rd @@ -74,7 +74,8 @@ identifying which fold each observation is in. If supplied, \code{nfolds} can be missing.} \item{weights}{Double vector. Optional observation weights. These can -only be used with a \code{\link[stats:family]{stats::family()}} object.} +only be used with a \code{\link[stats:family]{stats::family()}} object. Internally coerced to sum +to the number of observations.} \item{offset}{Double vector. Optional offset (constant predictor without a corresponding coefficient). These can only be used with a diff --git a/man/sparsegl-package.Rd b/man/sparsegl-package.Rd index 1957a20..a169db7 100644 --- a/man/sparsegl-package.Rd +++ b/man/sparsegl-package.Rd @@ -7,7 +7,7 @@ \description{ \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} -Efficient implementation of sparse group lasso with optional bound constraints on the coefficients; see \doi{10.18637/jss.v110.i06}. It supports the use of a sparse design matrix as well as returning coefficient estimates in a sparse matrix. Furthermore, it correctly calculates the degrees of freedom to allow for information criteria rather than cross-validation with very large data. Finally, the interface to compiled code avoids unnecessary copies and allows for the use of long integers. +Efficient implementation of sparse group lasso with optional bound constraints on the coefficients; see Liang, et al., (2024) \doi{10.18637/jss.v110.i06}. It supports the use of a sparse design matrix as well as returning coefficient estimates in a sparse matrix. Furthermore, it correctly calculates the degrees of freedom to allow for information criteria rather than cross-validation with very large data. Finally, the interface to compiled code avoids unnecessary copies and allows for the use of long integers. 
} \references{ Liang, X., Cohen, A., Sólon Heinsfeld, A., Pestilli, F., and diff --git a/man/sparsegl.Rd b/man/sparsegl.Rd index 578e9fd..1352329 100644 --- a/man/sparsegl.Rd +++ b/man/sparsegl.Rd @@ -113,7 +113,8 @@ or of length the number of groups. Must be non-negative numbers only. Default value for each entry is \code{Inf}.} \item{weights}{Double vector. Optional observation weights. These can -only be used with a \code{\link[stats:family]{stats::family()}} object.} +only be used with a \code{\link[stats:family]{stats::family()}} object. Internally coerced to sum +to the number of observations.} \item{offset}{Double vector. Optional offset (constant predictor without a corresponding coefficient). These can only be used with a From 359833a1b8ba2cc5b4ac65a3d772744ec49945a0 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Tue, 10 Jun 2025 12:21:26 -0700 Subject: [PATCH 2/4] bump news --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 4b12c3e..10fd22a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # sparsegl (development version) +* Force `weights` to sum to `nobs` for all IRWLS cases. + # sparsegl 1.1.1 * Add CITATION and links to JSS article From 9d0dc103d4c2c20db1e8cc8b6646e2fecd9c2f00 Mon Sep 17 00:00:00 2001 From: "Daniel J. 
McDonald" Date: Tue, 10 Jun 2025 13:50:56 -0700 Subject: [PATCH 3/4] modify documentation --- DESCRIPTION | 2 +- LICENSE | 2 +- LICENSE.md | 2 +- R/sparsegl-package.R | 6 +++--- man/sparsegl-package.Rd | 8 ++++---- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 04655e7..8dc4fe3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,7 +3,7 @@ Package: sparsegl Title: Sparse Group Lasso Version: 1.1.1.9000 Authors@R: c( - person("Daniel J.", "McDonald", , "daniel@stat.ubc.ca", role = c("aut", "cre")), + person("Daniel J.", "McDonald", , "daniel@stat.ubc.ca", role = c("aut", "cre", "cph")), person("Xiaoxuan", "Liang", , "xiaoxuan.liang@stat.ubc.ca", role = "aut"), person("Anibal", "Solón Heinsfeld", , "anibalsolon@gmail.com", role = "aut"), person("Aaron", "Cohen", , "cohenaa@indiana.edu", role = "aut"), diff --git a/LICENSE b/LICENSE index ffadc7e..cf5b4aa 100644 --- a/LICENSE +++ b/LICENSE @@ -1,2 +1,2 @@ -YEAR: 2024 +YEAR: 2025 COPYRIGHT HOLDER: sparsegl authors diff --git a/LICENSE.md b/LICENSE.md index 97d2634..77c31ad 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,6 @@ # MIT License -Copyright (c) 2024 sparsegl authors +Copyright (c) 2025 sparsegl authors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/R/sparsegl-package.R b/R/sparsegl-package.R index 7ea76d9..25f46a4 100644 --- a/R/sparsegl-package.R +++ b/R/sparsegl-package.R @@ -11,7 +11,7 @@ #' @keywords internal #' @references Liang, X., Cohen, A., Sólon Heinsfeld, A., Pestilli, F., and #' McDonald, D.J. 2024. -#' \emph{sparsegl: An `R` Package for Estimating Sparse Group Lasso.} -#' Journal of Statistical Software, Vol. 110(6): 1–23. -#' \doi{10.18637/jss.v110.i06}. +#' "sparsegl: An `R` Package for Estimating Sparse Group Lasso." +#' _Journal of Statistical Software_, **110**(6): 1–23. 
+#' [doi:10.18637/jss.v110.i06](https://doi.org/10.18637/jss.v110.i06). "_PACKAGE" diff --git a/man/sparsegl-package.Rd b/man/sparsegl-package.Rd index a169db7..b9509fc 100644 --- a/man/sparsegl-package.Rd +++ b/man/sparsegl-package.Rd @@ -12,9 +12,9 @@ Efficient implementation of sparse group lasso with optional bound constraints o \references{ Liang, X., Cohen, A., Sólon Heinsfeld, A., Pestilli, F., and McDonald, D.J. 2024. -\emph{sparsegl: An \code{R} Package for Estimating Sparse Group Lasso.} -Journal of Statistical Software, Vol. 110(6): 1–23. -\doi{10.18637/jss.v110.i06}. +"sparsegl: An \code{R} Package for Estimating Sparse Group Lasso." +\emph{Journal of Statistical Software}, \strong{110}(6): 1–23. +\href{https://doi.org/10.18637/jss.v110.i06}{doi:10.18637/jss.v110.i06}. } \seealso{ Useful links: @@ -26,7 +26,7 @@ Useful links: } \author{ -\strong{Maintainer}: Daniel J. McDonald \email{daniel@stat.ubc.ca} +\strong{Maintainer}: Daniel J. McDonald \email{daniel@stat.ubc.ca} [copyright holder] Authors: \itemize{ From 4655a0b5947a5c07ec19c1b095a4a1fca545cb7c Mon Sep 17 00:00:00 2001 From: "Daniel J. 
McDonald" Date: Tue, 10 Jun 2025 14:19:44 -0700 Subject: [PATCH 4/4] rm magrittr pipe --- DESCRIPTION | 1 - NAMESPACE | 2 -- NEWS.md | 1 + R/data.R | 8 ++++---- R/plot.cv.sparsegl.R | 3 +-- R/plot.sparsegl.R | 3 +-- R/utils-pipe.R | 14 -------------- data-raw/trust-experts.R | 28 ++++++++++++++-------------- man/pipe.Rd | 20 -------------------- man/trust_experts.Rd | 8 ++++---- vignettes/sparsegl.Rmd | 8 ++++---- 11 files changed, 29 insertions(+), 67 deletions(-) delete mode 100644 R/utils-pipe.R delete mode 100644 man/pipe.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 8dc4fe3..918aec4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -36,7 +36,6 @@ Imports: cli, dotCall64, ggplot2, - magrittr, Matrix, methods, rlang, diff --git a/NAMESPACE b/NAMESPACE index b661ddf..6431422 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -22,7 +22,6 @@ S3method(print,summary.cvsparsegl) S3method(print,summary.sparsegl) S3method(summary,cv.sparsegl) S3method(summary,sparsegl) -export("%>%") export(cv.sparsegl) export(estimate_risk) export(gr_one_norm) @@ -45,7 +44,6 @@ importFrom(dotCall64,.C64) importFrom(dotCall64,integer_dc) importFrom(dotCall64,numeric_dc) importFrom(dotCall64,vector_dc) -importFrom(magrittr,"%>%") importFrom(methods,as) importFrom(methods,cbind2) importFrom(methods,rbind2) diff --git a/NEWS.md b/NEWS.md index 10fd22a..d68ac0e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ # sparsegl (development version) * Force `weights` to sum to `nobs` for all IRWLS cases. 
+* Remove `magrittr` from imports # sparsegl 1.1.1 diff --git a/R/data.R b/R/data.R index 2b566b6..2e720db 100644 --- a/R/data.R +++ b/R/data.R @@ -46,13 +46,13 @@ #' \dontrun{ #' library(splines) #' library(dplyr) -#' library(magrittr) #' df <- 10 #' -#' trust_experts <- trust_experts %>% -#' mutate(across( +#' trust_experts <- mutate( +#' trust_experts, +#' across( #' where(is.factor), -#' ~ set_attr(.x, "contrasts", contr.sum(nlevels(.x), FALSE, TRUE)) +#' ~ structure(.x, contrasts = contr.sum(nlevels(.x), FALSE, TRUE)) #' )) #' #' x <- Matrix::sparse.model.matrix( diff --git a/R/plot.cv.sparsegl.R b/R/plot.cv.sparsegl.R index 6b3b32f..c7e546e 100644 --- a/R/plot.cv.sparsegl.R +++ b/R/plot.cv.sparsegl.R @@ -36,8 +36,7 @@ plot.cv.sparsegl <- function(x, log_axis = c("xy", "x", "y", "none"), ) log_axis <- match.arg(log_axis) sign.lambda <- sign(sign.lambda) - g <- dat %>% - ggplot2::ggplot(ggplot2::aes(x = .data$X, y = .data$y)) + + g <- ggplot2::ggplot(dat, ggplot2::aes(x = .data$X, y = .data$y)) + ggplot2::geom_errorbar( ggplot2::aes(ymin = .data$lower, ymax = .data$upper), color = "darkgrey" diff --git a/R/plot.sparsegl.R b/R/plot.sparsegl.R index b803f9d..0b0376e 100644 --- a/R/plot.sparsegl.R +++ b/R/plot.sparsegl.R @@ -60,8 +60,7 @@ plot.sparsegl <- function(x, df$lambda <- x$lambda df$penalty <- sgnorm / max(sgnorm) - df <- df %>% - tidyr::pivot_longer(!c(.data$lambda, .data$penalty), names_to = y_axis) + df <- tidyr::pivot_longer(df, !c(.data$lambda, .data$penalty), names_to = y_axis) plot_layer <- ggplot2::ggplot( df, ggplot2::aes( diff --git a/R/utils-pipe.R b/R/utils-pipe.R deleted file mode 100644 index fd0b1d1..0000000 --- a/R/utils-pipe.R +++ /dev/null @@ -1,14 +0,0 @@ -#' Pipe operator -#' -#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. -#' -#' @name %>% -#' @rdname pipe -#' @keywords internal -#' @export -#' @importFrom magrittr %>% -#' @usage lhs \%>\% rhs -#' @param lhs A value or the magrittr placeholder.
-#' @param rhs A function call using the magrittr semantics. -#' @return The result of calling `rhs(lhs)`. -NULL diff --git a/data-raw/trust-experts.R b/data-raw/trust-experts.R index 7425b64..8c56f4d 100644 --- a/data-raw/trust-experts.R +++ b/data-raw/trust-experts.R @@ -2,44 +2,44 @@ library(tidyverse) ## Downloaded on 18 November 2022 covidcast_url <- "https://www.cmu.edu/delphi-web/surveys/monthly-rollup/monthly_state_all_indicators_age_gender_raceethnicity.csv.gz" symp <- read_csv(covidcast_url) -symp <- symp %>% +symp <- symp |> select(period_start, region, age, gender, raceethnicity, starts_with("val_")) -trust_experts <- symp %>% +trust_experts <- symp |> select( period_start, region, age, gender, raceethnicity, contains("val_pct_trust_covid_info"), val_pct_cli, val_pct_hh_cmnty_cli, val_pct_wearing_mask_5d, val_pct_wearing_mask_7d - ) %>% - mutate(period = lubridate::ymd(period_start)) %>% - filter(period > lubridate::ymd("2021-05-01")) %>% # remove pre-survey period - select(-period_start) %>% + ) |> + mutate(period = lubridate::ymd(period_start)) |> + filter(period > lubridate::ymd("2021-05-01")) |> # remove pre-survey period + select(-period_start) |> mutate( age = str_c(replace_na(age, "NotReported")), gender = str_c(replace_na(gender, "NotReported")), raceethnicity = str_c(replace_na(raceethnicity, "NotReported")) - ) %>% + ) |> rename_with(~ str_remove(.x, "val_pct_"), starts_with("val_pct")) -trust_experts <- trust_experts %>% - select(!(trust_covid_info_politicians:trust_covid_info_religious)) %>% +trust_experts <- trust_experts |> + select(!(trust_covid_info_politicians:trust_covid_info_religious)) |> mutate( region = as.factor(region), age = as.factor(age), gender = as.factor(gender), raceethnicity = as.factor(raceethnicity), period = as.factor(period) - ) %>% - rowwise() %>% + ) |> + rowwise() |> mutate( trust_experts = mean(c_across(starts_with("trust_covid")), na.rm = TRUE), masking = mean(c_across(starts_with("wearing_mask")), na.rm = TRUE) 
- ) %>% - select(!starts_with("trust_covid") & !starts_with("wearing_mask")) %>% - select(!masking) %>% + ) |> + select(!starts_with("trust_covid") & !starts_with("wearing_mask")) |> + select(!masking) |> ungroup() cc <- complete.cases(trust_experts) diff --git a/man/pipe.Rd b/man/pipe.Rd deleted file mode 100644 index a648c29..0000000 --- a/man/pipe.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils-pipe.R -\name{\%>\%} -\alias{\%>\%} -\title{Pipe operator} -\usage{ -lhs \%>\% rhs -} -\arguments{ -\item{lhs}{A value or the magrittr placeholder.} - -\item{rhs}{A function call using the magrittr semantics.} -} -\value{ -The result of calling \code{rhs(lhs)}. -} -\description{ -See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. -} -\keyword{internal} diff --git a/man/trust_experts.Rd b/man/trust_experts.Rd index c796a62..44a8043 100644 --- a/man/trust_experts.Rd +++ b/man/trust_experts.Rd @@ -57,13 +57,13 @@ on 05 February 2022. \dontrun{ library(splines) library(dplyr) -library(magrittr) df <- 10 -trust_experts <- trust_experts \%>\% - mutate(across( +trust_experts <- mutate( + trust_experts, + across( where(is.factor), - ~ set_attr(.x, "contrasts", contr.sum(nlevels(.x), FALSE, TRUE)) + ~ rlang::set_attrs(.x, "contrasts", contr.sum(nlevels(.x), FALSE, TRUE)) )) x <- Matrix::sparse.model.matrix( diff --git a/vignettes/sparsegl.Rmd b/vignettes/sparsegl.Rmd index dcd47f7..06b2256 100644 --- a/vignettes/sparsegl.Rmd +++ b/vignettes/sparsegl.Rmd @@ -270,11 +270,11 @@ risk <- estimate_risk(fit1, X, approx_df = FALSE) library(dplyr) library(tidyr) library(ggplot2) -er <- risk %>% - dplyr::select(-df) %>% +er <- risk |> + dplyr::select(-df) |> pivot_longer(-lambda, values_to = "risk") -err <- er %>% - group_by(name) %>% +err <- er |> + group_by(name) |> summarise(lambda = lambda[which.min(risk)]) ggplot(er, aes(lambda, risk, color = name)) + geom_line() +