From 9dfdb608b2d1c4aaecfedac8467dd5833f055f25 Mon Sep 17 00:00:00 2001 From: John Ehrlinger Date: Thu, 3 Jul 2025 08:25:11 -0400 Subject: [PATCH 1/3] Add a partial.rfsrc function --- NAMESPACE | 2 + R/surv_partial.rfsrc.R | 123 ++++++++++++++++++++++++++++++++++++++ man/surv_partial.rfsrc.Rd | 108 +++++++++++++++++++++++++++++++++ 3 files changed, 233 insertions(+) create mode 100644 R/surv_partial.rfsrc.R create mode 100644 man/surv_partial.rfsrc.Rd diff --git a/NAMESPACE b/NAMESPACE index b7434d2..65ebfa0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -37,6 +37,7 @@ export(kaplan) export(nelson) export(quantile_pts) export(r_data_types) +export(surv_partial.rfsrc) export(varpro_feature_name) importFrom(dplyr,across) importFrom(dplyr,mutate) @@ -49,6 +50,7 @@ importFrom(ggplot2,labs) importFrom(ggplot2,theme) importFrom(parallel,mclapply) importFrom(randomForest,randomForest) +importFrom(randomForestSRC,partial.rfsrc) importFrom(randomForestSRC,vimp) importFrom(stats,median) importFrom(stats,na.omit) diff --git a/R/surv_partial.rfsrc.R b/R/surv_partial.rfsrc.R new file mode 100644 index 0000000..ab7b5c7 --- /dev/null +++ b/R/surv_partial.rfsrc.R @@ -0,0 +1,123 @@ +#' Calculate survival curve partial plot. +#' +#' @param rforest the randomForestSrc object +#' @param var_list a list of variables of interest. These variables should be a +#' subset of rforest$xvar.names +#' @param npts the number of points to segment the xvar of interest +#' @param partial.type the return prediction type. +#' For survival forests: type c("surv", "mort", "chf") +#' For competing risk forests: type c("years.lost", "cif", "chf") +#' see \code{randomForestSRC::partial.rfsrc} or more information +#' +#' @importFrom randomForestSRC partial.rfsrc +#' @examples +#' ## ------------------------------------------------------------ +#' ## survival +#' ## ------------------------------------------------------------ +#' +#' data(veteran, package = "randomForestSRC") +#' v.obj <- randomForestSRC::rfsrc(Surv(time,status)~., +#' veteran, nsplit = 10, ntree = 100) +#' +#' spart <- surv_partial.rfsrc(v.obj, var_list="age", partial.type = "mort") +#' +#' ## partial effect of age on mortality +#' partial.obj <- partial(v.obj, +#' partial.type = "mort", +#' partial.xvar = "age", +#' partial.values = v.obj$xvar$age, +#' partial.time = v.obj$time.interest) +#' pdta <- get.partial.plot.data(partial.obj) +#' +#' plot(lowess(pdta$x, pdta$yhat, f = 1/3), +#' type = "l", xlab = "age", ylab = "adjusted mortality") +#' +#' ## example where x is discrete - partial effect of age on mortality +#' ## we use the granule=TRUE option +#' partial.obj <- partial(v.obj, +#' partial.type = "mort", +#' partial.xvar = "trt", +#' partial.values = v.obj$xvar$trt, +#' partial.time = v.obj$time.interest) +#' pdta <- get.partial.plot.data(partial.obj, granule = TRUE) +#' boxplot(pdta$yhat ~ pdta$x, xlab = "treatment", ylab = "partial effect") +#' +#' +#' ## partial effects of karnofsky score on survival +#' karno <- quantile(v.obj$xvar$karno) +#' partial.obj <- partial(v.obj, +#' partial.type = "surv", +#' partial.xvar = "karno", +#' partial.values = karno, +#' partial.time = v.obj$time.interest) +#' pdta <- get.partial.plot.data(partial.obj) +#' +#' matplot(pdta$partial.time, t(pdta$yhat), type = "l", lty = 1, +#' xlab = "time", ylab = "karnofsky adjusted survival") +#' legend("topright", legend = paste0("karnofsky = ", karno), fill = 1:5) +#' +#' +#' ## ------------------------------------------------------------ +#' ## competing risk +#' ## ------------------------------------------------------------ +#' +#' data(follic, package = "randomForestSRC") +#' follic.obj <- rfsrc(Surv(time, status) ~ ., follic, nsplit = 3, ntree = 100) +#' +#' ## partial effect of age on years lost +#' partial.obj <- partial(follic.obj, +#' partial.type = "years.lost", +#' partial.xvar = "age", +#' partial.values = follic.obj$xvar$age, +#' partial.time = follic.obj$time.interest) +#' pdta1 <- get.partial.plot.data(partial.obj, target = 1) +#' pdta2 <- get.partial.plot.data(partial.obj, target = 2) +#' +#' par(mfrow=c(2,2)) +#' plot(lowess(pdta1$x, pdta1$yhat), +#' type = "l", xlab = "age", ylab = "adjusted years lost relapse") +#' plot(lowess(pdta2$x, pdta2$yhat), +#' type = "l", xlab = "age", ylab = "adjusted years lost death") +#' +#' ## partial effect of age on cif +#' partial.obj <- partial(follic.obj, +#' partial.type = "cif", +#' partial.xvar = "age", +#' partial.values = quantile(follic.obj$xvar$age), +#' partial.time = follic.obj$time.interest) +#' pdta1 <- get.partial.plot.data(partial.obj, target = 1) +#' pdta2 <- get.partial.plot.data(partial.obj, target = 2) +#' +#' matplot(pdta1$partial.time, t(pdta1$yhat), type = "l", lty = 1, +#' xlab = "time", ylab = "age adjusted cif for relapse") +#' matplot(pdta2$partial.time, t(pdta2$yhat), type = "l", lty = 1, +#' xlab = "time", ylab = "age adjusted cif for death") +#' +#' @export surv_partial.rfsrc +surv_partial.rfsrc <- function(rforest, var_list, npts=25, partial.type = "surv") { + ###----------Partial dependency estimation, for each variable, at each time point ---- + surv.lst <- lapply(var_list, function(xvar) { + ## extract the key variable + cat("partial plot for:", xvar, "\n") + + ## determine the partial plot data + xv <- sort(unique(rforest$xvar[, xvar])) + xv <- unique(xv[seq(1, length(xv), length = npts)]) + + ## Get the partial.plot.data + partial.dta <- randomForestSRC::get.partial.plot.data( + randomForestSRC::partial.rfsrc( + rforest, + partial.type = partial.type, + partial.xvar = xvar, + partial.values = xv, + partial.time = rforest$time.interest + ) + ) + + list(name=xvar, + dta = partial.dta) + + }) + return(surv.lst) +} diff --git a/man/surv_partial.rfsrc.Rd b/man/surv_partial.rfsrc.Rd new file mode 100644 index 0000000..87bbf61 --- /dev/null +++ b/man/surv_partial.rfsrc.Rd @@ -0,0 +1,108 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/surv_partial.rfsrc.R +\name{surv_partial.rfsrc} +\alias{surv_partial.rfsrc} +\title{Calculate survival curve partial plot.} +\usage{ +surv_partial.rfsrc(rforest, var_list, npts = 25, partial.type = "surv") +} +\arguments{ +\item{rforest}{the randomForestSrc object} + +\item{var_list}{a list of variables of interest. These variables should be a +subset of rforest$xvar.names} + +\item{npts}{the number of points to segment the xvar of interest} + +\item{partial.type}{the return prediction type. +For survival forests: type c("surv", "mort", "chf") +For competing risk forests: type c("years.lost", "cif", "chf") +see \code{randomForestSRC::partial.rfsrc} or more information} +} +\description{ +Calculate survival curve partial plot. +} +\examples{ +## ------------------------------------------------------------ +## survival +## ------------------------------------------------------------ + +data(veteran, package = "randomForestSRC") +v.obj <- randomForestSRC::rfsrc(Surv(time,status)~., + veteran, nsplit = 10, ntree = 100) + +spart <- surv_partial.rfsrc(v.obj, var_list="age", partial.type = "mort") + +## partial effect of age on mortality +partial.obj <- partial(v.obj, + partial.type = "mort", + partial.xvar = "age", + partial.values = v.obj$xvar$age, + partial.time = v.obj$time.interest) +pdta <- get.partial.plot.data(partial.obj) + +plot(lowess(pdta$x, pdta$yhat, f = 1/3), + type = "l", xlab = "age", ylab = "adjusted mortality") + +## example where x is discrete - partial effect of age on mortality +## we use the granule=TRUE option +partial.obj <- partial(v.obj, + partial.type = "mort", + partial.xvar = "trt", + partial.values = v.obj$xvar$trt, + partial.time = v.obj$time.interest) +pdta <- get.partial.plot.data(partial.obj, granule = TRUE) +boxplot(pdta$yhat ~ pdta$x, xlab = "treatment", ylab = "partial effect") + + +## partial effects of karnofsky score on survival +karno <- quantile(v.obj$xvar$karno) +partial.obj <- partial(v.obj, + partial.type = "surv", + partial.xvar = "karno", + partial.values = karno, + partial.time = v.obj$time.interest) +pdta <- get.partial.plot.data(partial.obj) + +matplot(pdta$partial.time, t(pdta$yhat), type = "l", lty = 1, + xlab = "time", ylab = "karnofsky adjusted survival") +legend("topright", legend = paste0("karnofsky = ", karno), fill = 1:5) + + +## ------------------------------------------------------------ +## competing risk +## ------------------------------------------------------------ + +data(follic, package = "randomForestSRC") +follic.obj <- rfsrc(Surv(time, status) ~ ., follic, nsplit = 3, ntree = 100) + +## partial effect of age on years lost +partial.obj <- partial(follic.obj, + partial.type = "years.lost", + partial.xvar = "age", + partial.values = follic.obj$xvar$age, + partial.time = follic.obj$time.interest) +pdta1 <- get.partial.plot.data(partial.obj, target = 1) +pdta2 <- get.partial.plot.data(partial.obj, target = 2) + +par(mfrow=c(2,2)) +plot(lowess(pdta1$x, pdta1$yhat), + type = "l", xlab = "age", ylab = "adjusted years lost relapse") +plot(lowess(pdta2$x, pdta2$yhat), + type = "l", xlab = "age", ylab = "adjusted years lost death") + +## partial effect of age on cif +partial.obj <- partial(follic.obj, + partial.type = "cif", + partial.xvar = "age", + partial.values = quantile(follic.obj$xvar$age), + partial.time = follic.obj$time.interest) +pdta1 <- get.partial.plot.data(partial.obj, target = 1) +pdta2 <- get.partial.plot.data(partial.obj, target = 2) + +matplot(pdta1$partial.time, t(pdta1$yhat), type = "l", lty = 1, + xlab = "time", ylab = "age adjusted cif for relapse") +matplot(pdta2$partial.time, t(pdta2$yhat), type = "l", lty = 1, + xlab = "time", ylab = "age adjusted cif for death") + +} From 443ccc3f36477bafaaa59e33755b413e1b06ace6 Mon Sep 17 00:00:00 2001 From: John Ehrlinger Date: Thu, 17 Jul 2025 14:18:56 -0400 Subject: [PATCH 2/3] rename varpro_features_names --- NAMESPACE | 2 +- R/varpro_feature_names.R | 2 +- man/{varpro_feature_name.Rd => varpro_feature_names.Rd} | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) rename man/{varpro_feature_name.Rd => varpro_feature_names.Rd} (88%) diff --git a/NAMESPACE b/NAMESPACE index 65ebfa0..c3f9871 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -38,7 +38,7 @@ export(nelson) export(quantile_pts) export(r_data_types) export(surv_partial.rfsrc) -export(varpro_feature_name) +export(varpro_feature_names) importFrom(dplyr,across) importFrom(dplyr,mutate) importFrom(dplyr,n_distinct) diff --git a/R/varpro_feature_names.R b/R/varpro_feature_names.R index 5e85efc..08153fa 100644 --- a/R/varpro_feature_names.R +++ b/R/varpro_feature_names.R @@ -10,7 +10,7 @@ #' #' @importFrom stringr str_sub #' @export -varpro_feature_name <- function(varpro_names, dataset) { +varpro_feature_names <- function(varpro_names, dataset) { inc_set <- varpro_names[which(varpro_names %in% colnames(dataset))] one_set <- varpro_names[which(!varpro_names %in% colnames(dataset))] while (length(one_set) > 0) { diff --git a/man/varpro_feature_name.Rd b/man/varpro_feature_names.Rd similarity index 88% rename from man/varpro_feature_name.Rd rename to man/varpro_feature_names.Rd index e00c8ec..96b95ed 100644 --- a/man/varpro_feature_name.Rd +++ b/man/varpro_feature_names.Rd @@ -1,13 +1,13 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/varpro_feature_names.R -\name{varpro_feature_name} -\alias{varpro_feature_name} +\name{varpro_feature_names} +\alias{varpro_feature_names} \title{varpro one hot encodes features, so we need to get the "raw" original variable names. This loops through the variable names not in the original dataset, and cuts one character off the end until we find the variable name in the original data.} \usage{ -varpro_feature_name(varpro_names, dataset) +varpro_feature_names(varpro_names, dataset) } \arguments{ \item{varpro_names}{vector of names output from varpro analysis} From 986c012d395a72dc2fea36bb88da4b9c847eec90 Mon Sep 17 00:00:00 2001 From: John Ehrlinger Date: Thu, 17 Jul 2025 14:21:47 -0400 Subject: [PATCH 3/3] Update DESCRIPTION --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index affb231..3d71a1c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: ggRandomForests Type: Package Title: Visually Exploring Random Forests -Version: 2.4.0 +Version: 2.4.1 Date: 2025-06-17 Authors@R: person("John", "Ehrlinger", role = c("aut", "cre"),