diff --git a/DESCRIPTION b/DESCRIPTION index affb231..3d71a1c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: ggRandomForests Type: Package Title: Visually Exploring Random Forests -Version: 2.4.0 +Version: 2.4.1 Date: 2025-06-17 Authors@R: person("John", "Ehrlinger", role = c("aut", "cre"), diff --git a/NAMESPACE b/NAMESPACE index b7434d2..c3f9871 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -37,7 +37,8 @@ export(kaplan) export(nelson) export(quantile_pts) export(r_data_types) -export(varpro_feature_name) +export(surv_partial.rfsrc) +export(varpro_feature_names) importFrom(dplyr,across) importFrom(dplyr,mutate) importFrom(dplyr,n_distinct) @@ -49,6 +50,7 @@ importFrom(ggplot2,labs) importFrom(ggplot2,theme) importFrom(parallel,mclapply) importFrom(randomForest,randomForest) +importFrom(randomForestSRC,partial.rfsrc) importFrom(randomForestSRC,vimp) importFrom(stats,median) importFrom(stats,na.omit) diff --git a/R/surv_partial.rfsrc.R b/R/surv_partial.rfsrc.R new file mode 100644 index 0000000..ab7b5c7 --- /dev/null +++ b/R/surv_partial.rfsrc.R @@ -0,0 +1,123 @@ +#' Calculate survival curve partial plot. +#' +#' @param rforest the randomForestSrc object +#' @param var_list a list of variables of interest. These variables should be a +#' subset of rforest$xvar.names +#' @param npts the number of points to segment the xvar of interest +#' @param partial.type the return prediction type. +#' For survival forests: type c("surv", "mort", "chf") +#' For competing risk forests: type c("years.lost", "cif", "chf") +#' see \code{randomForestSRC::partial.rfsrc} or more information +#' +#' @importFrom randomForestSRC partial.rfsrc +#' @examples +#' ## ------------------------------------------------------------ +#' ## survival +#' ## ------------------------------------------------------------ +#' +#' data(veteran, package = "randomForestSRC") +#' v.obj <- randomForestSRC::rfsrc(Surv(time,status)~., +#' veteran, nsplit = 10, ntree = 100) +#' +#' spart <- surv_partial.rfsrc(v.obj, var_list="age", partial.type = "mort") +#' +#' ## partial effect of age on mortality +#' partial.obj <- partial(v.obj, +#' partial.type = "mort", +#' partial.xvar = "age", +#' partial.values = v.obj$xvar$age, +#' partial.time = v.obj$time.interest) +#' pdta <- get.partial.plot.data(partial.obj) +#' +#' plot(lowess(pdta$x, pdta$yhat, f = 1/3), +#' type = "l", xlab = "age", ylab = "adjusted mortality") +#' +#' ## example where x is discrete - partial effect of age on mortality +#' ## we use the granule=TRUE option +#' partial.obj <- partial(v.obj, +#' partial.type = "mort", +#' partial.xvar = "trt", +#' partial.values = v.obj$xvar$trt, +#' partial.time = v.obj$time.interest) +#' pdta <- get.partial.plot.data(partial.obj, granule = TRUE) +#' boxplot(pdta$yhat ~ pdta$x, xlab = "treatment", ylab = "partial effect") +#' +#' +#' ## partial effects of karnofsky score on survival +#' karno <- quantile(v.obj$xvar$karno) +#' partial.obj <- partial(v.obj, +#' partial.type = "surv", +#' partial.xvar = "karno", +#' partial.values = karno, +#' partial.time = v.obj$time.interest) +#' pdta <- get.partial.plot.data(partial.obj) +#' +#' matplot(pdta$partial.time, t(pdta$yhat), type = "l", lty = 1, +#' xlab = "time", ylab = "karnofsky adjusted survival") +#' legend("topright", legend = paste0("karnofsky = ", karno), fill = 1:5) +#' +#' +#' ## ------------------------------------------------------------ +#' ## competing risk +#' ## ------------------------------------------------------------ +#' +#' data(follic, package = "randomForestSRC") +#' follic.obj <- rfsrc(Surv(time, status) ~ ., follic, nsplit = 3, ntree = 100) +#' +#' ## partial effect of age on years lost +#' partial.obj <- partial(follic.obj, +#' partial.type = "years.lost", +#' partial.xvar = "age", +#' partial.values = follic.obj$xvar$age, +#' partial.time = follic.obj$time.interest) +#' pdta1 <- get.partial.plot.data(partial.obj, target = 1) +#' pdta2 <- get.partial.plot.data(partial.obj, target = 2) +#' +#' par(mfrow=c(2,2)) +#' plot(lowess(pdta1$x, pdta1$yhat), +#' type = "l", xlab = "age", ylab = "adjusted years lost relapse") +#' plot(lowess(pdta2$x, pdta2$yhat), +#' type = "l", xlab = "age", ylab = "adjusted years lost death") +#' +#' ## partial effect of age on cif +#' partial.obj <- partial(follic.obj, +#' partial.type = "cif", +#' partial.xvar = "age", +#' partial.values = quantile(follic.obj$xvar$age), +#' partial.time = follic.obj$time.interest) +#' pdta1 <- get.partial.plot.data(partial.obj, target = 1) +#' pdta2 <- get.partial.plot.data(partial.obj, target = 2) +#' +#' matplot(pdta1$partial.time, t(pdta1$yhat), type = "l", lty = 1, +#' xlab = "time", ylab = "age adjusted cif for relapse") +#' matplot(pdta2$partial.time, t(pdta2$yhat), type = "l", lty = 1, +#' xlab = "time", ylab = "age adjusted cif for death") +#' +#' @export surv_partial.rfsrc +surv_partial.rfsrc <- function(rforest, var_list, npts=25, partial.type = "surv") { + ###----------Partial dependency estimation, for each variable, at each time point ---- + surv.lst <- lapply(var_list, function(xvar) { + ## extract the key variable + cat("partial plot for:", xvar, "\n") + + ## determine the partial plot data + xv <- sort(unique(rforest$xvar[, xvar])) + xv <- unique(xv[seq(1, length(xv), length = npts)]) + + ## Get the partial.plot.data + partial.dta <- randomForestSRC::get.partial.plot.data( + randomForestSRC::partial.rfsrc( + rforest, + partial.type = partial.type, + partial.xvar = xvar, + partial.values = xv, + partial.time = rforest$time.interest + ) + ) + + list(name=xvar, + dta = partial.dta) + + }) + return(surv.lst) +} diff --git a/R/varpro_feature_names.R b/R/varpro_feature_names.R index 5e85efc..08153fa 100644 --- a/R/varpro_feature_names.R +++ b/R/varpro_feature_names.R @@ -10,7 +10,7 @@ #' #' @importFrom stringr str_sub #' @export -varpro_feature_name <- function(varpro_names, dataset) { +varpro_feature_names <- function(varpro_names, dataset) { inc_set <- varpro_names[which(varpro_names %in% colnames(dataset))] one_set <- varpro_names[which(!varpro_names %in% colnames(dataset))] while (length(one_set) > 0) { diff --git a/man/surv_partial.rfsrc.Rd b/man/surv_partial.rfsrc.Rd new file mode 100644 index 0000000..87bbf61 --- /dev/null +++ b/man/surv_partial.rfsrc.Rd @@ -0,0 +1,108 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/surv_partial.rfsrc.R +\name{surv_partial.rfsrc} +\alias{surv_partial.rfsrc} +\title{Calculate survival curve partial plot.} +\usage{ +surv_partial.rfsrc(rforest, var_list, npts = 25, partial.type = "surv") +} +\arguments{ +\item{rforest}{the randomForestSrc object} + +\item{var_list}{a list of variables of interest. These variables should be a +subset of rforest$xvar.names} + +\item{npts}{the number of points to segment the xvar of interest} + +\item{partial.type}{the return prediction type. +For survival forests: type c("surv", "mort", "chf") +For competing risk forests: type c("years.lost", "cif", "chf") +see \code{randomForestSRC::partial.rfsrc} or more information} +} +\description{ +Calculate survival curve partial plot. +} +\examples{ +## ------------------------------------------------------------ +## survival +## ------------------------------------------------------------ + +data(veteran, package = "randomForestSRC") +v.obj <- randomForestSRC::rfsrc(Surv(time,status)~., + veteran, nsplit = 10, ntree = 100) + +spart <- surv_partial.rfsrc(v.obj, var_list="age", partial.type = "mort") + +## partial effect of age on mortality +partial.obj <- partial(v.obj, + partial.type = "mort", + partial.xvar = "age", + partial.values = v.obj$xvar$age, + partial.time = v.obj$time.interest) +pdta <- get.partial.plot.data(partial.obj) + +plot(lowess(pdta$x, pdta$yhat, f = 1/3), + type = "l", xlab = "age", ylab = "adjusted mortality") + +## example where x is discrete - partial effect of age on mortality +## we use the granule=TRUE option +partial.obj <- partial(v.obj, + partial.type = "mort", + partial.xvar = "trt", + partial.values = v.obj$xvar$trt, + partial.time = v.obj$time.interest) +pdta <- get.partial.plot.data(partial.obj, granule = TRUE) +boxplot(pdta$yhat ~ pdta$x, xlab = "treatment", ylab = "partial effect") + + +## partial effects of karnofsky score on survival +karno <- quantile(v.obj$xvar$karno) +partial.obj <- partial(v.obj, + partial.type = "surv", + partial.xvar = "karno", + partial.values = karno, + partial.time = v.obj$time.interest) +pdta <- get.partial.plot.data(partial.obj) + +matplot(pdta$partial.time, t(pdta$yhat), type = "l", lty = 1, + xlab = "time", ylab = "karnofsky adjusted survival") +legend("topright", legend = paste0("karnofsky = ", karno), fill = 1:5) + + +## ------------------------------------------------------------ +## competing risk +## ------------------------------------------------------------ + +data(follic, package = "randomForestSRC") +follic.obj <- rfsrc(Surv(time, status) ~ ., follic, nsplit = 3, ntree = 100) + +## partial effect of age on years lost +partial.obj <- partial(follic.obj, + partial.type = "years.lost", + partial.xvar = "age", + partial.values = follic.obj$xvar$age, + partial.time = follic.obj$time.interest) +pdta1 <- get.partial.plot.data(partial.obj, target = 1) +pdta2 <- get.partial.plot.data(partial.obj, target = 2) + +par(mfrow=c(2,2)) +plot(lowess(pdta1$x, pdta1$yhat), + type = "l", xlab = "age", ylab = "adjusted years lost relapse") +plot(lowess(pdta2$x, pdta2$yhat), + type = "l", xlab = "age", ylab = "adjusted years lost death") + +## partial effect of age on cif +partial.obj <- partial(follic.obj, + partial.type = "cif", + partial.xvar = "age", + partial.values = quantile(follic.obj$xvar$age), + partial.time = follic.obj$time.interest) +pdta1 <- get.partial.plot.data(partial.obj, target = 1) +pdta2 <- get.partial.plot.data(partial.obj, target = 2) + +matplot(pdta1$partial.time, t(pdta1$yhat), type = "l", lty = 1, + xlab = "time", ylab = "age adjusted cif for relapse") +matplot(pdta2$partial.time, t(pdta2$yhat), type = "l", lty = 1, + xlab = "time", ylab = "age adjusted cif for death") + +} diff --git a/man/varpro_feature_name.Rd b/man/varpro_feature_names.Rd similarity index 88% rename from man/varpro_feature_name.Rd rename to man/varpro_feature_names.Rd index e00c8ec..96b95ed 100644 --- a/man/varpro_feature_name.Rd +++ b/man/varpro_feature_names.Rd @@ -1,13 +1,13 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/varpro_feature_names.R -\name{varpro_feature_name} -\alias{varpro_feature_name} +\name{varpro_feature_names} +\alias{varpro_feature_names} \title{varpro one hot encodes features, so we need to get the "raw" original variable names. This loops through the variable names not in the original dataset, and cuts one character off the end until we find the variable name in the original data.} \usage{ -varpro_feature_name(varpro_names, dataset) +varpro_feature_names(varpro_names, dataset) } \arguments{ \item{varpro_names}{vector of names output from varpro analysis}