From ae769fcf13b023728c5bd505b24df6bc536763aa Mon Sep 17 00:00:00 2001
From: John Ehrlinger <john.ehrlinger@gmail.com>
Date: Thu, 19 Jun 2025 08:19:32 -0400
Subject: [PATCH 1/3] partial plot

---
 CRAN-SUBMISSION          |  4 ++--
 R/gg_partial_df.R        | 50 ++++++++++++++++++++++++++++++++++++++++
 R/varpro_feature_names.R | 18 +++++++++++++++
 3 files changed, 70 insertions(+), 2 deletions(-)
 create mode 100644 R/gg_partial_df.R
 create mode 100644 R/varpro_feature_names.R

diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION
index 029c987..99f9f73 100644
--- a/CRAN-SUBMISSION
+++ b/CRAN-SUBMISSION
@@ -1,3 +1,3 @@
 Version: 2.4.0
-Date: 2025-04-17 13:57:56 UTC
-SHA: 9fa3109a41a9bd0ca095a30527ca34ba233c1e34
+Date: 2025-06-17 20:25:38 UTC
+SHA: 32dc9e0e856f599cdf2dab33ac994973fadb5c2f
diff --git a/R/gg_partial_df.R b/R/gg_partial_df.R
new file mode 100644
index 0000000..68154da
--- /dev/null
+++ b/R/gg_partial_df.R
@@ -0,0 +1,50 @@
+##=============================================================================
+## Split partial lots into continuous or categorical datasets
+df_partial = function(part_dta, nvars = NULL, cat_limit = 10, name=NULL) {
+  ## Prepare the partial dependencies data for panel plots
+  if (is.null(nvars)) {
+    nvars = length(part_dta$plotthis)
+  }
+  if (is.null(name)) name = NA ## is.na(NULL) is logical(0); if() rejects it
+  cont_list = list()
+  cat_list = list()
+  for (feature in seq_len(nvars)) {
+    ## Continuous features: more than cat_limit unique x values
+    if (length(unique(part_dta$plotthis[[feature]]$x)) > cat_limit) {
+      plt.df = as.data.frame(
+        cbind(
+          x = part_dta$plotthis[[feature]]$x,
+          yhat = part_dta$plotthis[[feature]]$yhat
+        )
+      )
+      plt.df$name = names(part_dta$plotthis)[[feature]]
+      
+      cont_list[[feature]] <- plt.df
+    } else{
+      ## Categorical features: at most cat_limit unique x values
+      ## Though VarPro works with logical or continuous only. Factors are
+      ## one hot encoded internal to the varPro call.
+      ## NOTE(review): cbind() keeps only the integer codes of factor(x).
+      plt.df = as.data.frame(
+        cbind(
+          x = factor(part_dta$plotthis[[feature]]$x),
+          yhat = part_dta$plotthis[[feature]]$yhat
+        )
+      )
+      plt.df$name = names(part_dta$plotthis)[[feature]]
+      
+      cat_list[[feature]] <- plt.df
+    }
+  }
+  continuous = bind_rows(cont_list)
+  categorical = bind_rows(cat_list)
+  
+  if(!is.na(name)){
+    continuous$model <- categorical$model <- name
+  }
+  
+  return(list(
+    continuous = continuous,
+    categorical = categorical
+  ))
+}
\ No newline at end of file
diff --git a/R/varpro_feature_names.R b/R/varpro_feature_names.R
new file mode 100644
index 0000000..18d1769
--- /dev/null
+++ b/R/varpro_feature_names.R
@@ -0,0 +1,18 @@
+
+##=============================================================================
+## varpro one hot encodes features, so we need to get the "raw"
+## original variable names. This loops through the variable names
+## not in the original dataset, and cuts one character off the end
+## until we find the variable name in the original data.
+
+varpro_feature_name <- function(varpro_names, dataset) {
+  inc_set <- varpro_names[which(varpro_names %in% colnames(dataset))]
+  one_set <- varpro_names[which(!varpro_names %in% colnames(dataset))]
+  while (length(one_set) > 0) {
+    orig <- unlist(lapply(one_set, str_sub, 1,-2))
+    inc_set <-
+      union(inc_set, orig[which(orig %in% colnames(dataset))])
+    one_set <- orig[which(!orig %in% colnames(dataset))]
+    one_set <- one_set[!is.na(one_set) & nzchar(one_set)] ## ""/NA never match
+  }
+  return(inc_set) }

From 3c7ffab9588ca975554ea3ab5cb4e1d92dd0ddd9 Mon Sep 17 00:00:00 2001
From: John Ehrlinger <john.ehrlinger@gmail.com>
Date: Thu, 19 Jun 2025 09:01:17 -0400
Subject: [PATCH 2/3] Add some functionality

---
 DESCRIPTION                |  3 ++-
 NAMESPACE                  |  3 +++
 R/gg_partial_df.R          | 14 +++++++++++---
 R/varpro_feature_names.R   | 17 +++++++++++------
 man/df_partial.Rd          | 22 ++++++++++++++++++++++
 man/varpro_feature_name.Rd | 22 ++++++++++++++++++++++
 6 files changed, 71 insertions(+), 10 deletions(-)
 create mode 100644 man/df_partial.Rd
 create mode 100644 man/varpro_feature_name.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 4a00307..affb231 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -21,7 +21,8 @@ Imports:
     parallel,
     tidyr,
     dplyr,
-    ggplot2
+    ggplot2,
+    stringr
 Suggests:
     testthat,
     bookdown,
diff --git a/NAMESPACE b/NAMESPACE
index 22aebc4..15f6e69 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -22,6 +22,7 @@ S3method(plot,gg_variable)
 S3method(plot,gg_vimp)
 export(calc_auc)
 export(calc_roc)
+export(df_partial)
 export(gg_error)
 export(gg_error.randomForest)
 export(gg_error.randomForest.formula)
@@ -35,6 +36,7 @@ export(kaplan)
 export(nelson)
 export(quantile_pts)
 export(r_data_types)
+export(varpro_feature_name)
 importFrom(dplyr,across)
 importFrom(dplyr,mutate)
 importFrom(dplyr,n_distinct)
@@ -53,6 +55,7 @@ importFrom(stats,predict)
 importFrom(stats,qnorm)
 importFrom(stats,quantile)
 importFrom(stats,xtabs)
+importFrom(stringr,str_sub)
 importFrom(survival,Surv)
 importFrom(survival,strata)
 importFrom(survival,survfit)
diff --git a/R/gg_partial_df.R b/R/gg_partial_df.R
index 68154da..c7bb9d1 100644
--- a/R/gg_partial_df.R
+++ b/R/gg_partial_df.R
@@ -1,5 +1,13 @@
 ##=============================================================================
-## Split partial lots into continuous or categorical datasets
+#' Split partial lots into continuous or categorical datasets
+#' @param part_dta partial plot data from \code{rfsrc::plot.variable}
+#' @param nvars how many of the partial plot variables to calculate
+#' @param cat_limit Categorical features are build when there are fewer than 
+#'  cat_limit unique features.
+#' @param name a label name applied to all features. Useful when combining 
+#'  multiple partial plot objects in figures.
+#'    
+#' @export
 df_partial = function(part_dta, nvars = NULL, cat_limit = 10, name=NULL) {
   ## Prepare the partial dependencies data for panel plots
   if (is.null(nvars)) {
@@ -36,8 +44,8 @@ df_partial = function(part_dta, nvars = NULL, cat_limit = 10, name=NULL) {
       cat_list[[feature]] <- plt.df
     }
   }
-  continuous = bind_rows(cont_list)
-  categorical = bind_rows(cat_list)
+  continuous = dplyr::bind_rows(cont_list)
+  categorical = dplyr::bind_rows(cat_list)
   
   if(!is.na(name)){
     continuous$model <- categorical$model <- name
diff --git a/R/varpro_feature_names.R b/R/varpro_feature_names.R
index 18d1769..5e85efc 100644
--- a/R/varpro_feature_names.R
+++ b/R/varpro_feature_names.R
@@ -1,15 +1,20 @@
 
 ##=============================================================================
-## varpro one hot encodes features, so we need to get the "raw"
-## original variable names. This loops through the variable names
-## not in the original dataset, and cuts one character off the end
-## until we find the variable name in the original data.
-
+#' varpro one hot encodes features, so we need to get the "raw"
+#' original variable names. This loops through the variable names
+#' not in the original dataset, and cuts one character off the end
+#' until we find the variable name in the original data.
+#' 
+#' @param varpro_names vector of names output from varpro analysis
+#' @param dataset the dataset used for varpro input.
+#' 
+#' @importFrom stringr str_sub
+#' @export
 varpro_feature_name <- function(varpro_names, dataset) {
   inc_set <- varpro_names[which(varpro_names %in% colnames(dataset))]
   one_set <- varpro_names[which(!varpro_names %in% colnames(dataset))]
   while (length(one_set) > 0) {
-    orig <- unlist(lapply(one_set, str_sub, 1,-2))
+    orig <- unlist(lapply(one_set, stringr::str_sub, 1,-2))
     inc_set <-
       union(inc_set, orig[which(orig %in% colnames(dataset))])
     one_set <- orig[which(!orig %in% colnames(dataset))]
diff --git a/man/df_partial.Rd b/man/df_partial.Rd
new file mode 100644
index 0000000..3f1158d
--- /dev/null
+++ b/man/df_partial.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/gg_partial_df.R
+\name{df_partial}
+\alias{df_partial}
+\title{Split partial lots into continuous or categorical datasets}
+\usage{
+df_partial(part_dta, nvars = NULL, cat_limit = 10, name = NULL)
+}
+\arguments{
+\item{part_dta}{partial plot data from \code{rfsrc::plot.variable}}
+
+\item{nvars}{how many of the partial plot variables to calculate}
+
+\item{cat_limit}{Categorical features are build when there are fewer than 
+cat_limit unique features.}
+
+\item{name}{a label name applied to all features. Useful when combining 
+multiple partial plot objects in figures.}
+}
+\description{
+Split partial lots into continuous or categorical datasets
+}
diff --git a/man/varpro_feature_name.Rd b/man/varpro_feature_name.Rd
new file mode 100644
index 0000000..e00c8ec
--- /dev/null
+++ b/man/varpro_feature_name.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/varpro_feature_names.R
+\name{varpro_feature_name}
+\alias{varpro_feature_name}
+\title{varpro one hot encodes features, so we need to get the "raw"
+original variable names. This loops through the variable names
+not in the original dataset, and cuts one character off the end
+until we find the variable name in the original data.}
+\usage{
+varpro_feature_name(varpro_names, dataset)
+}
+\arguments{
+\item{varpro_names}{vector of names output from varpro analysis}
+
+\item{dataset}{the dataset used for varpro input.}
+}
+\description{
+varpro one hot encodes features, so we need to get the "raw"
+original variable names. This loops through the variable names
+not in the original dataset, and cuts one character off the end
+until we find the variable name in the original data.
+}

From 2fb60119e15592da36779b4a2d46a061c2da08a7 Mon Sep 17 00:00:00 2001
From: John Ehrlinger <john.ehrlinger@gmail.com>
Date: Thu, 19 Jun 2025 16:11:36 -0400
Subject: [PATCH 3/3] varpro partial dataframe

---
 NAMESPACE            |  1 +
 R/gg_partialpro_df.R | 79 ++++++++++++++++++++++++++++++++++++++++++++
 man/df_partialpro.Rd | 22 ++++++++++++
 3 files changed, 102 insertions(+)
 create mode 100644 R/gg_partialpro_df.R
 create mode 100644 man/df_partialpro.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 15f6e69..fd95900 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -23,6 +23,7 @@ S3method(plot,gg_vimp)
 export(calc_auc)
 export(calc_roc)
 export(df_partial)
+export(df_partialpro)
 export(gg_error)
 export(gg_error.randomForest)
 export(gg_error.randomForest.formula)
diff --git a/R/gg_partialpro_df.R b/R/gg_partialpro_df.R
new file mode 100644
index 0000000..0965e63
--- /dev/null
+++ b/R/gg_partialpro_df.R
@@ -0,0 +1,79 @@
+##=============================================================================
+##=============================================================================
+#' Split partial plots into continuous or categorical datasets
+#' @param part_dta list with one element per feature, each holding
+#'  \code{xvirtual}, \code{xorg}, \code{yhat.par}, \code{yhat.nonpar} and
+#'  \code{yhat.causal} (presumably varPro partialpro output; confirm).
+#' @param nvars how many of the partial plot variables to calculate.
+#'  Defaults to every element of \code{part_dta}.
+#' @param cat_limit features with more than \code{cat_limit}
+#'  \code{xvirtual} values are continuous, all others categorical.
+#' @param name a label name applied to all features. Useful when combining
+#'  multiple partial plot objects in figures.
+#'
+#' @return list of two data frames, \code{continuous} and
+#'  \code{categorical}, with columns \code{variable}, \code{parametric},
+#'  \code{nonparametric}, \code{causal} and \code{name}; a \code{model}
+#'  column holding \code{name} is added when \code{name} is supplied.
+#' @export
+df_partialpro = function(part_dta, nvars = NULL, cat_limit=12, name=NULL) {
+  ## Prepare the partial pro  dependencies data for panel plots
+  if (is.null(nvars)) {
+    nvars = length(part_dta)
+  }
+  
+  cont_list = list()
+  cat_list = list()
+  for (feature in seq_len(nvars)) {
+    part = part_dta[[feature]]
+    if (length(part$xvirtual) > cat_limit) {
+      ## Continuous feature: column means of each yhat matrix give one
+      ## value per xvirtual point.
+      plt.df = as.data.frame(
+        cbind(
+          variable = part$xvirtual,
+          parametric = colMeans(part$yhat.par, na.rm = TRUE),
+          nonparametric = colMeans(part$yhat.nonpar, na.rm = TRUE),
+          causal = colMeans(part$yhat.causal, na.rm = TRUE)
+        )
+      )
+      plt.df$name = names(part_dta)[[feature]]
+      cont_list[[feature]] <- plt.df
+    } else{
+      ## Categorical features
+      ## Though VarPro works with logical or continuous only. Factors are
+      ## one hot encoded internal to the varPro call.
+      ## NOTE(review): column ind of each yhat matrix is labelled with the
+      ## ind-th unique xorg value; confirm the columns follow that order.
+      lvls = unique(part$xorg)
+      cat_feat = list()
+      ## Each yhat has at least 2 columns, for logical values...
+      for (ind in seq_along(lvls)) {
+        cat_feat[[ind]] = as.data.frame(
+          cbind(
+            parametric = part$yhat.par[, ind],
+            nonparametric = part$yhat.nonpar[, ind],
+            causal = part$yhat.causal[, ind]
+          )
+        )
+        cat_feat[[ind]]$variable <- lvls[ind]
+      }
+      plt.df <- dplyr::bind_rows(cat_feat)
+      plt.df$name = names(part_dta)[[feature]]
+      cat_list[[feature]] <- plt.df
+    }
+  }
+  
+  continuous = dplyr::bind_rows(cont_list)
+  categorical = dplyr::bind_rows(cat_list)
+  
+  ## is.na(NULL) is logical(0), which if() rejects, so test for NULL first.
+  if (!is.null(name) && !is.na(name)) {
+    continuous$model <- categorical$model <- name
+  }
+  
+  return(list(
+    continuous = continuous,
+    categorical = categorical
+  ))
+}
\ No newline at end of file
diff --git a/man/df_partialpro.Rd b/man/df_partialpro.Rd
new file mode 100644
index 0000000..5e485ef
--- /dev/null
+++ b/man/df_partialpro.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/gg_partialpro_df.R
+\name{df_partialpro}
+\alias{df_partialpro}
+\title{Split partial lots into continuous or categorical datasets}
+\usage{
+df_partialpro(part_dta, nvars = NULL, cat_limit = 12, name = NULL)
+}
+\arguments{
+\item{part_dta}{partial plot data from \code{rfsrc::plot.variable}}
+
+\item{nvars}{how many of the partial plot variables to calculate}
+
+\item{cat_limit}{Categorical features are build when there are fewer than 
+cat_limit unique features.}
+
+\item{name}{a label name applied to all features. Useful when combining 
+multiple partial plot objects in figures.}
+}
+\description{
+Split partial lots into continuous or categorical datasets
+}