-
Notifications
You must be signed in to change notification settings - Fork 9
Open
Description
The following code allows. package performanceEstimation to to call package caret's caret::train.formula.
As far as I can tell it does work.
# TIMESTAMP APRIL 3 2016 runPerformanceEstimation3.R
options(error = recover)
runPerformanceEstimation3 <- function() {
runPerformanceEstimation3_inner <- function() {
require(performanceEstimation)
require(quantmod)
require(randomForest)
# GSPC <- getSymbols('^GSPC',from='2008-01-01',to='2012-12-31', auto.assign = FALSE)
load(file = "GSPCshrange.RData")
data.model <- specifyModel(Next(100*Delt(Ad(GSPC))) ~ Delt(Ad(GSPC),k=1:10))
bigdata <- as.data.frame(modelData(data.model))
colnames(bigdata)[1] <- 'PercVarClose'
bigdata[,"scalesrowid"] <- 1:NROW(bigdata) # required for non MC scaling # optional(ignored) MC
# required for task@dataSource TO SEE
assign("bigdata", bigdata, envir = .GlobalEnv )
attach_me <- list(
trainCaret = function(frmula, dat, ..., scales = NULL, scalescolname = NULL, otherArgsText = "", trControlText = "caret::trainControl(method = 'none')", tuneGridText = "") {
restargs <- list(...)
scales <- eval(parse(text=scales))
if(!is.null(scalescolname)) scalesrowid <- as.vector(unlist( dat[,scalescolname] ))
NROWdat <- NROW(dat)
NROWscales <- NROW(scales)
# expecting NON-MonteCarlo
# if LARGE length of case weights is sent, then collectION is expected to be non-ordered by random-ish positions
if(!is.null(scales) && ( NROWdat != NROWscales ) ) {
samplescales <- scales[scalesrowid]
}
# expecting MonteCarlo # user may typically send '1:NROW(dat)' # function(dat) # function of sample size
# if SMALL length of case weights are sent, then collection is expected to be ordered by exact position
# scalescolname IS NOT required ( and Not used) in MonteCarlo,
# For user programming consisency/ease
# or for an 'extra' check, scalescolname, may be sent anyways
if( ( !is.null(scales) ) &&
( NROWdat == NROWscales ) &&
( if(!is.null(scalescolname)) { !is.unsorted(as.vector(unlist(dat[,scalescolname]))) } else { TRUE } ) ) {
samplescales <- scales
}
# if no case weights are sent
if( is.null(scales) ) { samplescales <- NULL }
if( NROW(samplescales) != NROW(dat) ) stop("Incorrect case weights ( scales )")
others <- if(nchar(otherArgsText) == 0) { NULL } else { eval(parse(text=otherArgsText)) }
# if it has a scalesrowid, Remove it. It is not part of the dat.
if(!is.null(scalescolname)) {
# remove the scalescolname variable so it does not exist
dat[,scalescolname] <- NULL
# dynamically remove the scalescolname variable from the formula
# I do not care if the variable exists or NOT
frmula <- eval(substitute(update(frmula, ~ . - scalescolname_var),list(scalescolname_var = as.symbol(scalescolname))))
}
f <- substitute(
do.call(caret::train,c(list(form = frmula, data = dat, weights = samplescales), others, restargs, list( tuneGrid=eval(parse( text=tuneGridText))),
list(trControl=eval(parse(text=trControlText))) ))
)
eval(f)
}
)
pkgpos <- which("package:me" == search())
if(length(pkgpos) > 0){ detach(pos = pkgpos)}
attach(what=attach_me, pos = 2L, name = "package:me", warn.conflicts = TRUE)
# if doing case weights, scales and scalescolname, are required.
# if doing case weights, and doing Monte Carlo, scalecolnames is not required ( and ignored )
spExp1 <- performanceEstimation(
PredTask(PercVarClose ~ .,bigdata,'SP500_2012'),c(
workflowVariants(wf='standardWF',wfID="CVstandGBM", # THIS ONE IS O.K.
learner="trainCaret",
learner.pars=list(method="gbm",
distribution = c('gaussian'),
tuneGridText = "data.frame(n.trees = 10, interaction.depth = 2, shrinkage = 0.25, n.minobsinnode = 2)",
scales = "1:NROW(bigdata)", scalescolname = 'scalesrowid')) # CONSTANT dat population size
),
EstimationTask(metrics="acc",method=CV(nReps=2,nFolds=5))
)
spExp2 <- performanceEstimation(
PredTask(PercVarClose ~ .,bigdata,'SP500_2012'),c(
workflowVariants(wf='standardWF',wfID="MCstandGBM", # THIS ONE IS O.K. ( NOT LOGICALLY SENSIBLE)
learner="trainCaret",
learner.pars=list(method="gbm",
distribution = c('gaussian'),
tuneGridText = "data.frame(n.trees = 10, interaction.depth = 2, shrinkage = 0.25, n.minobsinnode = 2)",
scales = "1:NROW(dat)", scalescolname = 'scalesrowid')), # CONSTANT dat sample size
workflowVariants(wf='timeseriesWF',wfID="MCgrowGBM", # THIS ONE IS O.K.
learner="trainCaret",
learner.pars=list(method="gbm",
distribution = c('gaussian'),
tuneGridText = "data.frame(n.trees = 10, interaction.depth = 2, shrinkage = 0.25, n.minobsinnode = 2)",
scales = "1:NROW(dat)", scalescolname = 'scalesrowid'), # VARIABLE(GROWING) dat sample size
type="grow", relearn.step=30),
workflowVariants(wf='timeseriesWF',wfID="MCslideGBM", # THIS ONE IS O.K
learner="trainCaret",
learner.pars=list(method="gbm",
distribution = c('gaussian'),
tuneGridText = "data.frame(n.trees = 10, interaction.depth = 2, shrinkage = 0.25, n.minobsinnode = 2)",
scales = "1:NROW(dat)", scalescolname = 'scalesrowid'), # CONSTANT dat sample size
type="slide", relearn.step=30)
),
EstimationTask(metrics=c("mse","theil"),method=MonteCarlo(nReps=5,szTrain=0.5,szTest=0.25))
)
bookmarkhere <- 1
}
runPerformanceEstimation3_inner()
return(invisible(NULL))
}
# restart R, then
# rm(list = ls(envir = .GlobalEnv), envir = .GlobalEnv)
# devtools::load_all("~/doParallel")
# devtools::load_all("~/foreach")
# devtools::load_all("~/performanceEstimation")
# debugSource(paste0("~/runPerformanceEstimation3.R"))Andre Mikulec
Andre_Mikulec@Hotmail.com
Metadata
Metadata
Assignees
Labels
No labels