From 5e68d638a7b5dc591db9013fdf16ea62eeb10a69 Mon Sep 17 00:00:00 2001 From: Chris Halpert Date: Mon, 14 Dec 2015 22:56:53 -0500 Subject: [PATCH 1/4] fixed anova --- bikes_analysis.R | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/bikes_analysis.R b/bikes_analysis.R index 7d895be..bfd6b12 100644 --- a/bikes_analysis.R +++ b/bikes_analysis.R @@ -177,16 +177,10 @@ kruskal.test(count ~ windspeed, data=train) summary(aov(count ~ day, data=train)) summary(aov(count ~ hour, data=train)) - summary(aov(count ~ season, data=train)) - summary(aov(count ~ weather, data=train)) - summary(aov(count ~ day+hour, data=train)) - -summary(aov(count ~ day+hour+season, data=train)) - -anova.fit <- aov(log(count + 1) ~ day+hour+season+weather+year, data=train) +anova.fit <- aov(count ~ day+hour+season, data=train) summary(anova.fit) print(model.tables(anova.fit,"means"),digits=3) @@ -246,7 +240,7 @@ library(leaps) library(caret) train.model.mat <- model.matrix(formula, data=train.data) - + set.seed(1) k.cv = 10 p <- dim(train.model.mat)[2] - 1 From 160cb9f7f8aefc6ff60c215d17cd10f136b872d6 Mon Sep 17 00:00:00 2001 From: Chris Halpert Date: Mon, 14 Dec 2015 23:15:24 -0500 Subject: [PATCH 2/4] More anova changes --- bikes_analysis.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bikes_analysis.R b/bikes_analysis.R index 29f7c6d..d2118af 100644 --- a/bikes_analysis.R +++ b/bikes_analysis.R @@ -181,14 +181,12 @@ summary(aov(count ~ season, data=train)) summary(aov(count ~ weather, data=train)) summary(aov(count ~ day+hour, data=train)) anova.fit <- aov(count ~ day+hour+season, data=train) + summary(anova.fit) print(model.tables(anova.fit,"means"),digits=3) -par(mfrow=c(2,2)) -plot(anova.fit) -#pairwise.t.test(train$count, train$day, p.adjust="bonferroni") From 2bcfa92debf90fdcba4d7ea1d1126b0e2b7b9a48 Mon Sep 17 00:00:00 2001 From: Chris Halpert Date: Mon, 14 Dec 2015 23:48:15 -0500 Subject: [PATCH 3/4] Gam changes --- bikes_analysis.R | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/bikes_analysis.R b/bikes_analysis.R index fb87452..a7c3709 100644 --- a/bikes_analysis.R +++ b/bikes_analysis.R @@ -276,8 +276,9 @@ summary(lm.bestfit) par(mfrow=c(2,2)) plot(lm.bestfit) - -#### Forward selection with log transformed response variable #### +########################################### +#### Log transformed response variable #### +########################################### # Specify functional form formulalog <- as.formula(log(count)~.) @@ -393,19 +394,20 @@ lines(lower, lwd=2, col='grey', lty=2) ####################### ## GAM + + ####################### library(mgcv) library(gamclass) -form <- as.formula(log(count)~s(as.integer(hour))+s(humidity)+s(temp)+s(windspeed)+s(windspeed)+s(as.integer(days.from.start)) +form <- as.formula(log(count)~hour+weather+ns(humidity)+ns(atemp)+ns(windspeed)+s(as.integer(days.from.start))) gam.fit <- gam(form, data=train) -par(mfrow=c(2,2)) -plot(gam.fit) summary(gam.fit) - gam.cv=CVgam(form, data=train, nfold=10, seed=1) mean((exp(gam.cv$fitted)-train$count)^2) +par(mfrow=c(2,2)) +plot(gam.fit) From 2062b63049b04f4532578fdbb9cc5e380038f4b3 Mon Sep 17 00:00:00 2001 From: Chris Halpert Date: Tue, 15 Dec 2015 00:06:13 -0500 Subject: [PATCH 4/4] throwaway --- bikes_analysis.R | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/bikes_analysis.R b/bikes_analysis.R index a7c3709..1c45888 100644 --- a/bikes_analysis.R +++ b/bikes_analysis.R @@ -337,7 +337,7 @@ library(boot) # 0.89 adj r^2 f <- as.formula(count~(atemp+humidity+windspeed+days.from.start+holiday+day)*hour) rf <- glm(f, data=train) -cv.glm(train, rf, K = 5)$delta[1] +cvs <- cv.glm(train, rf, K = 10)$delta[1] summary(lm(f, data=train)) f <- as.formula(log(count)~(atemp+humidity+windspeed+days.from.start+holiday+day)*hour) @@ -349,8 +349,8 @@ summary(lm(f, data=train)) f <- as.formula(count~season+atemp+humidity+windspeed+hour+day+days.from.start+weather) -#rf <- glm(f, data=train) -#cv.glm(train, rf, K = 7)$delta[1] +rf <- glm(f, data=train) +cv.glm(train, rf, K = 7)$delta[1] summary(lm(f, data=train)) f <- as.formula(log(count)~(atemp+humidity+windspeed+days.from.start+holiday+day*hour)) @@ -394,8 +394,6 @@ lines(lower, lwd=2, col='grey', lty=2) ####################### ## GAM - - ####################### library(mgcv) library(gamclass)