diff --git a/bikes_analysis.R b/bikes_analysis.R index b6f0e84..1c45888 100644 --- a/bikes_analysis.R +++ b/bikes_analysis.R @@ -177,24 +177,16 @@ kruskal.test(count ~ windspeed, data=train) summary(aov(count ~ day, data=train)) summary(aov(count ~ hour, data=train)) - summary(aov(count ~ season, data=train)) - summary(aov(count ~ weather, data=train)) - summary(aov(count ~ day+hour, data=train)) +anova.fit <- aov(count ~ day+hour+season, data=train) -summary(aov(count ~ day+hour+season, data=train)) - -anova.fit <- aov(log(count + 1) ~ day+hour+season+weather+year, data=train) summary(anova.fit) print(model.tables(anova.fit,"means"),digits=3) -par(mfrow=c(2,2)) -plot(anova.fit) -#pairwise.t.test(train$count, train$day, p.adjust="bonferroni") @@ -247,7 +239,7 @@ library(leaps) library(caret) train.model.mat <- model.matrix(formula, data=train.data) - + set.seed(1) k.cv = 10 p <- dim(train.model.mat)[2] - 1 @@ -284,8 +276,9 @@ summary(lm.bestfit) par(mfrow=c(2,2)) plot(lm.bestfit) - -#### Forward selection with log transformed response variable #### +########################################### +#### Log transformed response variable #### +########################################### # Specify functional form formulalog <- as.formula(log(count)~.) @@ -344,7 +337,7 @@ library(boot) # 0.89 adj r^2 f <- as.formula(count~(atemp+humidity+windspeed+days.from.start+holiday+day)*hour) rf <- glm(f, data=train) -cv.glm(train, rf, K = 5)$delta[1] +cvs <- cv.glm(train, rf, K = 10)$delta[1] summary(lm(f, data=train)) f <- as.formula(log(count)~(atemp+humidity+windspeed+days.from.start+holiday+day)*hour) @@ -356,8 +349,8 @@ summary(lm(f, data=train)) f <- as.formula(count~season+atemp+humidity+windspeed+hour+day+days.from.start+weather) -#rf <- glm(f, data=train) -#cv.glm(train, rf, K = 7)$delta[1] +rf <- glm(f, data=train) +cv.glm(train, rf, K = 7)$delta[1] summary(lm(f, data=train)) f <- as.formula(log(count)~(atemp+humidity+windspeed+days.from.start+holiday+day*hour)) @@ -404,17 +397,15 @@ lines(lower, lwd=2, col='grey', lty=2) ####################### library(mgcv) library(gamclass) -#form <- as.formula(log(count)~s(as.integer(hour))+s(humidity)+s(temp)+s(windspeed)+s(as.integer(days.from.start)) form <- as.formula(log(count)~hour+weather+ns(humidity)+ns(atemp)+ns(windspeed)+s(as.integer(days.from.start))) gam.fit <- gam(form, data=train) -par(mfrow=c(2,2)) -plot(gam.fit) summary(gam.fit) - gam.cv=CVgam(form, data=train, nfold=10, seed=1) mean((exp(gam.cv$fitted)-train$count)^2) +par(mfrow=c(2,2)) +plot(gam.fit)