diff --git a/118 - slaweks17/ES_RNN_SlawekSmyl.pdf b/118 - slaweks17/ES_RNN_SlawekSmyl.pdf
new file mode 100644
index 0000000..9ab0732
Binary files /dev/null and b/118 - slaweks17/ES_RNN_SlawekSmyl.pdf differ
diff --git a/118 - slaweks17/R/merge.R b/118 - slaweks17/R/merge.R
new file mode 100644
index 0000000..9f6c52b
--- /dev/null
+++ b/118 - slaweks17/R/merge.R
@@ -0,0 +1,143 @@
+# Merging outputs, per category, M4 competition, for point forecasts, so for ES_RNN and ES_RNN_E
+# Author: Slawek Smyl, Mar-May 2018
+
+
+#The c++ executables write their outputs to one directory (occasionally two, sorry :-); in such a case move all files into a single dir before continuing.
+#(One logical run of several instances of the same program will produce a number of files, e.g. outputs with different ibig values)
+#This script merges them, averages the values, and writes the result to the same directory - FOREC_DIR
+###############################################################################
+
+#directory that should include all *-train.csv files, as well as M4-info.csv
+DATA_DIR="F:/progs/data/M4DataSet/"
+m4Info_df=read.csv(paste0(DATA_DIR,"M4-info.csv"))
+options(stringsAsFactors =FALSE)
+
+#directory with all the output files produced by the c++ code we want to merge
+FOREC_DIR='F:\\progs\\data\\M4\\Quarterly2018-05-31_09_30' #do not end with separator
+
+LBACK=1 #should be the same as in the c++ code; LBACK>0 means backtesting
+SP="Quarterly"
+#SP="Yearly"
+#SP="Daily"
+#SP="Hourly"
+
+#//----------------PARAMS ---------- comment/uncomment the following 3 variables
+#for ES_RNN_E, so for all except Monthly and Quarterly runs:
+#NUM_OF_SEEDS=1
+#NUM_OF_CHUNKS=1
+#IBIGS=
+
+#for ES_RNN (do for Monthly and Quarterly):
+NUM_OF_CHUNKS=2 #same as the NUM_OF_CHUNKS constant in the c++ source code; changing it is not recommended.
+NUM_OF_SEEDS=3 #equal to the number of seeds in the startup script (or the number of teams of worker processes),
+# so number_of_concurrent_executables == number_of_lines_in_the_startup_script == NUM_OF_SEEDS*NUM_OF_CHUNKS
+#E.g. if using the following script for ES_RNN:
+# start 10 1 0
+# start 10 2 0
+# start 20 1 5
+# start 20 2 5
+# start 30 1 10
+# start 30 2 10
+# we have here three seeds: 10,20,30, and two chunks: 1,2. (The pairs of workers have IBIG offsets of 0,5,10)
+IBIGS=3 #number of complete runs by each executable; if the programs are not interrupted, this should be equal to the constant BIG_LOOP in the c++ code, by default 3.
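+
+#Editorial note (added comment, not part of the original script): with the example startup
+#script above (seeds 10,20,30; chunks 1,2) and IBIGS=3, the merge step expects
+#NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS = 3*2*3 = 18 forecast files per category in FOREC_DIR;
+#the length(inputFiles) check further below enforces exactly this count.
+#A minimal optional sanity check (assumes FOREC_DIR, SP, LBACK are already set as above):
+#stopifnot(length(list.files(FOREC_DIR, pattern=paste0(SP,".*LB",LBACK))) == NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS)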
+ + +m4_df=read.csv(paste0(DATA_DIR,SP,"-train.csv")) + +sMAPE<-function(forec,actual) { + mean(abs(forec-actual)/(abs(forec)+abs(actual)))*200 +} +errorFunc=sMAPE + + +spInfo_df=m4Info_df[m4Info_df$SP==SP,] +ids=spInfo_df$M4id +horizon=spInfo_df[1,"Horizon"] + +#VARIABLE + "_" + to_string(seedForChunks) + "_" + to_string(chunkNo) + "_" + to_string(ibigDb)+"_LB"+ to_string(LBACK)+ ".csv"; +inputFiles=list.files(path = FOREC_DIR, pattern = paste0(SP,".*LB",LBACK), full.names = T) +if (length(inputFiles)!=NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS) { + stop("length(inputFiles)!=NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS") +} + + +comp_df=NULL +fil=inputFiles[1] +for (fil in inputFiles) { + print(fil) + c_df=read.csv(fil, header=F) + comp_df=rbind(comp_df,c_df) +} +names(comp_df)[1]='id' + +forecSeries=sort(unique(comp_df$id)) +if (length(forecSeries)!=length(ids) && LBACK==0) { + stop(paste0("Expected number of cases:",length(ids)," but got:",length(forecSeries))) +} + +SIZE_OF_CHUNK=1000 +out_df=NULL; ou_df=NULL +fSeries=forecSeries[1] +for (fSeries in forecSeries) { + oneSeriesForecs_df=comp_df[comp_df$id==fSeries,] + o1=colMeans(oneSeriesForecs_df[,2:ncol(oneSeriesForecs_df)]) + o_df=data.frame(id=fSeries, as.list(o1), stringsAsFactors =F) + ou_df=rbind(ou_df, o_df) + if (nrow(ou_df)>=SIZE_OF_CHUNK) { + out_df=rbind(out_df,ou_df) + ou_df=NULL + print(nrow(out_df)) + } +} +out_df=rbind(out_df,ou_df) +print(nrow(out_df)) +out_df=out_df[order(as.integer(substring(out_df$id, 2))),] + +#FOREC_DIR="e:\\temp" +outPath=paste0(FOREC_DIR,'\\',SP,"Forec.csv") +write.csv(out_df,file=outPath,row.names = F) + +################ Main work done, now just diagnostics calculations and plots + +#display a sample of forecasts and, if LBACK>0, actuals +MAX_NUM_OF_POINTS_TO_SHOW=200 +for (i in 1:100) { + irand=sample(1:length(forecSeries),1) + fSeries=forecSeries[irand] + forec=as.numeric(out_df[out_df$id==fSeries,2:ncol(out_df)]) + actual=as.numeric(m4_df[m4_df$V1==fSeries,2:ncol(m4_df)]) + actual=actual[!is.na(actual)] + if (length(actual)>MAX_NUM_OF_POINTS_TO_SHOW) { + actual=actual[(length(actual)-MAX_NUM_OF_POINTS_TO_SHOW):length(actual)] + } + if (LBACK==0) { + plot(c(actual,forec), col=c(rep(1,length(actual)),rep(2,length(forec))), main=fSeries) + } else { + ymin=min(actual,forec) + ymax=max(actual,forec) + plot(1:length(actual),actual, main=fSeries, ylim=c(ymin,ymax)) + lines((length(actual)-length(forec)+1):length(actual), forec, col=2, type='p') + } + + Sys.sleep(5) +} + + +#calc error metrics +if (LBACK>0) { + summErrors=0 + fSeries=forecSeries[1] + i=1 + for (fSeries in forecSeries) { + if (i%%1000==0) + cat(".") + forec=as.numeric(out_df[out_df$id==fSeries,2:ncol(out_df)]) + actual=as.numeric(m4_df[m4_df$V1==fSeries,2:ncol(m4_df)]) + actual=actual[!is.na(actual)] + actual=actual[(length(actual)-LBACK*horizon+1):(length(actual)-(LBACK-1)*horizon)] + summErrors=summErrors+errorFunc(forec,actual) + i=i+1 + } + print(".") + print(paste0("avg error:",round(summErrors/length(forecSeries),2))) +} diff --git a/118 - slaweks17/R/merge_PI.R b/118 - slaweks17/R/merge_PI.R new file mode 100644 index 0000000..6033d7a --- /dev/null +++ b/118 - slaweks17/R/merge_PI.R @@ -0,0 +1,210 @@ +# Merging outputs, per category, M4 competition, for Prediction Intervals , so for ES_RNN_PI and ES_RNN_E_PI +# Author: Slawek Smyl, Mar-May 2018 + + +#The c++ executables write to one (occasinally two, sorry :-), so in such case move files to one dir before continuing) directories. 
+#(One logical run of several instances of the same program will produce a number files, e.g. outputs with different ibig value) +#This script merges, averages values, and writes them down to the same directory - FOREC_DIR +############################################################################### + +#directory that should include all *-train.csv files, as well as M4-info.csv +DATA_DIR="F:/progs/data/M4DataSet/" +m4Info_df=read.csv(paste0(DATA_DIR,"M4-info.csv")) +options(stringsAsFactors =FALSE) +memory.limit(10000) + +#directory with all the output files produced by the c++ code we want to merge +FOREC_DIR='F:\\progs\\data\\M4\\Hourlygood' #do not end with separator + +LBACK=1 #shoud be as in the c++ code, LBACK>0 means backtesting +#SP="Quarterly" +#SP="Yearly" +#SP="Daily" +SP="Hourly" +m4_df=read.csv(paste0(DATA_DIR,SP,"-train.csv")) + + +#//----------------PARAMS ---------- comment/uncomment following 3 variables +#for ES_RNN_E_PI, so for all except Monthly and Quarterly runs: +NUM_OF_SEEDS=1 +NUM_OF_CHUNKS=1 +#IBIGS=/2 +IBIGS=6 + +#for ES_RNN_PI (do for Monthly and Quarterly): +#NUM_OF_CHUNKS=2 #same as NUM_OF_CHUNKS constant the the c++ cource code, changing it is not recommended. +#NUM_OF_SEEDS=3 #It is equal to the number of seeds in the startup script, (or number of teams of worker processes) +# so number_of_concurrent_executables==number_of_lines_in_the_running script/NUM_OF_CHUNKS, and number_of_chunks +#E.g if using following script for ES_RNN: +# start 10 1 0 +# start 10 2 0 +# start 20 1 5 +# start 20 2 5 +# start 30 1 10 +# start 30 2 10 +# we have here three seeds: 10,20,30, and two chunks: 1,2. (The pairs of workes have IBIG offsets of 0,5,10) +#IBIGS=3 #number of complete runs by each executables, so if programs are not interrupted, this should be equal to the constant BIG_LOOP in the c++ code, by default 3. 
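+
+#Editorial note (added comment, not part of the original script): with the ES_RNN_E_PI settings
+#above (NUM_OF_SEEDS=1, NUM_OF_CHUNKS=1, IBIGS=6) the script expects 1*1*6 = 6 lower-bound files
+#(name pattern containing "LLB") and 6 upper-bound files (pattern containing "HLB") in FOREC_DIR;
+#the two length(inputFiles) checks below enforce these counts.
+#A minimal optional sanity check (assumes FOREC_DIR, SP, LBACK are already set as above):
+#stopifnot(length(list.files(FOREC_DIR, pattern=paste0(SP,".*LLB",LBACK))) == NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS)
+#stopifnot(length(list.files(FOREC_DIR, pattern=paste0(SP,".*HLB",LBACK))) == NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS)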
+ +ALPHA = 0.05; +ALPHA_MULTIP = 2 / ALPHA; + +MSIS<-function(forecL,forecH,actual) { + sumDiffs=0 + for (i in 1:(length(actual)-seasonality)) { + sumDiffs=sumDiffs+abs(actual[i+seasonality]-actual[i]) + } + avgAbsDiff=sumDiffs/(length(actual)-seasonality) + + actual=actual[(length(actual)-LBACK*horizon+1):(length(actual)-(LBACK-1)*horizon)] + + msis=sum(forecH-forecL)+sum(pmax(0,forecL-actual))*ALPHA_MULTIP+sum(pmax(0,actual-forecH))*ALPHA_MULTIP + msis/horizon/avgAbsDiff +} +errorFunc=MSIS + +spInfo_df=m4Info_df[m4Info_df$SP==SP,] +ids=spInfo_df$M4id +horizon=spInfo_df[1,"Horizon"] +seasonality=spInfo_df[1,"Frequency"] + + +#lower +#VARIABLE + "_" + to_string(seedForChunks) + "_" + to_string(chunkNo) + "_" + to_string(ibigDb)+"_LB"+ to_string(LBACK)+ ".csv"; +inputFiles=list.files(path = FOREC_DIR, pattern = paste0(SP,".*LLB",LBACK), full.names = T) +if (length(inputFiles)!=NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS) { + stop("length(inputFiles)!=NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS") +} + +comp_df=NULL +fil=inputFiles[1] +for (fil in inputFiles) { + print(fil) + c_df=read.csv(fil, header=F) + comp_df=rbind(comp_df,c_df) +} +names(comp_df)[1]='id' + +forecSeries=sort(unique(comp_df$id)) +if (length(forecSeries)!=length(ids) && LBACK==0) { + stop(paste0("Expected number of cases:",length(ids)," but got:",length(forecSeries))) +} + +SIZE_OF_CHUNK=1000 +out_df=NULL; ou_df=NULL +fSeries=forecSeries[1] +for (fSeries in forecSeries) { + oneSeriesForecs_df=comp_df[comp_df$id==fSeries,] + o1=colMeans(oneSeriesForecs_df[,2:ncol(oneSeriesForecs_df)]) + o_df=data.frame(id=fSeries, as.list(o1), stringsAsFactors =F) + ou_df=rbind(ou_df, o_df) + if (nrow(ou_df)>=SIZE_OF_CHUNK) { + out_df=rbind(out_df,ou_df) + ou_df=NULL + print(nrow(out_df)) + } +} +out_df=rbind(out_df,ou_df) +print(nrow(out_df)) +out_df=out_df[order(as.integer(substring(out_df$id, 2))),] + +outPath=paste0(FOREC_DIR,'\\',SP,"ForecL.csv") +write.csv(out_df,file=outPath,row.names = F) + +lower_df=out_df + +##################################### +#higher +inputFiles=list.files(path = FOREC_DIR, pattern = paste0(SP,".*HLB",LBACK), full.names = T) +if (length(inputFiles)!=NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS) { + stop("length(inputFiles)!=NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS") +} + +comp_df=NULL +fil=inputFiles[1] +for (fil in inputFiles) { + print(fil) + c_df=read.csv(fil, header=F) + comp_df=rbind(comp_df,c_df) +} +names(comp_df)[1]='id' + +forecSeries=sort(unique(comp_df$id)) +if (length(forecSeries)!=length(ids) && LBACK==0) { + print(paste0("Warning. 
Expected number of cases:",length(ids)," but got:",length(forecSeries))) +} + +SIZE_OF_CHUNK=1000 +out_df=NULL; ou_df=NULL +fSeries=forecSeries[1] +for (fSeries in forecSeries) { + oneSeriesForecs_df=comp_df[comp_df$id==fSeries,] + o1=colMeans(oneSeriesForecs_df[,2:ncol(oneSeriesForecs_df)]) + o_df=data.frame(id=fSeries, as.list(o1), stringsAsFactors =F) + ou_df=rbind(ou_df, o_df) + if (nrow(ou_df)>=SIZE_OF_CHUNK) { + out_df=rbind(out_df,ou_df) + ou_df=NULL + print(nrow(out_df)) + } +} +out_df=rbind(out_df,ou_df) +print(nrow(out_df)) +out_df=out_df[order(as.integer(substring(out_df$id, 2))),] + +outPath=paste0(FOREC_DIR,'\\',SP,"ForecH.csv") +write.csv(out_df,file=outPath,row.names = F) + +higher_df=out_df + + +################ Main work done, now just diagnostics calculations and plots + +#display a sample of forecasts and, if LBACK>0, actuals +MAX_NUM_OF_POINTS_TO_SHOW=200 +i=1 +for (i in 1:100) { + irand=sample(1:length(forecSeries),1) + fSeries=forecSeries[irand] + forecL=as.numeric(lower_df[lower_df$id==fSeries,2:ncol(lower_df)]) + forecH=as.numeric(higher_df[higher_df$id==fSeries,2:ncol(higher_df)]) + actual=as.numeric(m4_df[m4_df$V1==fSeries,2:ncol(m4_df)]) + actual=actual[!is.na(actual)] + if (length(actual)>MAX_NUM_OF_POINTS_TO_SHOW) { + actual=actual[(length(actual)-MAX_NUM_OF_POINTS_TO_SHOW):length(actual)] + } + if (LBACK==0) { + plot(c(actual,forecH), col=c(rep(1,length(actual)),rep(2,length(forecH))), main=fSeries) + lines(c(actual,forecL), col=c(rep(1,length(actual)),rep(3,length(forecL))), type='p') + } else { + ymin=min(actual,forecL) + ymax=max(actual,forecH) + plot(1:length(actual),actual, main=fSeries, ylim=c(ymin,ymax)) + lines((length(actual)-length(forecH)+1):length(actual), forecH, col=2, type='p') + lines((length(actual)-length(forecL)+1):length(actual), forecL, col=3, type='p') + } + + Sys.sleep(5) +} + + + +#calc error metric: MSIS +if (LBACK>0) { + summErrors=0 + fSeries=forecSeries[1] + i=1 + for (fSeries in forecSeries) { + if (i%%1000==0) + cat(".") + forecL=as.numeric(lower_df[lower_df$id==fSeries,2:ncol(lower_df)]) + forecH=as.numeric(higher_df[higher_df$id==fSeries,2:ncol(higher_df)]) + actual=as.numeric(m4_df[m4_df$V1==fSeries,2:ncol(m4_df)]) + actual=actual[!is.na(actual)] + summErrors=summErrors+errorFunc(forecL, forecH, actual) + i=i+1 + } + print(".") + print(paste0("avg error:",round(summErrors/length(forecSeries),2))) +} + + diff --git a/118 - slaweks17/R/readme.txt b/118 - slaweks17/R/readme.txt new file mode 100644 index 0000000..013d8f7 --- /dev/null +++ b/118 - slaweks17/R/readme.txt @@ -0,0 +1,8 @@ +When the c++ workers run, they output results (forecasts) to a directory or two. +(Sorry occasionally two directories are filled, in such case first "manually" put all the output files to a single dir) +These scripts merge them into one file and save it, show a sample of graphs, and if this is backtesting run (LBACK>0), calculate some accuracy metrics. + +Both scripts needs to be updated with your input, output dirs, and other params, see inside, there are a lot of comments there. + +merge.R is meant to be used for point forecst runs, so for ES_RNN and ES_RNN_E programs. +mergePI.R - for Prediction Interval runs, so for ES_RNN_PI and ES_RNN_E_PI programs. diff --git a/118 - slaweks17/c++/ES_RNN.cc b/118 - slaweks17/c++/ES_RNN.cc new file mode 100644 index 0000000..43dc358 --- /dev/null +++ b/118 - slaweks17/c++/ES_RNN.cc @@ -0,0 +1,1193 @@ +/*ES-RNN: ES-RNN Exponential Smoothing Recurrent Neural Network hybrid. Point forecast. 
+Slawek Smyl, Jan-May 2017. + +Dilated LSTMs, with optional shortcuts, attention. +It is meant to be used for Monthly and Quarterly series of M4 competition, becasue the DE (Diversified Ensemble) version is too slow. +The program uses and requires Dynet NN library(https://github.com/clab/dynet); can be compiled and run on Windows, Linux, and Mac. + +It has to be invoked in pair of executables, passing at least two integers: seedForChunks, chunkNo +so e.g. create a script with following lines on Windows +start 10 1 +start 10 2 +Modern computers have at more then 2 cores, so e.g. on 6-core machine create and run the following script with 3 pairs of workers: +# start 10 1 0 +# start 10 2 0 +# start 20 1 5 +# start 20 2 5 +# start 30 1 10 +# start 30 2 10 +seedForChunks have to be the same withion one pair, chunk numbers have to be 1 and 2. +We have added here the third parameter: ibigOffset. The straddle should be equal or bigger than BIG_LOOP. +Each pair goes through BIG_LOOP (by default 3, change in code below if you want) of model fitting and prediction, +so 2 pairs, as above, will produce 6 forecasts to be ensembled later, in R. +By increasing number of pairs, e.g. to 6 on 12-core computer, one can reduce BIG_LOOP to 1, so reduce execution time, and still have 6 forecasts - +a decent number to ensemble (in a separate R script). + +There are three blocks of parameters below, one active (starting with //PARAMS--------------) and two inactive. +The active block is setup as in the final run of forecasting quarterly series. Similarly Monthly block. +The Daily block is more of a demo, allowing to run quickly forecast for Daily series, although with slightly worse performance (use another program ES_RNN_E.cc for it). It was not used for the final submission. +So, you need comment/uncomment to have one block of interest active. + + +*/ + +//#define USE_ODBC +//define USE_ODBC if you want to +// 1. run the program in backtesting mode (which means you also need to set LBACK>0 below. Read the comment below. +// 2. save forecasts to a datatabase. Mysql and SQL Server were tested. The table creation and some other scripts should be found in \sql directory of the source code. +// Of course setting up ODBC is not that simple, :-), e.g. you need to create DSN=slawek, that points to a database with the output table. +// Saving to the db is convenient, but not necessary - all forecasts are always saved to as csv files in automatically created subdirectory (sorry sometimes two directories, so you have to copy :-)) of OUTPUT_DIR +//If saving to database you need to modify run varaible, for each new run, otherwise you will get the table key error. + +#include "dynet/dynet.h" +#include "dynet/training.h" +#include "dynet/expr.h" +#include "dynet/io.h" +#include "dynet/model.h" +#include "dynet/nodes.h" +#include "dynet/expr.h" +#include "dynet/lstm.h" +#include "slstm.h" //my implementation of dilated LSTMs + +#if defined USE_ODBC + #if defined _WINDOWS + #include + #endif + #include + #include +#endif + +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace dynet; + + + +string DATA_DIR = "f:\\progs\\data\\M4DataSet\\"; //with the competition data csvs +//string DATA_DIR="/home/uber/progs/data/M4DataSet/"; +string OUTPUT_DIR = "f:\\progs\\data\\M4\\"; +//string OUTPUT_DIR="/home/uber/progs/data/M4/"; + +int LBACK = 0; //LBACK 0 means final mode: learning on all data and forecasting. 
LBACK=1 would move back by OUTPUT_SIZE, and forecast last known OUTPUT_SIZE points, for backtesting. LBACK could be a larger integer, but then number of series shrinks. + + +//PARAMS-------------- +string VARIABLE = "Quarterly"; +const string run = "50/45 (1,2),(4,8), LR=0.001/{10,1e-4f}, EPOCHS=15, LVP=80 40*"; +const float PERCENTILE = 50; //we always use Pinball loss, although on normalized values. When forecasting point value, we actually forecast median, so PERCENTILE=50 +const float TRAINING_PERCENTILE = 45; //the program has a tendency for positive bias. So, we can reduce it by running smaller TRAINING_PERCENTILE. + +vector> dilations={{1,2},{4,8}};//Each vector represents one chunk of Dilateed LSTMS, connected in standard resnNet fashion +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM //so for Quarterly series, we do not use either the more advanced residual connections nor attention. +const bool ADD_NL_LAYER=false; //whether to insert a tanh() layer between the RNN stack and the linear adaptor (output) layer + +const float INITIAL_LEARNING_RATE = 0.001f; +const map LEARNING_RATES = { { 10,1e-4f } }; //at which epoch we set them up to what +const float PER_SERIES_LR_MULTIP = 1; //multiplier for per-series parameters' learning rate. + +const int NUM_OF_TRAIN_EPOCHS = 15; +const unsigned int STATE_HSIZE = 40; + +const int SEASONALITY = 4; +const unsigned int INPUT_SIZE = 4; +const int INPUT_SIZE_I= INPUT_SIZE; +const unsigned int OUTPUT_SIZE = 8; +const int OUTPUT_SIZE_I= OUTPUT_SIZE; +const int MIN_INP_SEQ_LEN = 0; +const float LEVEL_VARIABILITY_PENALTY = 80; //Multiplier for L" penalty against wigglines of level vector. Important. +const int MIN_SERIES_LENGTH = INPUT_SIZE_I + OUTPUT_SIZE_I+ MIN_INP_SEQ_LEN+2; +const int MAX_SERIES_LENGTH = 40 * SEASONALITY + MIN_SERIES_LENGTH; //we are chopping longer series, to last, max e.g. 40 years + + + +/* +string VARIABLE = "Monthly"; +const string run = "50/49 Res (1,3,6,12), LR=5e-4 {12,1e-4f}, EPOCHS=10, 20*"; +const float PERCENTILE = 50; //we always use Pinball loss, although on normalized values. When forecasting point value, we actually forecast median, so PERCENTILE=50 +const float TRAINING_PERCENTILE = 49; //the program has a tendency for positive bias. So, we can reduce it by running smaller TRAINING_PERCENTILE + +#define USE_RESIDUAL_LSTM //so for Monthly we use only one block, so no standard resNet shortcuts, but instead but of the special residual shortcuts, after https://arxiv.org/abs/1701.03360. +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; //whether to insert a tanh() layer between the RNN stack and the linear adaptor (output) layer + +vector> dilations={{1,3,6,12}};//so for Monthly we use only one block, so no standard resNet shortcut +const float INITIAL_LEARNING_RATE = 5e-4; +const map LEARNING_RATES = { { 12,1e-4f } }; //at which epoch we set them up to what +const float PER_SERIES_LR_MULTIP = 1; //multiplier for per-series parameters' learning rate. + +const int NUM_OF_TRAIN_EPOCHS = 10; +const unsigned int STATE_HSIZE = 50; + +const float LEVEL_VARIABILITY_PENALTY = 50; //Multiplier for L" penalty against wigglines of level vector. 
+ +const int SEASONALITY = 12; +const unsigned int OUTPUT_SIZE = 18; +const unsigned int INPUT_SIZE = 12; +const int INPUT_SIZE_I= INPUT_SIZE; +const int OUTPUT_SIZE_I= OUTPUT_SIZE; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = INPUT_SIZE_I + OUTPUT_SIZE_I+ MIN_INP_SEQ_LEN+2; +const int MAX_SERIES_LENGTH = 20 * SEASONALITY + MIN_SERIES_LENGTH; //we are chopping longer series, to last, max e.g. 40 years +*/ + + +/* +string VARIABLE = "Daily"; +const string run = "50/49 NL LRMult=1.5, 3/5 (1,7,28) LR=3e-4 {9,1e-4f} EPOCHS=15, LVP=100 HSIZE=40 20w"; + +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = true; + +const int PERCENTILE = 50; //we always use Pinball loss. When forecasting point value, we actually forecast median, so PERCENTILE=50 +const int TRAINING_PERCENTILE = 49; //the program has a tendency for positive bias. So, we can reduce it by running smaller TRAINING_PERCENTILE + +const int SEASONALITY_NUM = 1; //0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 7; +const int SEASONALITY2 = 0; +vector> dilations = { { 1,7,28 } }; + +const float INITIAL_LEARNING_RATE = 3e-4; +const map LEARNING_RATES = { { 9,1e-4f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1.5; +const int NUM_OF_TRAIN_EPOCHS = 15; + +float LEVEL_VARIABILITY_PENALTY = 100; //Multiplier for L" penalty against wigglines of level vector. +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 7; +const int INPUT_SIZE_I = INPUT_SIZE; +const unsigned int OUTPUT_SIZE = 14; +const int OUTPUT_SIZE_I = OUTPUT_SIZE; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). 
Total length may be truncated by LBACK +const int MAX_SERIES_LENGTH = 20 * SEASONALITY + MIN_SERIES_LENGTH; //we are chopping longer series, to max of last 20 years +*/ + +Expression squash(const Expression& x) { + return log(x); +} + +Expression expand(const Expression& x) { + return exp(x); +} + +string INPUT_PATH = DATA_DIR + VARIABLE + "-train.csv"; +string INFO_INPUT_PATH = DATA_DIR + "M4-info.csv"; + +#if defined _DEBUG + const int MAX_NUM_OF_SERIES = 40; +#else + const int MAX_NUM_OF_SERIES = -1; //use all series +#endif // _DEBUG + +const unsigned int NUM_OF_CATEGORIES = 6;//in data provided +const int BIG_LOOP = 3; +const int NUM_OF_CHUNKS = 2; +const float EPS=1e-6; +const int AVERAGING_LEVEL=5; +const bool USE_MEDIAN = false; +const int MIDDLE_POS_FOR_AVG = 2; //if using medians + +const float NOISE_STD=0.001; +const int FREQ_OF_TEST=1; +const float GRADIENT_CLIPPING=20; +const float C_STATE_PENALTY = 0; + +const float BIG_FLOAT=1e38;//numeric_limits::max(); +const bool PRINT_DIAGN=true; +const float TAU = PERCENTILE / 100.; +const float TRAINING_TAU = TRAINING_PERCENTILE / 100.; +const unsigned ATTENTION_HSIZE=STATE_HSIZE; + +const bool USE_AUTO_LEARNING_RATE=false; +//if USE_AUTO_LEARNING_RATE, and only if LBACK>0 +const float MIN_LEARNING_RATE = 0.0001f; +const float LR_RATIO = sqrt(10); +const float LR_TOLERANCE_MULTIP = 1.005; +const int L3_PERIOD = 2; +const int MIN_EPOCHS_BEFORE_CHANGING_LRATE = 2; + + +#if defined USE_ODBC + void HandleDiagnosticRecord(SQLHANDLE hHandle, + SQLSMALLINT hType, + RETCODE RetCode); + + #if defined _WINDOWS + WCHAR* pwszConnStr = L"DSN=slawek"; + #else + SQLCHAR* pwszConnStr = (SQLCHAR*) "DSN=slawek"; + #endif + #define TRYODBC(h, ht, x) { RETCODE rc = x;\ + if (rc != SQL_SUCCESS) \ + { \ + HandleDiagnosticRecord (h, ht, rc); \ + } \ + if (rc == SQL_ERROR) \ + { \ + fprintf(stderr, "Error in " #x "\n"); \ + if (hStmt) { \ + SQLFreeHandle(SQL_HANDLE_STMT, hStmt); \ + } \ + if (hDbc) { \ + SQLDisconnect(hDbc); \ + SQLFreeHandle(SQL_HANDLE_DBC, hDbc); \ + } \ + if (hEnv) { \ + SQLFreeHandle(SQL_HANDLE_ENV, hEnv); \ + } \ + exit(-1); \ + } \ + } + +#endif + +struct M4TS {//storing series data + vector < float> categories_vect; + vector vals; + vector testVals;//empty, unless LBACK>0 + int n; + + M4TS(string category, stringstream &line_stream) { + array categories = { 0,0,0,0,0,0 }; + if (category == "Demographic") + categories[0] = 1; + else if (category == "Finance") + categories[1] = 1; + else if (category == "Industry") + categories[2] = 1; + else if (category == "Macro") + categories[3] = 1; + else if (category == "Micro") + categories[4] = 1; + else if (category == "Other") + categories[5] = 1; + else { + cerr << "unknown category?"; + exit(-1); + } + for (int i = 0; i < NUM_OF_CATEGORIES; i++) + categories_vect.push_back(categories[i]); + + string tmp_str; + while(getline(line_stream, tmp_str, ',' )) { + string val_str; + for (const auto c : tmp_str) { + if (c != '\"' && c != '\r') //remove quotes and very occasional double end of line + val_str.push_back(c); + } + if (val_str.size() == 0) + break; + float val=(atof(val_str.c_str())); + vals.push_back(val); + } + if (LBACK > 0) { //extract last OUTPUT_SIZE points as the test values + if (vals.size() > LBACK*OUTPUT_SIZE_I) { + auto first = vals.begin() + vals.size() - LBACK*OUTPUT_SIZE_I; + auto pastLast = vals.begin() + vals.size() - (LBACK-1)*OUTPUT_SIZE_I; + vector input_vect(first, pastLast); //[first,pastLast) + testVals= input_vect; + vals.resize(vals.size() - LBACK*OUTPUT_SIZE_I); 
//remove last LBACK*OUTPUT_SIZE elements + n = vals.size(); + } else + n = 0; + } else { + n = vals.size(); + } + if (n > MAX_SERIES_LENGTH) { //chop long series + vals.erase(vals.begin(), vals.begin() + (n-MAX_SERIES_LENGTH)); //remove some early data + n = vals.size(); + } + } + M4TS(){}; +}; + + +struct AdditionalParams {//Per series, important + Parameter levSm; + Parameter sSm; + array initSeasonality; +}; + +struct AdditionalParamsF {//Used for storing diagnostics + float levSm; + float sSm; + array initSeasonality; + vector levels; + vector seasons; +}; + + +Expression pinBallLoss(const Expression& out_ex, const Expression& actuals_ex) {//used by Dynet, learning loss function + vector losses; + for (unsigned int indx = 0; indx as_scalar(forec.value())) + losses.push_back((actual - forec)*TRAINING_TAU); + else + losses.push_back((actual - forec)*(TRAINING_TAU - 1)); + } + return sum(losses) / OUTPUT_SIZE * 2; +} + + +//weighted quantile Loss, used just for diagnostics, if if LBACK>0 and PERCENTILE!=50 +float wQuantLoss(vector& out_vect, vector& actuals_vect) { + float sumf = 0; float suma=0; + for (unsigned int indx = 0; indx forec) + sumf = sumf + (actual - forec)*TAU; + else + sumf = sumf + (actual - forec)*(TAU - 1); + } + return sumf / suma * 200; +} + +//used just for diagnostics, if LBACK>0 and PERCENTILE==50 +float sMAPE(vector& out_vect, vector& actuals_vect) { + float sumf = 0; + for (unsigned int indx = 0; indx& out_vect, vector& actuals_vect) { + if (PERCENTILE==50) + return sMAPE(out_vect, actuals_vect); + else + return wQuantLoss(out_vect, actuals_vect); +} + +int main(int argc, char** argv) { + dynet::initialize(argc, argv); + + int seedForChunks = 10; //Yes it runs, without any params, but it will work only on 1/NUM_OF_CHUNKS of all cases. The system is expected to run in NUM_OF_CHUNKS multiples. + int chunkNo = 1; + int ibigOffset = 0; + if (argc >= 3) { + seedForChunks = atoi(argv[1]); + chunkNo = atoi(argv[2]); + } + if (argc >= 4) + ibigOffset = atoi(argv[3]); + + if (chunkNo > NUM_OF_CHUNKS) { + cerr << "chunkNo > NUM_OF_CHUNKS"; + exit(-1); + } + else if (chunkNo <= 0) { + cerr << "chunkNo <= 0"; + exit(-1); + } + + cout<0) + std::cout<< " ibigOffset:"<< ibigOffset; //if continuing prematurely stopped run + if (LBACK>0) + std::cout<<" lback:"<tm_year+1900; + now_ts.month=now->tm_mon+1; + now_ts.day=now->tm_mday; + now_ts.hour=now->tm_hour; + now_ts.minute=now->tm_min; + now_ts.second=now->tm_sec; + now_ts.fraction=0; //reportedly needed + + const int OFFSET_TO_FIRST_ACTUAL=5; + string insertQuery_str = "insert into M72nn(run, LBack, ibig, series, epoch "; + for (int iq = 1; iq <= OUTPUT_SIZE_I; iq++) { + stringstream ss; + ss << iq; + string iq_str = ss.str(); + insertQuery_str = insertQuery_str +", actual"+iq_str+", forec" + iq_str; + } + insertQuery_str = insertQuery_str +", trainingError, variable, n, dateTimeOfPrediction) \ + values(? , ? , ? , ? , ? 
"; + for (int iq = 1; iq <= OUTPUT_SIZE_I; iq++) { + insertQuery_str = insertQuery_str + ",?,?"; + } + insertQuery_str = insertQuery_str + ",?,?,?,?)"; + #if defined _WINDOWS + wstring insertQuery(insertQuery_str.begin(), insertQuery_str.end()); + SQLWCHAR* sqlQuery = (SQLWCHAR*)insertQuery.c_str(); + #else + SQLCHAR* sqlQuery =(SQLCHAR*)insertQuery_str.c_str(); + #endif + + + SQLHENV hEnv = NULL; + SQLHDBC hDbc = NULL; + SQLHSTMT hStmt = NULL, hInsertStmt = NULL; + + if (SQLAllocHandle(SQL_HANDLE_ENV, SQL_NULL_HANDLE, &hEnv) == SQL_ERROR) { + fprintf(stderr, "Unable to allocate an environment handle\n"); + exit(-1); + } + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLSetEnvAttr(hEnv, + SQL_ATTR_ODBC_VERSION, + (SQLPOINTER)SQL_OV_ODBC3, + 0)); + + // Allocate a connection + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLAllocHandle(SQL_HANDLE_DBC, hEnv, &hDbc)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLDriverConnect(hDbc, + NULL, + pwszConnStr, + SQL_NTS, + NULL, + 0, + NULL, + SQL_DRIVER_COMPLETE)); + fprintf(stderr, "Connected!\n"); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLSetConnectAttr(hDbc, SQL_ATTR_AUTOCOMMIT, (SQLPOINTER)SQL_AUTOCOMMIT_OFF, SQL_IS_INTEGER)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLAllocHandle(SQL_HANDLE_STMT, hDbc, &hInsertStmt)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLPrepare(hInsertStmt, sqlQuery, SQL_NTS)); + + SQLLEN nullTerminatedStringOfRun = SQL_NTS; + SQLLEN nullTerminatedStringOfSeries = SQL_NTS; + SQLLEN nullTerminatedStringOfVariable = SQL_NTS; + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 1, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)run.c_str(), 0, &nullTerminatedStringOfRun)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 2, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&LBACK, 0, NULL)); + + // variable, n, dateTimeOfPrediction + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL+2*OUTPUT_SIZE_I+2, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)VARIABLE.c_str(), 0, &nullTerminatedStringOfVariable)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE_I + 4, SQL_PARAM_INPUT, SQL_C_TYPE_TIMESTAMP, SQL_TYPE_TIMESTAMP, 0, 0, &now_ts, sizeof(TIMESTAMP_STRUCT), NULL)); +#endif + + random_device rd; // only used once to initialise (seed) engine + mt19937 rng(rd()); // random-number engine used (Mersenne-Twister) + mt19937 rngForChunks(seedForChunks); + + vector series_vect; + unordered_map allSeries_map(30000);//max series in one chunk would be 48/2=24k, for monthly series + unordered_map seriesCategories_map(120000);//100k series + + ifstream infoFile(INFO_INPUT_PATH); + string line; + getline(infoFile, line); //header + while (getline(infoFile, line)) { + //cout << string( line)<= MIN_SERIES_LENGTH) { + series_vect.push_back(series); + allSeries_map[series] = m4Obj; + } + if (MAX_NUM_OF_SERIES>0 && series_vect.size()>=MAX_NUM_OF_SERIES) + break; + } + + int series_len=(int)series_vect.size(); + int chunkSize= series_len/NUM_OF_CHUNKS; + std::cout << "num of series:" << series_vect.size() <<" size of chunk:"<< chunkSize< uniOnSeries(0, chunkSize -1); // closed interval [a, b] + + unordered_map, AVERAGING_LEVEL+1>> testResults_map((int)chunkSize*1.5); + set diagSeries; + + for (int ibig=0; ibig perfValid_vect; + int epochOfLastChangeOfLRate = -1; + +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 3, 
SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&ibigDb, 0, NULL)); +#endif + + ParameterCollection pc; + ParameterCollection perSeriesPC; + + float learning_rate= INITIAL_LEARNING_RATE; + AdamTrainer trainer(pc, learning_rate, 0.9, 0.999, EPS); + trainer.clip_threshold = GRADIENT_CLIPPING; + AdamTrainer perSeriesTrainer(perSeriesPC, learning_rate*PER_SERIES_LR_MULTIP, 0.9, 0.999, EPS); + perSeriesTrainer.clip_threshold = GRADIENT_CLIPPING; + + #if defined USE_RESIDUAL_LSTM + vector rNNStack; + rNNStack.emplace_back(ResidualDilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, pc)); + for (int il = 1; il rNNStack; + rNNStack.emplace_back(AttentiveDilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, ATTENTION_HSIZE, pc)); + for (int il = 1; il rNNStack; + rNNStack.emplace_back(DilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, pc)); + for (int il = 1; il oneChunk_vect(start,end); + if (PRINT_DIAGN) { + for (int k = 0; k<10; k++) //diag + cout << oneChunk_vect[k] << " "; + cout << endl; + } + if (chunkNo == NUM_OF_CHUNKS) + cout<<"last chunk size:"<< oneChunk_vect.size()< additionalParams_map((int)oneChunk_vect.size()*1.5); //per series + unordered_map*> historyOfAdditionalParams_map((int)oneChunk_vect.size()*1.5); + for (auto iter = oneChunk_vect.begin(); iter != oneChunk_vect.end(); ++iter) {//setup + string series = *iter; + AdditionalParams addParams; + addParams.levSm = perSeriesPC.add_parameters({ 1 }, 0.5); //level smoothing + addParams.sSm = perSeriesPC.add_parameters({ 1 }, 0.5); //seasonality smoothing + for (int isea = 0; isea(); + } + + for (int iEpoch=0; iEpoch testLosses; //test losses of all series in this epoch + vector testAvgLosses; //test avg (over last few epochs) losses of all series in this epoch + vector trainingLosses; //training losses of all series in one epoch + vector forecLosses; vector levVarLosses; vector stateLosses; + #if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 5, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&iEpoch, 0, NULL)); + #endif + + for (auto iter = oneChunk_vect.begin() ; iter != oneChunk_vect.end(); ++iter) { + string series=*iter; + auto m4Obj = allSeries_map[series]; + + #if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 4, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)series.c_str(), 0, &nullTerminatedStringOfSeries)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE_I + 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&m4Obj.n, 0, NULL)); + #endif + + ComputationGraph cg; + for (int il=0; il season_exVect;//vector, because we do not know how long the series is + for (int iseas=0; iseas seas==1 + season_exVect.push_back(seas);//Expression is a simple struct, without any storage management, so the auto copy constructor works OK. 
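+            //Editorial note (added comment, not original code): the code that follows computes the
+            //per-step levels and extends the seasonality vector, roughly following the multiplicative
+            //exponential-smoothing recursion of ES-RNN:
+            //  level[i]              ~ levSm * y[i]/season[i] + (1-levSm) * level[i-1]
+            //  season[i+SEASONALITY] ~ sSm   * y[i]/level[i]  + (1-sSm)   * season[i]
+            //where levSm and sSm are the per-series smoothing parameters declared above, and
+            //logDiffOfLevels_vect collects the level-to-level (log) changes later used by the
+            //LEVEL_VARIABILITY_PENALTY ("wiggliness") term of the loss.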
+ } + season_exVect.push_back(season_exVect[0]); + + vector logDiffOfLevels_vect; + vector levels_exVect; + Expression lev=cdiv(input(cg, m4Obj.vals[0]), season_exVect[0]); + levels_exVect.push_back(lev); + for (int i=1; i 0) { + vector levelVarLoss_v; + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx=season_exVect.size()-SEASONALITY; + for (int i=0;i<(OUTPUT_SIZE_I-SEASONALITY);i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx+i]); + } + vector losses; + for (int i=INPUT_SIZE_I-1; i<(m4Obj.n- OUTPUT_SIZE_I); i++) { + vector::const_iterator firstE = season_exVect.begin() +i+1-INPUT_SIZE_I; + vector::const_iterator pastLastE = season_exVect.begin() +i+1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression inputSeasonality_ex=concatenate(inputSeasonality_exVect); + + vector::const_iterator first = m4Obj.vals.begin() +i+1-INPUT_SIZE_I; + vector::const_iterator pastLast = m4Obj.vals.begin() +i+1; //not including the last one + vector input_vect(first, pastLast); //[first,pastLast) + Expression input0_ex=input(cg,{INPUT_SIZE},input_vect); + Expression input1_ex=cdiv(input0_ex,inputSeasonality_ex); //deseasonalization + vector joinedInput_ex; + input1_ex= cdiv(input1_ex, levels_exVect[i]); + joinedInput_ex.emplace_back(noise(squash(input1_ex), NOISE_STD)); //normalization+noise + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality_ex=concatenate(outputSeasonality_exVect); + + first = m4Obj.vals.begin() +i+1; + pastLast = m4Obj.vals.begin() +i+1+OUTPUT_SIZE_I; + vector labels_vect(first, pastLast); //[first,pastLast) + Expression labels0_ex=input(cg,{OUTPUT_SIZE},labels_vect); + Expression labels1_ex=cdiv(labels0_ex,outputSeasonality_ex); //deseasonalization + labels1_ex= cdiv(labels1_ex, levels_exVect[i]);//normalization + Expression labels_ex=squash(labels1_ex); + + Expression loss_ex=pinBallLoss(out_ex, labels_ex); + if (i>=INPUT_SIZE_I+MIN_INP_SEQ_LEN) + losses.push_back(loss_ex); + } + + Expression forecLoss_ex= average(losses); + Expression loss_exp = forecLoss_ex; + + float levVarLoss=0; + if (LEVEL_VARIABILITY_PENALTY > 0) { + Expression levelVarLossP_ex = levelVarLoss_ex*LEVEL_VARIABILITY_PENALTY; + levVarLoss = as_scalar(levelVarLossP_ex.value()); + levVarLosses.push_back(levVarLoss); + loss_exp= loss_exp + levelVarLossP_ex; + } + + float cStateLoss=0; + if (C_STATE_PENALTY>0) { + vector cStateLosses_vEx; + for (int irnn = 0; irnn < rNNStack.size(); irnn++) + for (int it = 0; it maxAbs) { + maxAbs = abs(state[iv]); + timeOfMax = it; + layerOfMax = il; + chunkOfMax = irnn; + } + } + } //through layers/states + } //through time + } //through chunks + + cout << "levSm:" << as_scalar(levSm_ex.value()) << endl; + cout << "sSm:" << as_scalar(sSm_ex.value()) << endl; + cout << " min season=" << minSeason << endl; + cout << " min level=" << minLevel << endl; + cout << " max abs:" << maxAbs << " at time:" << timeOfMax << " at layer:" << layerOfMax << " and chunk:" << chunkOfMax << endl; + + //diagSeries.insert(series); + pc.reset_gradient(); + perSeriesPC.reset_gradient(); + } + + //saving per-series values for diagnostics purposes + AdditionalParamsF &histAdditionalParams= historyOfAdditionalParams_map[series]->at(iEpoch); + 
histAdditionalParams.levSm=as_scalar(levSm_ex.value()); + histAdditionalParams.sSm=as_scalar(sSm_ex.value()); + for (int isea=0; isea::const_iterator firstE = season_exVect.begin() + i + 1 - INPUT_SIZE_I; + vector::const_iterator pastLastE = season_exVect.begin() + i + 1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression inputSeasonality_ex = concatenate(inputSeasonality_exVect); + + vector::const_iterator first = m4Obj.vals.begin() + i + 1 - INPUT_SIZE_I; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1; //not including the last one + vector input_vect(first, pastLast); //[first,pastLast) + Expression input0_ex = input(cg, { INPUT_SIZE }, input_vect); + Expression input1_ex = cdiv(input0_ex, inputSeasonality_ex); //deseasonalization + vector joinedInput_ex; + input1_ex= cdiv(input1_ex, levels_exVect[i]);//normalization + joinedInput_ex.emplace_back(squash(input1_ex)); + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality_ex = concatenate(outputSeasonality_exVect); + + Expression out_ex; + if (ADD_NL_LAYER) { + out_ex=MLPW_ex*rnn_ex+MLPB_ex; + out_ex = adapterW_ex*tanh(out_ex)+adapterB_ex; + } else + out_ex=adapterW_ex*rnn_ex+adapterB_ex; + + out_ex = cmult(expand(out_ex), outputSeasonality_ex)*levels_exVect[i];//back to original scale + vector out_vect = as_vector(out_ex.value()); + + if (LBACK > 0) { + float qLoss = errorFunc(out_vect, m4Obj.testVals); + testLosses.push_back(qLoss); + } + + testResults_map[series][iEpoch%AVERAGING_LEVEL] = out_vect; + if (iEpoch >= AVERAGING_LEVEL) { + if (USE_MEDIAN) { + if (testResults_map[series][AVERAGING_LEVEL].size() == 0) + testResults_map[series][AVERAGING_LEVEL] = out_vect; //just to initialized, to make space. 
The values will be overwritten + for (int iii = 0; iii < OUTPUT_SIZE_I; iii++) { + vector temp_vect2; + for (int ii = 0; ii firstForec = testResults_map[series][0]; + testResults_map[series][AVERAGING_LEVEL] = firstForec; + for (int ii = 1; ii nextForec = testResults_map[series][ii]; + for (int iii = 0; iii 0) { + float qLoss = errorFunc(testResults_map[series][AVERAGING_LEVEL], m4Obj.testVals); + testAvgLosses.push_back(qLoss); + + #if defined USE_ODBC //save + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE_I + 1, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&forecastLoss, 0, NULL)); + + for (int io = 0; io < OUTPUT_SIZE_I; io++) { + int ipos=OFFSET_TO_FIRST_ACTUAL + 1 + 2*io; + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, ipos, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&m4Obj.testVals[io], 0, NULL)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, ipos+1, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&testResults_map[series][AVERAGING_LEVEL][io], 0, NULL)); + } + if (MAX_NUM_OF_SERIES<0) + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLExecute(hInsertStmt)); + #endif + } + } //time to average + }//last anchor point of the series + }//through TEST loop + }//through series + + + if (iEpoch % FREQ_OF_TEST == 0) { + float averageTrainingLoss = accumulate(trainingLosses.begin(), trainingLosses.end(), 0.0) / trainingLosses.size(); + + cout << ibig << " " << iEpoch << " loss:" << averageTrainingLoss * 100; + if (LEVEL_VARIABILITY_PENALTY > 0 || C_STATE_PENALTY > 0) { + float averageForecLoss = accumulate(forecLosses.begin(), forecLosses.end(), 0.0) / forecLosses.size(); + cout << " forecast loss:" << averageForecLoss*100; + } + if (LEVEL_VARIABILITY_PENALTY > 0) { + float averagelevVarLoss = accumulate(levVarLosses.begin(), levVarLosses.end(), 0.0) / levVarLosses.size(); + cout << " levVar loss:" << averagelevVarLoss * 100; + } + if (C_STATE_PENALTY > 0) { + float averageStateLoss = accumulate(stateLosses.begin(), stateLosses.end(), 0.0) / stateLosses.size(); + cout << " state loss:" << averageStateLoss * 100; + } + + float averageTestLoss=0; + if (LBACK > 0) { + float averageTestLoss = accumulate(testLosses.begin(), testLosses.end(), 0.0) / testLosses.size(); + cout<<" Test loss:" << averageTestLoss; + if (iEpoch >= AVERAGING_LEVEL) { + float averageTestAvgLoss = accumulate(testAvgLosses.begin(), testAvgLosses.end(), 0.0) / testAvgLosses.size();//of this epoch + cout << " avgLoss:" << averageTestAvgLoss; + } + if (USE_AUTO_LEARNING_RATE) + perfValid_vect.push_back(averageTestLoss); + } + cout << endl; + } + + if (USE_AUTO_LEARNING_RATE) { + bool changeL2Rate = false; + if (iEpoch >= 2) { + if (iEpoch < L3_PERIOD) + changeL2Rate = perfValid_vect[perfValid_vect.size() - 2] MIN_LEARNING_RATE && (iEpoch - epochOfLastChangeOfLRate) >= MIN_EPOCHS_BEFORE_CHANGING_LRATE) { + learning_rate /= LR_RATIO; + cout << "decreasing LR to:" << learning_rate << endl; + epochOfLastChangeOfLRate = iEpoch; + trainer.learning_rate = learning_rate; + } + } + #if defined USE_ODBC + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLEndTran( + SQL_HANDLE_DBC, + hDbc, + SQL_COMMIT)); + #endif + }//through epochs + + if (PRINT_DIAGN) {//some diagnostic info + set diagSeries; + for (int i = 0; i<1; i++) {//add a few normal ones + int irand = uniOnSeries(rng); + diagSeries.insert(oneChunk_vect[irand]); + } + for (auto series : diagSeries) { + cout << endl << series << endl; + 
array* historyOfAdditionalParams_ptrToArr = historyOfAdditionalParams_map[series]; + cout << "lSm:" << endl; + for (int iEpoch = 0; iEpochat(iEpoch).levSm << " "; + cout << endl; + cout << "sSm:" << endl; + for (int iEpoch = 0; iEpochat(iEpoch).sSm << " "; + cout << endl; + cout << "seasons:" << endl; + for (int isea = 0; iseaat(iEpoch).initSeasonality[isea] << " "; + cout << endl; + } + cout << endl; + for (int iEpoch = 0; iEpochat(iEpoch).levels.size()>0) { + cout << "levels:" << iEpoch << " "; + for (int iv = 0; ivat(iEpoch).levels.size(); iv++) + cout << historyOfAdditionalParams_ptrToArr->at(iEpoch).levels[iv] << ", "; + cout << endl; + cout << "seas:" << iEpoch << " "; + for (int iv = 0; ivat(iEpoch).seasons.size(); iv++) + cout << historyOfAdditionalParams_ptrToArr->at(iEpoch).seasons[iv] << ", "; + cout << endl; + } + } + } + } + + //save the forecast to outputFile + ofstream outputFile; + outputFile.open(outputPath); + for (auto iter = oneChunk_vect.begin(); iter != oneChunk_vect.end(); ++iter) { + string series = *iter; + outputFile<< series; + for (int io=0; io 0 +start 10 +start 20 +start 30 +on 4-core computer. +In this setup, learning and fitting would be repeated 4*3 times, probably unnecessarily too many, 6-8 independent runs should be enough for a good ensemble. +Therefore if running on say 8 core machine , one can extend the above script to 8 concurrent executions and reduce BIG_LOOP to 1. +(Creating final forecasts is done in a supplied R script) + +There are four blocks of parameters below, one active (starting with //PARAMS--------------) and three inactive. +These blocks are as they were during the final forecasting run. You need comment/uncomment to have one block of interest active. +*/ + + +//#define USE_ODBC +//define USE_ODBC if you want to +// 1. run the program in backtesting mode (which means you also need to set LBACK>0 below. Read the comment below. +// 2. save forecasts to a datatabase. Mysql and SQL Server were tested. The table creation and some other scripts should be found in \sql directory of the source code. +// Of course setting up ODBC is not that simple, :-), e.g. you need to create DSN=slawek, that points to a database with the output table. +// Saving to the db is convenient, but not necessary - all forecasts are always saved to as csv files in automatically created subdirectory (sorry sometimes two directories, so you have to copy :-)) of OUTPUT_DIR +//If saving to database you need to modify run varaible, for each new run, otherwise you will get the table key error. + +#include "dynet/dynet.h" +#include "dynet/training.h" +#include "dynet/expr.h" +#include "dynet/io.h" +#include "dynet/model.h" +#include "dynet/nodes.h" +#include "dynet/expr.h" +#include "dynet/lstm.h" +#include "slstm.h" //my implementation of dilated LSTMs + + +#if defined USE_ODBC + #if defined _WINDOWS + #include + #endif + #include + #include +#endif + +#include +#include +#include +//#include +#include +#include +#include +#include + +using namespace std; +using namespace dynet; + + +string DATA_DIR = "f:\\progs\\data\\M4DataSet\\"; //with the competition data csvs +//string DATA_DIR="/home/uber/progs/data/M4DataSet/"; +string OUTPUT_DIR = "f:\\progs\\data\\M4\\"; +//string OUTPUT_DIR="/home/uber/progs/data/M4/"; + +int LBACK = 0; //LBACK 0 means final mode: learning on all data and forecasting. LBACK=1 would move back by OUTPUT_SIZE, and forecast last known OUTPUT_SIZE points, for backtesting. LBACK could be a larger integer, but then number of series shrinks. 
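+//Editorial note (added comment, not original code): for example, with the Hourly configuration
+//below (OUTPUT_SIZE = 48), running with LBACK=1 withholds the last 48 points of every series as
+//test values for backtesting, while the default LBACK=0 trains on the full series and produces
+//the 48-step-ahead forecast for submission.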
+ + +//PARAMS-------------- +string VARIABLE = "Hourly"; +const string run = "50/49 Att 4/5 1,4)(24,168) LR=0.01,{7,5e-3f},{18,1e-3f},{22,3e-4f} EPOCHS=27, LVP=10, CSP=1"; + +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +const float PERCENTILE = 50; //we always use Pinball loss. When forecasting point value, we actually forecast median, so PERCENTILE=50 +const float TRAINING_PERCENTILE = 49; //the program has a tendency for positive bias. So, we can reduce it by running smaller TRAINING_PERCENTILE + +const int SEASONALITY_NUM = 2;//0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 24; +const int SEASONALITY2 = 168; +vector> dilations = { { 1,4 },{ 24, 168 } }; + +const float INITIAL_LEARNING_RATE = 0.01f; +const map LEARNING_RATES = { { 7,5e-3f },{ 18,1e-3f },{ 22,3e-4f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 27; + +float LEVEL_VARIABILITY_PENALTY = 10; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 1; + +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 24; +const unsigned int OUTPUT_SIZE = 48; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK +const int MAX_SERIES_LENGTH = 53 * SEASONALITY2 + MIN_SERIES_LENGTH; //==all +const int TOPN = 4; + + +/* +string VARIABLE = "Weekly"; +const string run = "50/47 Att 3/5 (1,52) LR=1e-3 {11,3e-4f}, {17,1e-4f} EPOCHS=23, LVP=100 6y"; + +const int PERCENTILE = 50; //we always use Pinball loss. When forecasting point value, we actually forecast median, so PERCENTILE=50 +const int TRAINING_PERCENTILE = 47; //the program has a tendency for positive bias. So, we can reduce it by running smaller TRAINING_PERCENTILE + +//#define USE_RESIDUAL_LSTM +#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +const int SEASONALITY_NUM = 0; //0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 52; +const int SEASONALITY2 = 0; +vector> dilations = { { 1, 52 } }; + +const float INITIAL_LEARNING_RATE = 1e-3; +const map LEARNING_RATES = { { 11,3e-4f },{ 17,1e-4f } }; //at which epoch we manually set them up to what +const int NUM_OF_TRAIN_EPOCHS = 23; + +float LEVEL_VARIABILITY_PENALTY = 100; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; +const float PER_SERIES_LR_MULTIP = 1; + +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 10; +const unsigned int OUTPUT_SIZE = 13; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK + //#Min. 1st Qu. Median Mean 3rd Qu. Max. + //#81 380 935 1023 1604 2598 +const int MAX_SERIES_LENGTH = 6 * SEASONALITY + MIN_SERIES_LENGTH; //==all +const int TOPN = 3; +*/ + +/* +string VARIABLE = "Daily"; +const string run = "Final 50/49 730 4/5 (1,3)(7,14) LR=3e-4 {9,1e-4f} EPOCHS=13, LVP=100 13w"; +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +const int PERCENTILE = 50; //we always use Pinball loss. 
When forecasting point value, we actually forecast median, so PERCENTILE=50 +const int TRAINING_PERCENTILE = 49; //the program has a tendency for positive bias. So, we can reduce it by running smaller TRAINING_PERCENTILE + +const int SEASONALITY_NUM = 1; //0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 7; +const int SEASONALITY2 = 0; +vector> dilations = { { 1,3 },{ 7, 14 } }; + +const float INITIAL_LEARNING_RATE = 3e-4; +const map LEARNING_RATES = { { 9,1e-4f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 13; + +float LEVEL_VARIABILITY_PENALTY = 100; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; + +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 7; +const unsigned int OUTPUT_SIZE = 14; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK + //#Min. 1st Qu. Median Mean 3rd Qu. Max. + //##93 323 2940 2357 4197 9919 +const int MAX_SERIES_LENGTH = 13 * SEASONALITY + MIN_SERIES_LENGTH; +const int TOPN = 4; +*/ + +/* +string VARIABLE = "Yearly"; +const string run = "50 Att 4/5 (1,6) LR=1e-4 EPOCHS=12, 60*"; + +//#define USE_RESIDUAL_LSTM +#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +const float PERCENTILE = 50; //we always use Pinball loss. When forecasting point value, we actually forecast median, so PERCENTILE=50 +const float TRAINING_PERCENTILE = 50; + +const int SEASONALITY_NUM = 0; //0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 0; +const int SEASONALITY2 = 0; +vector> dilations = { { 1,6 } }; + +const float INITIAL_LEARNING_RATE = 1e-4; +const map LEARNING_RATES = { { 15,1e-5 } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 12; + +float LEVEL_VARIABILITY_PENALTY = 0; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; + +const unsigned int STATE_HSIZE = 30; + +const unsigned int INPUT_SIZE = 4; +const unsigned int OUTPUT_SIZE = 6; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK + //#Min. 1st Qu. Median Mean 3rd Qu. Max. 
+ //#13.00 20.00 29.00 31.32 40.00 835.00 +const int MAX_SERIES_LENGTH = 60 + MIN_SERIES_LENGTH; +const int TOPN = 4; +*/ + +//end of VARIABLE-specific params + +const int BIG_LOOP = 3; +const int NUM_OF_NETS = 5; +const unsigned int ATTENTION_HSIZE = STATE_HSIZE; + + +#if defined _DEBUG + const int MAX_NUM_OF_SERIES = 20; +#else + const int MAX_NUM_OF_SERIES = -1; +#endif // _DEBUG + +const unsigned int NUM_OF_CATEGORIES = 6; +const int AVERAGING_LEVEL = 5; +const float EPS=1e-6; + +const float NOISE_STD=0.001; +const int FREQ_OF_TEST=1; +const float GRADIENT_CLIPPING=50; +const float BIG_FLOAT=1e38;//numeric_limits::max(); +const bool PRINT_DIAGN = false; +const float TAU = PERCENTILE / 100.; +const float TRAINING_TAU = TRAINING_PERCENTILE / 100.; + +string INPUT_PATH = DATA_DIR + VARIABLE + "-train.csv"; +string INFO_INPUT_PATH = DATA_DIR + "M4-info.csv"; + + +Expression squash(const Expression& x) { + return log(x); +} +float squash(float x) { + return log(x); +} + +Expression expand(const Expression& x) { + return exp(x); +} +float expand(float x) { + return exp(x); +} + + +#if defined USE_ODBC + void HandleDiagnosticRecord(SQLHANDLE hHandle, + SQLSMALLINT hType, + RETCODE RetCode); + + #if defined _WINDOWS + WCHAR* pwszConnStr = L"DSN=slawek"; + #else + SQLCHAR* pwszConnStr = (SQLCHAR*) "DSN=slawek"; + #endif + #define TRYODBC(h, ht, x) { RETCODE rc = x;\ + if (rc != SQL_SUCCESS) \ + { \ + HandleDiagnosticRecord (h, ht, rc); \ + } \ + if (rc == SQL_ERROR) \ + { \ + fprintf(stderr, "Error in " #x "\n"); \ + if (hStmt) { \ + SQLFreeHandle(SQL_HANDLE_STMT, hStmt); \ + } \ + if (hDbc) { \ + SQLDisconnect(hDbc); \ + SQLFreeHandle(SQL_HANDLE_DBC, hDbc); \ + } \ + if (hEnv) { \ + SQLFreeHandle(SQL_HANDLE_ENV, hEnv); \ + } \ + exit(-1); \ + } \ + } + +#endif + +struct M4TS {//storing series data + vector < float> categories_vect; + vector vals; + vector testVals;//empty, unless LBACK>0 + int n; + + M4TS(string category, stringstream &line_stream) { + array categories = { 0,0,0,0,0,0 }; + if (category == "Demographic") + categories[0] = 1; + else if (category == "Finance") + categories[1] = 1; + else if (category == "Industry") + categories[2] = 1; + else if (category == "Macro") + categories[3] = 1; + else if (category == "Micro") + categories[4] = 1; + else if (category == "Other") + categories[5] = 1; + else { + cerr << "unknown category?"; + exit(-1); + } + for (int i = 0; i < NUM_OF_CATEGORIES; i++) + categories_vect.push_back(categories[i]); + + string tmp_str; + while(getline(line_stream, tmp_str, ',' )) { + string val_str; + for (const auto c : tmp_str) { + if (c != '\"' && c != '\r') //remove quotes and very occasional double end of line + val_str.push_back(c); + } + if (val_str.size() == 0) + break; + float val=(atof(val_str.c_str())); + vals.push_back(val); + } + if (LBACK > 0) { //extract last OUTPUT_SIZE points as the test values + if (vals.size() > LBACK*OUTPUT_SIZE) { + auto first = vals.begin() + vals.size() - LBACK*OUTPUT_SIZE; + auto pastLast = vals.begin() + vals.size() - (LBACK-1)*OUTPUT_SIZE; + vector input_vect(first, pastLast); //[first,pastLast) + testVals= input_vect; + vals.resize(vals.size() - LBACK*OUTPUT_SIZE); //remove last LBACK*OUTPUT_SIZE elements + n = vals.size(); + } else + n = 0; + } else { + n = vals.size(); + } + if (n > MAX_SERIES_LENGTH) {//chop long series + vals.erase(vals.begin(), vals.begin() + (n-MAX_SERIES_LENGTH)); //remove some early data + n = vals.size(); + } + } + M4TS(){}; +}; + +#if defined USE_ODBC +void HandleDiagnosticRecord(SQLHANDLE 
hHandle, + SQLSMALLINT hType, + RETCODE RetCode); +#endif + + + +struct AdditionalParams {//Per series, important + Parameter levSm; + Parameter sSm; + array initSeasonality; + Parameter sSm2; + array initSeasonality2; +}; +struct AdditionalParamsF {//Used for storing diagnostics + float levSm; + float sSm; + array initSeasonality; + float sSm2; + array initSeasonality2; + vector levels; + vector seasons; + vector seasons2; +}; + + +array perfToRanking (array perf_arr) { + array index; + + for (int itop=0; itop losses; + for (unsigned int indx = 0; indx as_scalar(forec.value())) + losses.push_back((actual - forec)*TRAINING_TAU); + else + losses.push_back((actual - forec)*(TRAINING_TAU - 1)); + } + return sum(losses) / OUTPUT_SIZE * 2; +} + + +// weighted quantile Loss, used just for diagnostics, if if LBACK>0 and PERCENTILE!=50 +float wQuantLoss(vector& out_vect, vector& actuals_vect) { + float sumf = 0; float suma=0; + for (unsigned int indx = 0; indx forec) + sumf = sumf + (actual - forec)*TAU; + else + sumf = sumf + (actual - forec)*(TAU - 1); + } + return sumf / suma * 200; +} + +//used just for diagnostics, if LBACK>0 and PERCENTILE==50 +float sMAPE(vector& out_vect, vector& actuals_vect) { + float sumf = 0; + for (unsigned int indx = 0; indx& out_vect, vector& actuals_vect) { + if (PERCENTILE==50) + return sMAPE(out_vect, actuals_vect); + else + return wQuantLoss(out_vect, actuals_vect); +} + +int main(int argc, char** argv) { + dynet::initialize(argc, argv); + + int ibigOffset = 0; + if (argc == 2) + ibigOffset = atoi(argv[1]); + + cout << VARIABLE<<" "< 0) { + cout<<"Warning. LEVEL_VARIABILITY_PENALTY has to be equal zero if SEASONALITY_NUM==0"<tm_year+1900; + now_ts.month=now->tm_mon+1; + now_ts.day=now->tm_mday; + now_ts.hour=now->tm_hour; + now_ts.minute=now->tm_min; + now_ts.second=now->tm_sec; + now_ts.fraction=0; //reportedly needed + + const int OFFSET_TO_FIRST_ACTUAL=5; + string insertQuery_str = "insert into M72nn(run, LBack, ibig, series, epoch "; + for (int iq = 1; iq <= OUTPUT_SIZE; iq++) { + stringstream ss; + ss << iq; + string iq_str = ss.str(); + insertQuery_str = insertQuery_str +", actual"+iq_str+", forec" + iq_str; + } + insertQuery_str = insertQuery_str +", trainingError, variable, n, dateTimeOfPrediction) \ + values(? , ? , ? , ? , ? 
"; + for (int iq = 1; iq <= OUTPUT_SIZE; iq++) { + insertQuery_str = insertQuery_str + ",?,?"; + } + insertQuery_str = insertQuery_str + ",?,?,?,?)"; + #if defined _WINDOWS + wstring insertQuery(insertQuery_str.begin(), insertQuery_str.end()); + SQLWCHAR* sqlQuery = (SQLWCHAR*)insertQuery.c_str(); + #else + SQLCHAR* sqlQuery =(SQLCHAR*)insertQuery_str.c_str(); + #endif + + SQLHENV hEnv = NULL; + SQLHDBC hDbc = NULL; + SQLHSTMT hStmt = NULL, hInsertStmt = NULL; + + if (SQLAllocHandle(SQL_HANDLE_ENV, SQL_NULL_HANDLE, &hEnv) == SQL_ERROR) { + fprintf(stderr, "Unable to allocate an environment handle\n"); + exit(-1); + } + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLSetEnvAttr(hEnv, + SQL_ATTR_ODBC_VERSION, + (SQLPOINTER)SQL_OV_ODBC3, + 0)); + + // Allocate a connection + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLAllocHandle(SQL_HANDLE_DBC, hEnv, &hDbc)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLDriverConnect(hDbc, + NULL, + pwszConnStr, + SQL_NTS, + NULL, + 0, + NULL, + SQL_DRIVER_COMPLETE)); + fprintf(stderr, "Connected!\n"); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLSetConnectAttr(hDbc, SQL_ATTR_AUTOCOMMIT, (SQLPOINTER)SQL_AUTOCOMMIT_OFF, SQL_IS_INTEGER)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLAllocHandle(SQL_HANDLE_STMT, hDbc, &hInsertStmt)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLPrepare(hInsertStmt, sqlQuery, SQL_NTS)); + + SQLLEN nullTerminatedStringOfRun = SQL_NTS; + SQLLEN nullTerminatedStringOfSeries = SQL_NTS; + SQLLEN nullTerminatedStringOfVariable = SQL_NTS; + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 1, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)run.c_str(), 0, &nullTerminatedStringOfRun)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 2, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&LBACK, 0, NULL)); + + // variable, n, dateTimeOfPrediction + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL+2*OUTPUT_SIZE+2, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)VARIABLE.c_str(), 0, &nullTerminatedStringOfVariable)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE + 4, SQL_PARAM_INPUT, SQL_C_TYPE_TIMESTAMP, SQL_TYPE_TIMESTAMP, 0, 0, &now_ts, sizeof(TIMESTAMP_STRUCT), NULL)); +#endif + + random_device rd; // only used once to initialise (seed) engine + mt19937 rng(rd()); // random-number engine used (Mersenne-Twister in this case) + + vector series_vect; + unordered_map allSeries_map(30000);//max series in one chunk would be 24k for yearly series + unordered_map seriesCategories_map(120000);//100k series + + ifstream infoFile(INFO_INPUT_PATH); + string line; + getline(infoFile, line); //header + while (getline(infoFile, line)) { + //cout << string( line)<= MIN_SERIES_LENGTH) { + series_vect.push_back(series); + allSeries_map[series] = m4Obj; + } + if (MAX_NUM_OF_SERIES>0 && series_vect.size()>=MAX_NUM_OF_SERIES) + break; + } + cout << "num of series:" << series_vect.size() << endl; + + unsigned int series_len=(unsigned int)series_vect.size(); + uniform_int_distribution uniOnSeries(0,series_len-1); // closed interval [a, b] + uniform_int_distribution uniOnNets(0,NUM_OF_NETS-1); // closed interval [a, b] + + unordered_map, AVERAGING_LEVEL+1>, NUM_OF_NETS>> testResults_map((int)series_len*1.5);//per series, etc... 
+ unordered_map> finalResults_map((int)series_len*1.5);//per series + set diagSeries; + + unordered_map> netRanking_map; + for (int ibig=0; ibig perfValid_vect; + int epochOfLastChangeOfLRate = -1; + +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&ibigDb, 0, NULL)); +#endif + + //create nets + array paramsCollection_arr;//per net + array perSeriesParamsCollection_arr;//per net + array trainers_arr; + array perSeriesTrainers_arr; + + + #if defined USE_RESIDUAL_LSTM + array, NUM_OF_NETS> rnnStack_arr; + #elif defined USE_ATTENTIVE_LSTM + array, NUM_OF_NETS> rnnStack_arr; + #else + array, NUM_OF_NETS> rnnStack_arr; + #endif + + array MLPW_parArr; + array MLPB_parArr; + array adapterW_parArr; + array adapterB_parArr; + + //this is not a history, this is the real stuff + unordered_map* > additionalParams_mapOfArr((int)series_len*1.5); //per series, per net + for (auto iter = series_vect.begin() ; iter != series_vect.end(); ++iter) { + string series=*iter; + additionalParams_mapOfArr[series]=new array(); + } + + for (int inet=0; inetclip_threshold = GRADIENT_CLIPPING; + perSeriesTrainers_arr[inet]=new AdamTrainer (perSeriesPC, INITIAL_LEARNING_RATE*PER_SERIES_LR_MULTIP, 0.9, 0.999, EPS); + perSeriesTrainers_arr[inet]->clip_threshold = GRADIENT_CLIPPING; + + auto& rNNStack=rnnStack_arr[inet]; + #if defined USE_RESIDUAL_LSTM + rNNStack.emplace_back(ResidualDilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, pc)); + for (int il = 1; il* additionalParams_arr=additionalParams_mapOfArr[series]; + additionalParams_arr->at(inet).levSm=perSeriesPC.add_parameters({1}, 0.5);//per series, per net + if (SEASONALITY_NUM > 0) { + additionalParams_arr->at(inet).sSm = perSeriesPC.add_parameters({ 1 }, 0.5); + for (int isea = 0; iseaat(inet).initSeasonality[isea] = perSeriesPC.add_parameters({ 1 }, 0.5); + } + if (SEASONALITY_NUM > 1) { + additionalParams_arr->at(inet).sSm2 = perSeriesPC.add_parameters({ 1 }, 0.5); + for (int isea = 0; iseaat(inet).initSeasonality2[isea] = perSeriesPC.add_parameters({ 1 }, 0.5); + } + } + }//seting up, through nets + + //history of params. Series->[NUM_OF_NETS,NUM_OF_TRAIN_EPOCHS] + unordered_map, NUM_OF_NETS>*> historyOfAdditionalParams_map((int)series_len*1.5); + for (auto iter = series_vect.begin() ; iter != series_vect.end(); ++iter) { + string series=*iter; + historyOfAdditionalParams_map[series]=new array, NUM_OF_NETS>(); + } + + //first assignment. Yes, we are using vector , so the very first time the duplicates are possible. 
But a set can't be sorted + array, NUM_OF_NETS> seriesAssignment;//every net has an array + for (int j=0; j> netPerf_map; + for (int inet=0; inetlearning_rate = LEARNING_RATES.at(iEpoch); + if (inet==0) + cout << "changing LR to:" << trainer->learning_rate << endl; + perSeriesTrainer->learning_rate = LEARNING_RATES.at(iEpoch)*PER_SERIES_LR_MULTIP; + } + + auto& rNNStack=rnnStack_arr[inet]; + Parameter& MLPW_par = MLPW_parArr[inet]; + Parameter& MLPB_par = MLPB_parArr[inet]; + Parameter& adapterW_par=adapterW_parArr[inet]; + Parameter& adapterB_par=adapterB_parArr[inet]; + + vector oneNetAssignments=seriesAssignment[inet]; + random_shuffle (oneNetAssignments.begin(), oneNetAssignments.end()); + + vector epochLosses; + vector forecLosses; vector levVarLosses; vector stateLosses; + for (auto iter = oneNetAssignments.begin() ; iter != oneNetAssignments.end(); ++iter) { + string series=*iter; + auto m4Obj=allSeries_map[series]; + + ComputationGraph cg; + for (int il=0; ilat(inet); + array& historyOfAdditionalParams_arr=historyOfAdditionalParams_map[series]->at(inet); + + Expression MLPW_ex,MLPB_ex; + if (ADD_NL_LAYER) { + MLPW_ex = parameter(cg, MLPW_par); + MLPB_ex = parameter(cg, MLPB_par); + } + Expression adapterW_ex=parameter(cg, adapterW_par); + Expression adapterB_ex=parameter(cg, adapterB_par); + + Expression levSmSerNet0_ex= parameter(cg, additionalParams.levSm); + Expression levSm_ex = logistic(levSmSerNet0_ex); + + vector season_exVect;//vector, because we do not know how long the series is + Expression sSm_ex; + if (SEASONALITY_NUM > 0) { + Expression sSmSerNet0_ex= parameter(cg, additionalParams.sSm); + sSm_ex = logistic(sSmSerNet0_ex); + + for (int isea = 0; isea season2_exVect;//vector, because we do not know how long the series is + Expression sSm2_ex; + if (SEASONALITY_NUM > 1) { + Expression sSm2SerNet0_ex= parameter(cg, additionalParams.sSm2); + sSm2_ex = logistic(sSm2SerNet0_ex); + + for (int isea = 0; isea logDiffOfLevels_vect; + vector levels_exVect; + if (SEASONALITY_NUM == 0) { + levels_exVect.push_back(input(cg, m4Obj.vals[0])); + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + } + else if (SEASONALITY_NUM == 2) { + Expression lev = cdiv(input(cg, m4Obj.vals[0]), season_exVect[0] * season2_exVect[0]); + levels_exVect.push_back(lev); + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + //if prediction horizon is larger than seasonality, so we need to repeat some of the seasonality factors + if (OUTPUT_SIZE>SEASONALITY2) { + unsigned long startSeasonalityIndx = season2_exVect.size() - SEASONALITY2; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY2); i++) + season2_exVect.push_back(season2_exVect[startSeasonalityIndx + i]); + } + } + else { + cerr<<"SEASONALITY_NUM="<< SEASONALITY_NUM; + exit(-1); + } + + Expression levelVarLoss_ex; + if (LEVEL_VARIABILITY_PENALTY > 0) { + vector levelVarLoss_v; + for (int i = 1; i losses;//losses of steps through single time series + for (int i=INPUT_SIZE-1; i<(m4Obj.n- OUTPUT_SIZE); i++) { + vector::const_iterator first = m4Obj.vals.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1; //not including the last one + vector 
input_vect(first, pastLast); //[first,pastLast) + + first = m4Obj.vals.begin() + i + 1; + pastLast = m4Obj.vals.begin() + i + 1 + OUTPUT_SIZE; + vector labels_vect(first, pastLast); //[first,pastLast) + + Expression input1_ex = input(cg, { INPUT_SIZE }, input_vect); + Expression labels1_ex = input(cg, { OUTPUT_SIZE }, labels_vect); + + if (SEASONALITY_NUM > 0 ) { + vector::const_iterator firstE = season_exVect.begin() +i+1-INPUT_SIZE; + vector::const_iterator pastLastE = season_exVect.begin() +i+1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality_ex=concatenate(inputSeasonality_exVect); + + firstE = season_exVect.begin() + i + 1; + pastLastE = season_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + outputSeasonality_ex = concatenate(outputSeasonality_exVect); + + input1_ex = cdiv(input1_ex, inputSeasonality_ex); // input deseasonalization + labels1_ex = cdiv(labels1_ex, outputSeasonality_ex); //output deseasonalization + } + if (SEASONALITY_NUM > 1) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1; //not including the last one + vector inputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality2_ex = concatenate(inputSeasonality2_exVect); + + firstE = season2_exVect.begin() + i + 1; + pastLastE = season2_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality2_ex = concatenate(outputSeasonality2_exVect); + + input1_ex = cdiv(input1_ex, inputSeasonality2_ex); //input deseasonalization + labels1_ex = cdiv(labels1_ex, outputSeasonality2_ex); //output deseasonalization + } + + vector joinedInput_ex; + joinedInput_ex.emplace_back(noise(squash(cdiv(input1_ex, levels_exVect[i])), NOISE_STD)); //input normalization+noise + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression labels_ex = squash(cdiv(labels1_ex, levels_exVect[i]));//output normalization + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il=INPUT_SIZE+MIN_INP_SEQ_LEN) + losses.push_back(loss_ex); + }//through points of a series + + Expression forecLoss_ex= average(losses); + Expression loss_exp = forecLoss_ex; + + float levVarLoss=0; + if (LEVEL_VARIABILITY_PENALTY > 0) { + Expression levelVarLossP_ex = levelVarLoss_ex*LEVEL_VARIABILITY_PENALTY; + levVarLoss = as_scalar(levelVarLossP_ex.value()); + levVarLosses.push_back(levVarLoss); + loss_exp= loss_exp + levelVarLossP_ex; + } + + float cStateLoss=0; + if (C_STATE_PENALTY>0) { + vector cStateLosses_vEx; + for (int irnn = 0; irnn < rNNStack.size(); irnn++) + for (int it = 0; itupdate();//update shared weights + perSeriesTrainer->update(); //update params of this series only + } catch (exception& e) { //long diagnostics for this unlikely event :-) + cerr<<"cought exception while doing "< maxAbs) { + maxAbs = abs(state[iv]); + timeOfMax=it; + layerOfMax=il; + chunkOfMax= irnn; + } + } + } //through layers/states + } //through time + } //through chunks + + cout << "levSm:" << as_scalar(levSm_ex.value()) << endl; + if (SEASONALITY_NUM > 0) + cout << "sSm:" << as_scalar(sSm_ex.value()) << endl; + if (SEASONALITY_NUM > 1) + cout << "sSm2:" << as_scalar(sSm2_ex.value()) << endl; + cout << "max abs:" << maxAbs <<" 
at time:"<< timeOfMax<<" at layer:"<< layerOfMax<<" and chunk:"<< chunkOfMax< 0) { + histAdditionalParams.sSm=as_scalar(sSm_ex.value()); + for (int isea = 0; isea 1) { + histAdditionalParams.sSm2 = as_scalar(sSm2_ex.value()); + for (int isea=0; isea 0 || C_STATE_PENALTY > 0) { + float averageForecLoss = accumulate(forecLosses.begin(), forecLosses.end(), 0.0) / forecLosses.size(); + cout << " forec loss:" << averageForecLoss * 100; + } + if (LEVEL_VARIABILITY_PENALTY > 0) { + float averagelevVarLoss = accumulate(levVarLosses.begin(), levVarLosses.end(), 0.0) / levVarLosses.size(); + cout << " levVar loss:" << averagelevVarLoss * 100; + } + if (C_STATE_PENALTY > 0) { + float averageStateLoss = accumulate(stateLosses.begin(), stateLosses.end(), 0.0) / stateLosses.size(); + cout << " state loss:" << averageStateLoss * 100; + } + cout<at(inet); + Expression MLPW_ex, MLPB_ex; + if (ADD_NL_LAYER) { + MLPW_ex = parameter(cg, MLPW_par); + MLPB_ex = parameter(cg, MLPB_par); + } + Expression adapterW_ex=parameter(cg, adapterW_par); + Expression adapterB_ex=parameter(cg, adapterB_par); + + Expression levSmSerNet0_ex = parameter(cg, additionalParams.levSm); + Expression levSm_ex = logistic(levSmSerNet0_ex); + + vector season_exVect;//vector, because we do not know how long the series is + Expression sSm_ex; + if (SEASONALITY_NUM > 0) { + Expression sSmSerNet0_ex= parameter(cg, additionalParams.sSm); + sSm_ex = logistic(sSmSerNet0_ex); + + for (int isea = 0; isea season2_exVect;//vector, because we do not know how long the series is + Expression sSm2_ex; + if (SEASONALITY_NUM > 1) { + Expression sSm2SerNet0_ex= parameter(cg, additionalParams.sSm2); + sSm2_ex = logistic(sSm2SerNet0_ex); + + for (int isea = 0; isea levels_exVect; + if (SEASONALITY_NUM == 0) { + levels_exVect.push_back(input(cg, m4Obj.vals[0])); + for (int i = 1; i0 then this is shortened, so it always contains data awe have right to access + Expression newLevel_ex = m4Obj.vals[i] * cdiv(levSm_ex, season_exVect[i]) + (1 - levSm_ex)*levels_exVect[i - 1]; + levels_exVect.push_back(newLevel_ex); + + Expression newSeason_ex = m4Obj.vals[i] * cdiv(sSm_ex, newLevel_ex) + (1 - sSm_ex)*season_exVect[i]; + season_exVect.push_back(newSeason_ex); + } + + //if prediction horizon is larger than seasonality, so we need to repeat some of the seasonality factors + if (OUTPUT_SIZE>SEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + } + else if (SEASONALITY_NUM == 2) { + Expression lev = cdiv(input(cg, m4Obj.vals[0]), season_exVect[0] * season2_exVect[0]); + levels_exVect.push_back(lev); + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + //if prediction horizon is larger than seasonality, so we need to repeat some of the seasonality factors + if (OUTPUT_SIZE>SEASONALITY2) { + unsigned long startSeasonalityIndx = season2_exVect.size() - SEASONALITY2; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY2); i++) + season2_exVect.push_back(season2_exVect[startSeasonalityIndx + i]); + } + } + else { + cerr<<"SEASONALITY_NUM="<< SEASONALITY_NUM; + exit(-1); + } + + + Expression inputSeasonality_ex; Expression inputSeasonality2_ex; + Expression outputSeasonality_ex; Expression outputSeasonality2_ex; + vector losses;//losses 
of steps through single time series + Expression out_ex;//we declare it here, bcause the last one will be the forecast + for (int i=INPUT_SIZE-1; i::const_iterator first = m4Obj.vals.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1; //not including the last one + vector input_vect(first, pastLast); //[first,pastLast) + Expression input1_ex = input(cg, { INPUT_SIZE }, input_vect); + + if (SEASONALITY_NUM > 0 ) { + vector::const_iterator firstE = season_exVect.begin() +i+1-INPUT_SIZE; + vector::const_iterator pastLastE = season_exVect.begin() +i+1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality_ex=concatenate(inputSeasonality_exVect); + input1_ex = cdiv(input1_ex, inputSeasonality_ex); // input deseasonalization + } + if (SEASONALITY_NUM > 1) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1; //not including the last one + vector inputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality2_ex = concatenate(inputSeasonality2_exVect); + input1_ex = cdiv(input1_ex, inputSeasonality2_ex); //input deseasonalization + } + + vector joinedInput_ex; + joinedInput_ex.emplace_back(noise(squash(cdiv(input1_ex, levels_exVect[i])), NOISE_STD)); //input normalization+noise + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il::const_iterator first = m4Obj.vals.begin() + i + 1; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1 + OUTPUT_SIZE; + vector labels_vect(first, pastLast); //[first,pastLast) + Expression labels1_ex = input(cg, { OUTPUT_SIZE }, labels_vect); + + if (SEASONALITY_NUM > 0) { + vector::const_iterator firstE = season_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + outputSeasonality_ex = concatenate(outputSeasonality_exVect); + labels1_ex = cdiv(labels1_ex, outputSeasonality_ex); //output deseasonalization + } + if (SEASONALITY_NUM > 1) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality2_ex = concatenate(outputSeasonality2_exVect); + labels1_ex = cdiv(labels1_ex, outputSeasonality2_ex); //output deseasonalization + } + Expression labels_ex = squash(cdiv(labels1_ex, levels_exVect[i]));//output normalization + + Expression loss_ex = pinBallLoss(out_ex, labels_ex); + if (i>=INPUT_SIZE+MIN_INP_SEQ_LEN) + losses.push_back(loss_ex); //training area losses + } + + if (i==(m4Obj.n-1)) {//validation loss + out_ex=expand(out_ex)*levels_exVect[i];//back to original scale + if (SEASONALITY_NUM > 0 ) { + vector::const_iterator firstE = season_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + outputSeasonality_ex = concatenate(outputSeasonality_exVect); + out_ex = cmult(out_ex, outputSeasonality_ex);//reseasonalize + } + if (SEASONALITY_NUM > 1 ) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1; + 
vector::const_iterator pastLastE = season2_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality2_ex = concatenate(outputSeasonality2_exVect); + out_ex = cmult(out_ex, outputSeasonality2_ex);//reseasonalize + } + //we do not need the matching label here, because we do not bother calculate valid losses of each net across all series. + //We care about best and topn performance + } + }//end of going through all point of a series + + Expression loss_exp = average(losses); + float loss = as_scalar(cg.forward(loss_exp));//training loss of a single series + netPerf_map[series][inet]=loss; + + //unordered_map, AVERAGING_LEVEL+1>, NUM_OF_NETS>, BIG_LOOP>> testResults_map((int)series_len*1.5);//per series, big loop, etc... + //No epoch here, because this will just reflect the current (latest) situation - the last few epochs + vector out_vect=as_vector(out_ex.value()); + testResults_map[series][inet][iEpoch%AVERAGING_LEVEL]=out_vect; + if (iEpoch>=AVERAGING_LEVEL && iEpoch % FREQ_OF_TEST==0) { + vector firstForec=testResults_map[series][inet][0]; + testResults_map[series][inet][AVERAGING_LEVEL]=firstForec; + for (int ii=1; ii nextForec=testResults_map[series][inet][ii]; + for (int iii=0; iii0 && iEpoch % FREQ_OF_TEST==0) { + //now that we have saved outputs of all nets on all series, let's calc how best and topn combinations performed during current epoch. + vector bestEpochLosses; + vector bestEpochAvgLosses; + vector topnEpochLosses; + vector topnEpochAvgLosses; + + for (auto iter = series_vect.begin() ; iter != series_vect.end(); ++iter) { + string series=*iter; + auto m4Obj=allSeries_map[series]; + +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 4, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)series.c_str(), 0, &nullTerminatedStringOfSeries)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE + 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&m4Obj.n, 0, NULL)); +#endif + + float avgLoss; + vector avgLatest; + vector avgAvg; + + for (int itop=0; itop 0) { + float qLoss = errorFunc(testResults_map[series][inet][iEpoch%AVERAGING_LEVEL], m4Obj.testVals); + bestEpochLosses.push_back(qLoss); + } + avgLatest=testResults_map[series][inet][iEpoch%AVERAGING_LEVEL]; //used later for calculating topn loss + + if (iEpoch>=AVERAGING_LEVEL) { + if (LBACK > 0) { + float qLoss = errorFunc(testResults_map[series][inet][AVERAGING_LEVEL], m4Obj.testVals); + bestEpochAvgLosses.push_back(qLoss); + } + avgAvg=testResults_map[series][inet][AVERAGING_LEVEL]; + } + } else { + for (int iii=0; iii=AVERAGING_LEVEL) + avgAvg[iii]+=testResults_map[series][inet][AVERAGING_LEVEL][iii]; + } + } + }//through topn + + for (int iii=0; iii 0) { + float qLoss = errorFunc(avgLatest, m4Obj.testVals); + topnEpochLosses.push_back(qLoss); + } + + if (iEpoch>=AVERAGING_LEVEL) { + for (int iii = 0; iii 0) { +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE + 1, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&avgLoss, 0, NULL)); + + for (int iii=0; iii 0) { + float bestEpochLoss=accumulate( bestEpochLosses.begin(), bestEpochLosses.end(), 0.0)/bestEpochLosses.size(); + float topnEpochLoss=accumulate( topnEpochLosses.begin(), topnEpochLosses.end(), 0.0)/topnEpochLosses.size(); + cout<=AVERAGING_LEVEL) { + float 
bestEpochAvgLoss=accumulate( bestEpochAvgLosses.begin(), bestEpochAvgLosses.end(), 0.0)/bestEpochAvgLosses.size(); + float topnEpochAvgLoss=accumulate( topnEpochAvgLosses.begin(), topnEpochAvgLosses.end(), 0.0)/topnEpochAvgLosses.size(); + cout<<" bestAvg:"<> netRanking_map + netRanking_map[series]=perfToRanking(netPerf_map[series]); + + for (int itop=0; itop diagSeries; + for (int i=0; i<1; i++) {//add a few normal ones + int irand=uniOnSeries(rng); + diagSeries.insert(series_vect[irand]); + } + for(auto series : diagSeries) { + cout<at(inet); + for (int iEpoch=0; iEpoch 0 ) { + cout<<"sSm:"<at(inet); + for (int iEpoch=0; iEpoch 1 ) { + cout<<"sSm2:"<at(inet); + for (int iEpoch=0; iEpochat(inet); + for (int iEpoch = 0; iEpoch0) { + cout << "levels:" << iEpoch<<" "; + for (int iv = 0; iv 0 ) { + cout << "seasons:" << iEpoch<<" "; + for (int iv = 0; iv 1 ) { + cout << "seasons2:" << iEpoch<<" "; + for (int iv = 0; iv 0 +start 10 +start 20 +start 30 +on 4-core computer. +In this setup, learning and fitting would be repeated 4*3 times, probably unnecessarily too many, 6-8 independent runs should be enough for a good ensemble. +Therefore if running on say 8 core machine , one can extend the above script to 8 concurrent executions and reduce BIG_LOOP to 1. +(Creating final forecasts is done in a supplied R script) + +There are four blocks of parameters below, one active (starting with //PARAMS--------------) and three inactive. +These blocks are as they were during the final forecasting run. You need comment/uncomment to have one block of interest active. +*/ + + +//#define USE_ODBC +//define USE_ODBC if you want to +// 1. run the program in backtesting mode (which means you also need to set LBACK>0 below. Read the comment below. +// 2. save forecasts to a datatabase. Mysql and SQL Server were tested. The table creation and some other scripts should be found in \sql directory of the source code. +// Of course setting up ODBC is not that simple, :-), e.g. you need to create DSN=slawek, that points to a database with the output table. +// Saving to the db is convenient, but not necessary - all forecasts are always saved to as csv files in automatically created subdirectory (sorry sometimes two directories, so you have to copy :-)) of OUTPUT_DIR +//If saving to database you need to modify run varaible, for each new run, otherwise you will get the table key error. + +#include "dynet/dynet.h" +#include "dynet/training.h" +#include "dynet/expr.h" +#include "dynet/io.h" +#include "dynet/model.h" +#include "dynet/nodes.h" +#include "dynet/expr.h" +#include "dynet/lstm.h" +#include "slstm.h" //my implementation of dilated LSTMs + + +#if defined USE_ODBC + #if defined _WINDOWS + #include + #endif + #include + #include +#endif + +#include +#include +#include +//#include +#include +#include +#include +#include + +using namespace std; +using namespace dynet; + +string DATA_DIR = "f:\\progs\\data\\M4DataSet\\"; //with the competition data csvs +//string DATA_DIR="/home/uber/progs/data/M4DataSet/"; +string OUTPUT_DIR = "f:\\progs\\data\\M4\\"; +//string OUTPUT_DIR="/home/uber/progs/data/M4/"; + +int LBACK = 0; //LBACK 0 means final mode: learning on all data and forecasting. LBACK=1 would move back by OUTPUT_SIZE, and forecast last known OUTPUT_SIZE points, for backtesting. LBACK could be a larger integer, but then number of series shrinks. 
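+//How the LBACK split works (this mirrors the M4TS constructor further below): with LBACK>0 the last
+//LBACK*OUTPUT_SIZE points are dropped from the training data and the OUTPUT_SIZE points immediately
+//following the shortened series become the held-out actuals used by the error metrics.
+//Minimal sketch with a hypothetical helper name, not used by this program:
+#if 0
+static void splitForBacktest_sketch(vector<float>& vals, vector<float>& testVals,
+                                    int lback, int outputSize) {
+  if (lback > 0 && vals.size() > (size_t)(lback * outputSize)) {
+    auto first = vals.begin() + vals.size() - lback * outputSize;
+    auto pastLast = vals.begin() + vals.size() - (lback - 1) * outputSize; //[first,pastLast)
+    testVals.assign(first, pastLast);              //held-out actuals for the error metrics
+    vals.resize(vals.size() - lback * outputSize); //training now ends before the held-out block
+  }
+}
+#endif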
+ + +//PARAMS-------------- + +string VARIABLE = "Hourly"; +const string run0 = "(1,4)(24,168) LR=0.01, {25,3e-3f} EPOCHS=37, LVP=10, CSP=0"; +const string runL = "alpha5L " + run0; +const string runH = "alpha5H " + run0; + +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +const int SEASONALITY_NUM = 2;//0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 24; +const int SEASONALITY2 = 168; +vector> dilations = { { 1,4 },{ 24, 168 } }; + +const float INITIAL_LEARNING_RATE = 0.01f; +const map LEARNING_RATES = { { 20,1e-3f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 37; + +float LEVEL_VARIABILITY_PENALTY = 10; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; + +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 24; +const unsigned int OUTPUT_SIZE = 48; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK +const int MAX_SERIES_LENGTH = 53 * SEASONALITY2 + MIN_SERIES_LENGTH; //==all +const int TOPN = 4; + + +/* +string VARIABLE = "Weekly"; +const string run0 = "Att 4/5 (1,52) LR=1e-3 {15,3e-4f} EPOCHS=31, LVP=100 6y"; +const string runL = "alpha5L " + run0; +const string runH = "alpha5H " + run0; + +//#define USE_RESIDUAL_LSTM +#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +const int SEASONALITY_NUM = 1; //0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 52; +const int SEASONALITY2 = 0; +vector> dilations = { { 1, 52 } }; + +const float INITIAL_LEARNING_RATE = 1e-3; +const map LEARNING_RATES = { { 15,3e-4f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 31; + +float LEVEL_VARIABILITY_PENALTY = 100; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; + +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 10; +const unsigned int OUTPUT_SIZE = 13; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK + //#Min. 1st Qu. Median Mean 3rd Qu. Max. 
+ //#81 380 935 1023 1604 2598 +const int MAX_SERIES_LENGTH = 6 * SEASONALITY + MIN_SERIES_LENGTH; +const int TOPN = 4; +*/ + +/* + +string VARIABLE = "Daily"; +const string run0 = "4/5 (1,3)(7,14) LR=3e-4 {13,1e-4f} EPOCHS=21, LVP=100 13w"; +const string runL = "alpha5L " + run0; +const string runH = "alpha5H " + run0; + +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER=false; + +const int SEASONALITY_NUM = 1; //0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 7; +const int SEASONALITY2 = 0; +vector> dilations = { { 1,3 },{ 7, 14 } }; + +const float INITIAL_LEARNING_RATE = 3e-4; +const map LEARNING_RATES = { { 13,1e-4f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 21; + +float LEVEL_VARIABILITY_PENALTY = 100; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; + +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 7; +const unsigned int OUTPUT_SIZE = 14; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK + //#Min. 1st Qu. Median Mean 3rd Qu. Max. + //##93 323 2940 2357 4197 9919 +const int MAX_SERIES_LENGTH = 13 * SEASONALITY + MIN_SERIES_LENGTH; +const int TOPN = 4; +*/ + +/* +string VARIABLE = "Yearly"; +const string run0 = "Att NL 4/5 (1,6) LR=1e-4 {17,3e-5}{22,1e-5} EPOCHS=29, 60*"; +const string runL = "alpha5L " + run0; +const string runH = "alpha5H " + run0; + +//#define USE_RESIDUAL_LSTM +#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = true; + +const int SEASONALITY_NUM = 0; //0 means no seasonality +const int SEASONALITY = 1; //for no seasonality, set it to 1, important +const int SEASONALITY2 = 0; +vector> dilations = { { 1,6 } }; + +const float INITIAL_LEARNING_RATE = 1e-4; +const map LEARNING_RATES = { { 17,3e-5 },{ 22,1e-5 } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 29; + +float LEVEL_VARIABILITY_PENALTY = 0; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; + +const unsigned int STATE_HSIZE = 30; + +const unsigned int INPUT_SIZE = 4; +const unsigned int OUTPUT_SIZE = 6; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK + //#Min. 1st Qu. Median Mean 3rd Qu. Max. 
+ //#13.00 20.00 29.00 31.32 40.00 835.00 +const int MAX_SERIES_LENGTH = 60 + MIN_SERIES_LENGTH; +const int TOPN = 4; +*/ + +const float ALPHA = 0.05; +const float TAUL = ALPHA / 2; +const float TAUH = 1 - TAUL; +const float ALPHA_MULTIP = 2 / ALPHA; + +const int BIG_LOOP = 3; +const int NUM_OF_NETS = 5; +const unsigned ATTENTION_HSIZE = STATE_HSIZE; + +#if defined _DEBUG + const int MAX_NUM_OF_SERIES = 20; +#else + const int MAX_NUM_OF_SERIES = -1; +#endif // _DEBUG + +const unsigned int NUM_OF_CATEGORIES = 6; +const int AVERAGING_LEVEL = 5; +const float EPS=1e-6; + +const float NOISE_STD=0.001; +const int FREQ_OF_TEST=1; +const float GRADIENT_CLIPPING=50; +const float BIG_FLOAT=1e38;//numeric_limits::max(); +const bool PRINT_DIAGN = false; + +string INPUT_PATH = DATA_DIR + VARIABLE + "-train.csv"; +string INFO_INPUT_PATH = DATA_DIR + "M4-info.csv"; + + +Expression squash(const Expression& x) { + return log(x); +} +float squash(float x) { + return log(x); +} + +Expression expand(const Expression& x) { + return exp(x); +} +float expand(float x) { + return exp(x); +} + + +#if defined USE_ODBC + void HandleDiagnosticRecord(SQLHANDLE hHandle, + SQLSMALLINT hType, + RETCODE RetCode); + + #if defined _WINDOWS + WCHAR* pwszConnStr = L"DSN=slawek"; + #else + SQLCHAR* pwszConnStr = (SQLCHAR*) "DSN=slawek"; + #endif + #define TRYODBC(h, ht, x) { RETCODE rc = x;\ + if (rc != SQL_SUCCESS) \ + { \ + HandleDiagnosticRecord (h, ht, rc); \ + } \ + if (rc == SQL_ERROR) \ + { \ + fprintf(stderr, "Error in " #x "\n"); \ + if (hStmt) { \ + SQLFreeHandle(SQL_HANDLE_STMT, hStmt); \ + } \ + if (hDbc) { \ + SQLDisconnect(hDbc); \ + SQLFreeHandle(SQL_HANDLE_DBC, hDbc); \ + } \ + if (hEnv) { \ + SQLFreeHandle(SQL_HANDLE_ENV, hEnv); \ + } \ + exit(-1); \ + } \ + } + +#endif + +struct M4TS {//storing series data + vector < float> categories_vect; + vector vals; + vector testVals;//empty, unless LBACK>0 + float meanAbsSeasDiff; + int n; + + M4TS(string category, stringstream &line_stream) { + array categories = { 0,0,0,0,0,0 }; + if (category == "Demographic") + categories[0] = 1; + else if (category == "Finance") + categories[1] = 1; + else if (category == "Industry") + categories[2] = 1; + else if (category == "Macro") + categories[3] = 1; + else if (category == "Micro") + categories[4] = 1; + else if (category == "Other") + categories[5] = 1; + else { + cerr << "unknown category?"; + exit(-1); + } + for (int i = 0; i < NUM_OF_CATEGORIES; i++) + categories_vect.push_back(categories[i]); + + string tmp_str; + while(getline(line_stream, tmp_str, ',' )) { + string val_str; + for (const auto c : tmp_str) { + if (c != '\"' && c != '\r') //remove quotes and very occasional double end of line + val_str.push_back(c); + } + if (val_str.size() == 0) + break; + float val=(atof(val_str.c_str())); + vals.push_back(val); + } + + meanAbsSeasDiff = 0; + float sumf = 0; + for (int ip = SEASONALITY; ip0) + meanAbsSeasDiff = sumf / (vals.size() - SEASONALITY); + + if (LBACK > 0) { //extract last OUTPUT_SIZE points as the test values + if (vals.size() > LBACK*OUTPUT_SIZE) { + auto first = vals.begin() + vals.size() - LBACK*OUTPUT_SIZE; + auto pastLast = vals.begin() + vals.size() - (LBACK-1)*OUTPUT_SIZE; + vector input_vect(first, pastLast); //[first,pastLast) + testVals= input_vect; + vals.resize(vals.size() - LBACK*OUTPUT_SIZE); //remove last LBACK*OUTPUT_SIZE elements + n = vals.size(); + } else + n = 0; + } else { + n = vals.size(); + } + if (n > MAX_SERIES_LENGTH) {//chop long series + vals.erase(vals.begin(), vals.begin() + 
(n-MAX_SERIES_LENGTH)); //remove some early data + n = vals.size(); + } + } + M4TS(){}; +}; + +#if defined USE_ODBC +void HandleDiagnosticRecord(SQLHANDLE hHandle, + SQLSMALLINT hType, + RETCODE RetCode); +#endif + +struct AdditionalParams {//Per series, important + Parameter levSm; + Parameter sSm; + array initSeasonality; + Parameter sSm2; + array initSeasonality2; +}; +struct AdditionalParamsF {//Used for storing diagnostics + float levSm; + float sSm; + array initSeasonality; + float sSm2; + array initSeasonality2; + vector levels; + vector seasons; + vector seasons2; +}; + + +array perfToRanking (array perf_arr) { + array index; + + for (int itop=0; itop losses; + for (unsigned int indx = 0; indx as_scalar(forecH.value())) + loss = loss + (actual - forecH)*ALPHA_MULTIP; + losses.push_back(loss); + } + return sum(losses) / OUTPUT_SIZE; +} + +// weighted quantile Loss +float wQuantLoss(vector& out_vect, vector& actuals_vect, float tau, int offset) {//used just for diagnostics, if if LBACK>0 and PERCENTILE!=50 + float sumf = 0; float suma = 0; + for (unsigned int indx = 0; indx forec) + sumf = sumf + (actual - forec)*tau; + else + sumf = sumf + (actual - forec)*(tau - 1); + } + return sumf / suma * 200; +} + +float errorFunc(vector& out_vect, vector& actuals_vect, float meanAbsSeasDiff) { + float sumf=0; + for (unsigned int indx = 0; indx forecH) + loss = loss + (actualf - forecH)*ALPHA_MULTIP; + sumf+=loss; + } + return sumf / (OUTPUT_SIZE*meanAbsSeasDiff); +} + + + +int main(int argc, char** argv) { + dynet::initialize(argc, argv); + + int ibigOffset = 0; + if (argc == 2) + ibigOffset = atoi(argv[1]); + + cout< 0) { + cout<<"Warning. LEVEL_VARIABILITY_PENALTY has to be equal zero if SEASONALITY_NUM==0"<tm_year+1900; + now_ts.month=now->tm_mon+1; + now_ts.day=now->tm_mday; + now_ts.hour=now->tm_hour; + now_ts.minute=now->tm_min; + now_ts.second=now->tm_sec; + now_ts.fraction=0; //reportedly needed + + const int OFFSET_TO_FIRST_ACTUAL=5; + string insertQuery_str = "insert into M72nn(run, LBack, ibig, series, epoch "; + for (int iq = 1; iq <= OUTPUT_SIZE; iq++) { + stringstream ss; + ss << iq; + string iq_str = ss.str(); + insertQuery_str = insertQuery_str +", actual"+iq_str+", forec" + iq_str; + } + insertQuery_str = insertQuery_str +", trainingError, variable, n, dateTimeOfPrediction) \ + values(? , ? , ? , ? , ? 
"; + for (int iq = 1; iq <= OUTPUT_SIZE; iq++) { + insertQuery_str = insertQuery_str + ",?,?"; + } + insertQuery_str = insertQuery_str + ",?,?,?,?)"; + #if defined _WINDOWS + wstring insertQuery(insertQuery_str.begin(), insertQuery_str.end()); + SQLWCHAR* sqlQuery = (SQLWCHAR*)insertQuery.c_str(); + #else + SQLCHAR* sqlQuery =(SQLCHAR*)insertQuery_str.c_str(); + #endif + + SQLHENV hEnv = NULL; + SQLHDBC hDbc = NULL; + SQLHSTMT hStmt = NULL, hInsertStmt = NULL; + + if (SQLAllocHandle(SQL_HANDLE_ENV, SQL_NULL_HANDLE, &hEnv) == SQL_ERROR) { + fprintf(stderr, "Unable to allocate an environment handle\n"); + exit(-1); + } + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLSetEnvAttr(hEnv, + SQL_ATTR_ODBC_VERSION, + (SQLPOINTER)SQL_OV_ODBC3, + 0)); + + // Allocate a connection + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLAllocHandle(SQL_HANDLE_DBC, hEnv, &hDbc)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLDriverConnect(hDbc, + NULL, + pwszConnStr, + SQL_NTS, + NULL, + 0, + NULL, + SQL_DRIVER_COMPLETE)); + fprintf(stderr, "Connected!\n"); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLSetConnectAttr(hDbc, SQL_ATTR_AUTOCOMMIT, (SQLPOINTER)SQL_AUTOCOMMIT_OFF, SQL_IS_INTEGER)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLAllocHandle(SQL_HANDLE_STMT, hDbc, &hInsertStmt)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLPrepare(hInsertStmt, sqlQuery, SQL_NTS)); + + SQLLEN nullTerminatedStringOfRun = SQL_NTS; + SQLLEN nullTerminatedStringOfSeries = SQL_NTS; + SQLLEN nullTerminatedStringOfVariable = SQL_NTS; + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 2, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&LBACK, 0, NULL)); + + // variable, n, dateTimeOfPrediction + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL+2*OUTPUT_SIZE+2, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)VARIABLE.c_str(), 0, &nullTerminatedStringOfVariable)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE + 4, SQL_PARAM_INPUT, SQL_C_TYPE_TIMESTAMP, SQL_TYPE_TIMESTAMP, 0, 0, &now_ts, sizeof(TIMESTAMP_STRUCT), NULL)); +#endif + + random_device rd; // only used once to initialise (seed) engine + mt19937 rng(rd()); // random-number engine used (Mersenne-Twister in this case) + + vector series_vect; + unordered_map allSeries_map(30000);//max series in one chunk would be 24k for yearly series + unordered_map seriesCategories_map(120000);//100k series + + ifstream infoFile(INFO_INPUT_PATH); + string line; + getline(infoFile, line); //header + while (getline(infoFile, line)) { + //cout << string( line)<= MIN_SERIES_LENGTH) { + series_vect.push_back(series); + if (m4Obj.meanAbsSeasDiff==0) { + cout<<"Warning, flat series:"<0 && series_vect.size()>=MAX_NUM_OF_SERIES) + break; + } + cout << "num of series:" << series_vect.size() << endl; + + unsigned int series_len=(unsigned int)series_vect.size(); + uniform_int_distribution uniOnSeries(0,series_len-1); // closed interval [a, b] + uniform_int_distribution uniOnNets(0,NUM_OF_NETS-1); // closed interval [a, b] + + unordered_map, AVERAGING_LEVEL+1>, NUM_OF_NETS>> testResults_map((int)series_len*1.5);//per series, etc... 
+ unordered_map> finalResults_map((int)series_len*1.5);//per series + set diagSeries; + + unordered_map> netRanking_map; + for (int ibig=0; ibig perfValid_vect; + int epochOfLastChangeOfLRate = -1; + +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&ibigDb, 0, NULL)); +#endif + + //create nets + array paramsCollection_arr;//per net + array perSeriesParamsCollection_arr;//per net + array trainers_arr; + array perSeriesTrainers_arr; + + + #if defined USE_RESIDUAL_LSTM + array, NUM_OF_NETS> rnnStack_arr; + #elif defined USE_ATTENTIVE_LSTM + array, NUM_OF_NETS> rnnStack_arr; + #else + array, NUM_OF_NETS> rnnStack_arr; + #endif + + array MLPW_parArr; + array MLPB_parArr; + array adapterW_parArr; + array adapterB_parArr; + + //this is not a history, this is the real stuff + unordered_map* > additionalParams_mapOfArr((int)series_len*1.5); //per series, per net + for (auto iter = series_vect.begin() ; iter != series_vect.end(); ++iter) { + string series=*iter; + additionalParams_mapOfArr[series]=new array(); + } + + for (int inet=0; inetclip_threshold = GRADIENT_CLIPPING; + perSeriesTrainers_arr[inet]=new AdamTrainer (perSeriesPC, INITIAL_LEARNING_RATE*PER_SERIES_LR_MULTIP, 0.9, 0.999, EPS); + perSeriesTrainers_arr[inet]->clip_threshold = GRADIENT_CLIPPING; + + auto& rNNStack=rnnStack_arr[inet]; + #if defined USE_RESIDUAL_LSTM + rNNStack.emplace_back(ResidualDilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, pc)); + for (int il = 1; il* additionalParams_arr=additionalParams_mapOfArr[series]; + additionalParams_arr->at(inet).levSm=perSeriesPC.add_parameters({1}, 0.5);//per series, per net + if (SEASONALITY_NUM > 0) { + additionalParams_arr->at(inet).sSm = perSeriesPC.add_parameters({ 1 }, 0.5); + for (int isea = 0; iseaat(inet).initSeasonality[isea] = perSeriesPC.add_parameters({ 1 }, 0.5); + } + if (SEASONALITY_NUM > 1) { + additionalParams_arr->at(inet).sSm2 = perSeriesPC.add_parameters({ 1 }, 0.5); + for (int isea = 0; iseaat(inet).initSeasonality2[isea] = perSeriesPC.add_parameters({ 1 }, 0.5); + } + } + }//seting up, through nets + + //history of params. Series->[NUM_OF_NETS,NUM_OF_TRAIN_EPOCHS] + unordered_map, NUM_OF_NETS>*> historyOfAdditionalParams_map((int)series_len*1.5); + for (auto iter = series_vect.begin() ; iter != series_vect.end(); ++iter) { + string series=*iter; + historyOfAdditionalParams_map[series]=new array, NUM_OF_NETS>(); + } + + //first assignment. Yes, we are using vector , so the very first time the duplicates are possible. 
But a set can't be sorted + array, NUM_OF_NETS> seriesAssignment;//every net has an array + for (int j=0; j> netPerf_map; + for (int inet=0; inetlearning_rate = LEARNING_RATES.at(iEpoch); + if (inet==0) + cout << "changing LR to:" << trainer->learning_rate << endl; + perSeriesTrainer->learning_rate = LEARNING_RATES.at(iEpoch)*PER_SERIES_LR_MULTIP; + } + + auto& rNNStack=rnnStack_arr[inet]; + Parameter& MLPW_par = MLPW_parArr[inet]; + Parameter& MLPB_par = MLPB_parArr[inet]; + Parameter& adapterW_par=adapterW_parArr[inet]; + Parameter& adapterB_par=adapterB_parArr[inet]; + + vector oneNetAssignments=seriesAssignment[inet]; + random_shuffle (oneNetAssignments.begin(), oneNetAssignments.end()); + + vector epochLosses; + vector forecLosses; vector levVarLosses; vector stateLosses; + for (auto iter = oneNetAssignments.begin() ; iter != oneNetAssignments.end(); ++iter) { + string series=*iter; + auto m4Obj=allSeries_map[series]; + + ComputationGraph cg; + for (int il=0; ilat(inet); + array& historyOfAdditionalParams_arr=historyOfAdditionalParams_map[series]->at(inet); + + Expression MLPW_ex,MLPB_ex; + if (ADD_NL_LAYER) { + MLPW_ex = parameter(cg, MLPW_par); + MLPB_ex = parameter(cg, MLPB_par); + } + Expression adapterW_ex=parameter(cg, adapterW_par); + Expression adapterB_ex=parameter(cg, adapterB_par); + + Expression levSmSerNet0_ex= parameter(cg, additionalParams.levSm); + Expression levSm_ex = logistic(levSmSerNet0_ex); + + vector season_exVect;//vector, because we do not know how long the series is + Expression sSm_ex; + if (SEASONALITY_NUM > 0) { + Expression sSmSerNet0_ex= parameter(cg, additionalParams.sSm); + sSm_ex = logistic(sSmSerNet0_ex); + + for (int isea = 0; isea season2_exVect;//vector, because we do not know how long the series is + Expression sSm2_ex; + if (SEASONALITY_NUM > 1) { + Expression sSm2SerNet0_ex= parameter(cg, additionalParams.sSm2); + sSm2_ex = logistic(sSm2SerNet0_ex); + + for (int isea = 0; isea logDiffOfLevels_vect; + vector levels_exVect; + if (SEASONALITY_NUM == 0) { + levels_exVect.push_back(input(cg, m4Obj.vals[0])); + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + } + else if (SEASONALITY_NUM == 2) { + Expression lev = cdiv(input(cg, m4Obj.vals[0]), season_exVect[0] * season2_exVect[0]); + levels_exVect.push_back(lev); + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + //if prediction horizon is larger than seasonality, so we need to repeat some of the seasonality factors + if (OUTPUT_SIZE>SEASONALITY2) { + unsigned long startSeasonalityIndx = season2_exVect.size() - SEASONALITY2; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY2); i++) + season2_exVect.push_back(season2_exVect[startSeasonalityIndx + i]); + } + } + else { + cerr<<"SEASONALITY_NUM="<< SEASONALITY_NUM; + exit(-1); + } + + Expression levelVarLoss_ex; + if (LEVEL_VARIABILITY_PENALTY > 0) { + vector levelVarLoss_v; + for (int i = 1; i losses;//losses of steps through single time series + for (int i=INPUT_SIZE-1; i<(m4Obj.n- OUTPUT_SIZE); i++) { + vector::const_iterator first = m4Obj.vals.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1; //not including the last one + vector 
input_vect(first, pastLast); //[first,pastLast) + + first = m4Obj.vals.begin() + i + 1; + pastLast = m4Obj.vals.begin() + i + 1 + OUTPUT_SIZE; + vector labels_vect(first, pastLast); //[first,pastLast) + + Expression input1_ex = input(cg, { INPUT_SIZE }, input_vect); + Expression labels1_ex = input(cg, { OUTPUT_SIZE }, labels_vect); + + if (SEASONALITY_NUM > 0 ) { + vector::const_iterator firstE = season_exVect.begin() +i+1-INPUT_SIZE; + vector::const_iterator pastLastE = season_exVect.begin() +i+1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality_ex=concatenate(inputSeasonality_exVect); + + firstE = season_exVect.begin() + i + 1; + pastLastE = season_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + outputSeasonality_ex = concatenate(outputSeasonality_exVect); + + input1_ex = cdiv(input1_ex, inputSeasonality_ex); // input deseasonalization + labels1_ex = cdiv(labels1_ex, outputSeasonality_ex); //output deseasonalization + } + if (SEASONALITY_NUM > 1) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1; //not including the last one + vector inputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality2_ex = concatenate(inputSeasonality2_exVect); + + firstE = season2_exVect.begin() + i + 1; + pastLastE = season2_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality2_ex = concatenate(outputSeasonality2_exVect); + + input1_ex = cdiv(input1_ex, inputSeasonality2_ex); //input deseasonalization + labels1_ex = cdiv(labels1_ex, outputSeasonality2_ex); //output deseasonalization + } + + vector joinedInput_ex; + joinedInput_ex.emplace_back(noise(squash(cdiv(input1_ex, levels_exVect[i])), NOISE_STD)); //input normalization+noise + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression labels_ex = squash(cdiv(labels1_ex, levels_exVect[i]));//output normalization + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il=INPUT_SIZE+MIN_INP_SEQ_LEN) + losses.push_back(loss_ex); + }//through points of a series + + Expression forecLoss_ex= average(losses); + Expression loss_exp = forecLoss_ex; + + float levVarLoss=0; + if (LEVEL_VARIABILITY_PENALTY > 0) { + Expression levelVarLossP_ex = levelVarLoss_ex*LEVEL_VARIABILITY_PENALTY; + levVarLoss = as_scalar(levelVarLossP_ex.value()); + levVarLosses.push_back(levVarLoss); + loss_exp= loss_exp + levelVarLossP_ex; + } + + float cStateLoss=0; + if (C_STATE_PENALTY>0) { + vector cStateLosses_vEx; + for (int irnn = 0; irnn < rNNStack.size(); irnn++) + for (int it = 0; itupdate();//update shared weights + perSeriesTrainer->update();//update params of this series only + } catch (exception& e) {//it may happen occasionally. I believe it is due to not robust enough implementation of squashing functions in Dynet. When abs(x)>35 NAs appear. + //so the code below is trying to produce some diagnostics, hopefully useful when setting LEVEL_VARIABILITY_PENALTY and C_STATE_PENALTY. 
+ cerr<<"cought exception while doing "< maxAbs) { + maxAbs = abs(state[iv]); + timeOfMax=it; + layerOfMax=il; + chunkOfMax= irnn; + } + } + } //through layers/states + } //through time + } //through chunks + + cout << "levSm:" << as_scalar(levSm_ex.value()) << endl; + if (SEASONALITY_NUM > 0) + cout << "sSm:" << as_scalar(sSm_ex.value()) << endl; + if (SEASONALITY_NUM > 1) + cout << "sSm2:" << as_scalar(sSm2_ex.value()) << endl; + cout << "max abs:" << maxAbs <<" at time:"<< timeOfMax<<" at layer:"<< layerOfMax<<" and chunk:"<< chunkOfMax< 0) { + histAdditionalParams.sSm=as_scalar(sSm_ex.value()); + for (int isea = 0; isea 1) { + histAdditionalParams.sSm2 = as_scalar(sSm2_ex.value()); + for (int isea=0; isea 0 || C_STATE_PENALTY > 0) { + float averageForecLoss = accumulate(forecLosses.begin(), forecLosses.end(), 0.0) / forecLosses.size(); + cout << " forec loss:" << averageForecLoss * 100; + } + if (LEVEL_VARIABILITY_PENALTY > 0) { + float averagelevVarLoss = accumulate(levVarLosses.begin(), levVarLosses.end(), 0.0) / levVarLosses.size(); + cout << " levVar loss:" << averagelevVarLoss * 100; + } + if (C_STATE_PENALTY > 0) { + float averageStateLoss = accumulate(stateLosses.begin(), stateLosses.end(), 0.0) / stateLosses.size(); + cout << " state loss:" << averageStateLoss * 100; + } + cout<at(inet); + Expression MLPW_ex, MLPB_ex; + if (ADD_NL_LAYER) { + MLPW_ex = parameter(cg, MLPW_par); + MLPB_ex = parameter(cg, MLPB_par); + } + Expression adapterW_ex=parameter(cg, adapterW_par); + Expression adapterB_ex=parameter(cg, adapterB_par); + + Expression levSmSerNet0_ex = parameter(cg, additionalParams.levSm); + Expression levSm_ex = logistic(levSmSerNet0_ex); + + vector season_exVect;//vector, because we do not know how long the series is + Expression sSm_ex; + if (SEASONALITY_NUM > 0) { + Expression sSmSerNet0_ex= parameter(cg, additionalParams.sSm); + sSm_ex = logistic(sSmSerNet0_ex); + + for (int isea = 0; isea season2_exVect;//vector, because we do not know how long the series is + Expression sSm2_ex; + if (SEASONALITY_NUM > 1) { + Expression sSm2SerNet0_ex= parameter(cg, additionalParams.sSm2); + sSm2_ex = logistic(sSm2SerNet0_ex); + + for (int isea = 0; isea levels_exVect; + if (SEASONALITY_NUM == 0) { + levels_exVect.push_back(input(cg, m4Obj.vals[0])); + for (int i = 1; i0 then this is shortened, so it always contains data awe have right to access + Expression newLevel_ex = m4Obj.vals[i] * cdiv(levSm_ex, season_exVect[i]) + (1 - levSm_ex)*levels_exVect[i - 1]; + levels_exVect.push_back(newLevel_ex); + + Expression newSeason_ex = m4Obj.vals[i] * cdiv(sSm_ex, newLevel_ex) + (1 - sSm_ex)*season_exVect[i]; + season_exVect.push_back(newSeason_ex); + } + + //if prediction horizon is larger than seasonality, so we need to repeat some of the seasonality factors + if (OUTPUT_SIZE>SEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + } + else if (SEASONALITY_NUM == 2) { + Expression lev = cdiv(input(cg, m4Obj.vals[0]), season_exVect[0] * season2_exVect[0]); + levels_exVect.push_back(lev); + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + //if prediction horizon is larger than seasonality, so we need to repeat some of the seasonality factors + if 
(OUTPUT_SIZE>SEASONALITY2) { + unsigned long startSeasonalityIndx = season2_exVect.size() - SEASONALITY2; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY2); i++) + season2_exVect.push_back(season2_exVect[startSeasonalityIndx + i]); + } + } + else { + cerr<<"SEASONALITY_NUM="<< SEASONALITY_NUM; + exit(-1); + } + + + Expression inputSeasonality_ex; Expression inputSeasonality2_ex; + Expression outputSeasonality_ex; Expression outputSeasonality2_ex; + vector losses;//losses of steps through single time series + Expression out_ex;//we declare it here, bcause the last one will be the forecast + for (int i=INPUT_SIZE-1; i::const_iterator first = m4Obj.vals.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1; //not including the last one + vector input_vect(first, pastLast); //[first,pastLast) + Expression input1_ex = input(cg, { INPUT_SIZE }, input_vect); + + if (SEASONALITY_NUM > 0 ) { + vector::const_iterator firstE = season_exVect.begin() +i+1-INPUT_SIZE; + vector::const_iterator pastLastE = season_exVect.begin() +i+1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality_ex=concatenate(inputSeasonality_exVect); + input1_ex = cdiv(input1_ex, inputSeasonality_ex); // input deseasonalization + } + if (SEASONALITY_NUM > 1) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1; //not including the last one + vector inputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality2_ex = concatenate(inputSeasonality2_exVect); + input1_ex = cdiv(input1_ex, inputSeasonality2_ex); //input deseasonalization + } + + vector joinedInput_ex; + joinedInput_ex.emplace_back(noise(squash(cdiv(input1_ex, levels_exVect[i])), NOISE_STD)); //input normalization+noise + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il::const_iterator first = m4Obj.vals.begin() + i + 1; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1 + OUTPUT_SIZE; + vector labels_vect(first, pastLast); //[first,pastLast) + Expression labels1_ex = input(cg, { OUTPUT_SIZE }, labels_vect); + + if (SEASONALITY_NUM > 0) { + vector::const_iterator firstE = season_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + outputSeasonality_ex = concatenate(outputSeasonality_exVect); + labels1_ex = cdiv(labels1_ex, outputSeasonality_ex); //output deseasonalization + } + if (SEASONALITY_NUM > 1) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1 + OUTPUT_SIZE;//checking if enough elements is in the vecor was done a few pe + vector outputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality2_ex = concatenate(outputSeasonality2_exVect); + labels1_ex = cdiv(labels1_ex, outputSeasonality2_ex); //output deseasonalization + } + Expression labels_ex = squash(cdiv(labels1_ex, levels_exVect[i]));//output normalization + + //Expression loss_ex = pinBallLoss(out_ex, labels_ex); + Expression loss_ex = MSIS(out_ex, labels_ex); + if (i>=INPUT_SIZE+MIN_INP_SEQ_LEN) + losses.push_back(loss_ex); //training area losses + } + + 
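+	//(At the last anchor point, i==n-1, the block below rescales the forecast back to the original scale: expand() inverts
+	// squash(), the result is multiplied by the level and, when SEASONALITY_NUM>0, by the output seasonality factors,
+	// and the rescaled vector is stored in testResults_map, where the last AVERAGING_LEVEL epochs get averaged.)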
if (i==(m4Obj.n-1)) {//validation loss + out_ex=expand(out_ex)*levels_exVect[i];//back to original scale + if (SEASONALITY_NUM > 0 ) { + vector::const_iterator firstE = season_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + for (int ios=0; ios 1 ) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + for (int ios = 0; ios, AVERAGING_LEVEL+1>, NUM_OF_NETS>, BIG_LOOP>> testResults_map((int)series_len*1.5);//per series, big loop, etc... + //No epoch here, because this will just reflect the current (latest) situation - the last few epochs + vector out_vect=as_vector(out_ex.value()); + testResults_map[series][inet][iEpoch%AVERAGING_LEVEL]=out_vect; + if (iEpoch>=AVERAGING_LEVEL && iEpoch % FREQ_OF_TEST==0) { + vector firstForec=testResults_map[series][inet][0]; + testResults_map[series][inet][AVERAGING_LEVEL]=firstForec; + for (int ii=1; ii nextForec=testResults_map[series][inet][ii]; + for (int iii=0; iii<2*OUTPUT_SIZE; iii++) + testResults_map[series][inet][AVERAGING_LEVEL][iii]+=nextForec[iii]; + } + for (int iii=0; iii<2*OUTPUT_SIZE; iii++) + testResults_map[series][inet][AVERAGING_LEVEL][iii]/=AVERAGING_LEVEL; + } //time to average + }//through series + } //through nets + + if (iEpoch>0 && iEpoch % FREQ_OF_TEST==0) { + //now that we have saved outputs of all nets on all series, let's calc how best and topn combinations performed during current epoch. + vector bestEpochLosses; + vector bestEpochAvgLosses; + vector topnEpochLosses; + vector topnEpochAvgLosses; + vector bestEpochLossesL; + vector bestEpochAvgLossesL; + vector topnEpochLossesL; + vector topnEpochAvgLossesL; + vector bestEpochLossesH; + vector bestEpochAvgLossesH; + vector topnEpochLossesH; + vector topnEpochAvgLossesH; + + for (auto iter = series_vect.begin() ; iter != series_vect.end(); ++iter) { + string series=*iter; + auto m4Obj=allSeries_map[series]; + +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 4, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)series.c_str(), 0, &nullTerminatedStringOfSeries)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE + 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&m4Obj.n, 0, NULL)); +#endif + + float avgLoss; + vector avgLatest; + vector avgAvg; + + for (int itop=0; itop 0) { + float qLoss = errorFunc(testResults_map[series][inet][iEpoch%AVERAGING_LEVEL], m4Obj.testVals, m4Obj.meanAbsSeasDiff); + bestEpochLosses.push_back(qLoss); + + qLoss=wQuantLoss(testResults_map[series][inet][iEpoch%AVERAGING_LEVEL], m4Obj.testVals, TAUL, 0); + bestEpochLossesL.push_back(qLoss); + + qLoss = wQuantLoss(testResults_map[series][inet][iEpoch%AVERAGING_LEVEL], m4Obj.testVals, TAUH, OUTPUT_SIZE); + bestEpochLossesH.push_back(qLoss); + } + avgLatest=testResults_map[series][inet][iEpoch%AVERAGING_LEVEL]; //used later for calculating topn loss + + if (iEpoch>=AVERAGING_LEVEL) { + if (LBACK > 0) { + float qLoss = errorFunc(testResults_map[series][inet][AVERAGING_LEVEL], m4Obj.testVals, m4Obj.meanAbsSeasDiff); + bestEpochAvgLosses.push_back(qLoss); + + qLoss = wQuantLoss(testResults_map[series][inet][AVERAGING_LEVEL], m4Obj.testVals, TAUL, 0); + 
bestEpochAvgLossesL.push_back(qLoss); + + qLoss = wQuantLoss(testResults_map[series][inet][AVERAGING_LEVEL], m4Obj.testVals, TAUH, OUTPUT_SIZE); + bestEpochAvgLossesH.push_back(qLoss); + } + avgAvg=testResults_map[series][inet][AVERAGING_LEVEL]; + } + } else { + for (int iii=0; iii<2*OUTPUT_SIZE; iii++) { + avgLatest[iii]+=testResults_map[series][inet][iEpoch%AVERAGING_LEVEL][iii];//calculate current topn + if (iEpoch>=AVERAGING_LEVEL) + avgAvg[iii]+=testResults_map[series][inet][AVERAGING_LEVEL][iii]; + } + } + }//through topn + + for (int iii=0; iii<2*OUTPUT_SIZE; iii++) + avgLatest[iii]/=TOPN; + + if (LBACK > 0) { + float qLoss = errorFunc(avgLatest, m4Obj.testVals, m4Obj.meanAbsSeasDiff); + topnEpochLosses.push_back(qLoss); + + qLoss = wQuantLoss(avgLatest, m4Obj.testVals, TAUL, 0); + topnEpochLossesL.push_back(qLoss); + + qLoss = wQuantLoss(avgLatest, m4Obj.testVals, TAUH, OUTPUT_SIZE); + topnEpochLossesH.push_back(qLoss); + } + + if (iEpoch>=AVERAGING_LEVEL) { + for (int iii = 0; iii<2*OUTPUT_SIZE; iii++) + avgAvg[iii] /= TOPN; + + finalResults_map[series] = avgAvg; + + if (LBACK > 0) { +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE + 1, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&avgLoss, 0, NULL)); + + for (int iv=0; iv<2; iv++) { + if (iv==0) + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 1, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)runL.c_str(), 0, &nullTerminatedStringOfRun)) + else + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 1, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)runH.c_str(), 0, &nullTerminatedStringOfRun)); + + for (int iii=0; iii 0) { + float bestEpochLoss=accumulate( bestEpochLosses.begin(), bestEpochLosses.end(), 0.0)/bestEpochLosses.size(); + float topnEpochLoss=accumulate( topnEpochLosses.begin(), topnEpochLosses.end(), 0.0)/topnEpochLosses.size(); + float bestEpochLossL = accumulate(bestEpochLossesL.begin(), bestEpochLossesL.end(), 0.0) / bestEpochLossesL.size(); + float topnEpochLossL = accumulate(topnEpochLossesL.begin(), topnEpochLossesL.end(), 0.0) / topnEpochLossesL.size(); + float bestEpochLossH = accumulate(bestEpochLossesH.begin(), bestEpochLossesH.end(), 0.0) / bestEpochLossesH.size(); + float topnEpochLossH = accumulate(topnEpochLossesH.begin(), topnEpochLossesH.end(), 0.0) / topnEpochLossesH.size(); + cout<=AVERAGING_LEVEL) { + float bestEpochAvgLoss=accumulate( bestEpochAvgLosses.begin(), bestEpochAvgLosses.end(), 0.0)/bestEpochAvgLosses.size(); + float topnEpochAvgLoss=accumulate( topnEpochAvgLosses.begin(), topnEpochAvgLosses.end(), 0.0)/topnEpochAvgLosses.size(); + float bestEpochAvgLossL = accumulate(bestEpochAvgLossesL.begin(), bestEpochAvgLossesL.end(), 0.0) / bestEpochAvgLossesL.size(); + float topnEpochAvgLossL = accumulate(topnEpochAvgLossesL.begin(), topnEpochAvgLossesL.end(), 0.0) / topnEpochAvgLossesL.size(); + float bestEpochAvgLossH = accumulate(bestEpochAvgLossesH.begin(), bestEpochAvgLossesH.end(), 0.0) / bestEpochAvgLossesH.size(); + float topnEpochAvgLossH = accumulate(topnEpochAvgLossesH.begin(), topnEpochAvgLossesH.end(), 0.0) / topnEpochAvgLossesH.size(); + cout<<" bestAvg:"<> netRanking_map + netRanking_map[series]=perfToRanking(netPerf_map[series]); + + for (int itop=0; itop diagSeries; + for (int i=0; i<1; i++) {//add a few normal ones + int irand=uniOnSeries(rng); + diagSeries.insert(series_vect[irand]); + } + for(auto series : 
diagSeries) { + cout<at(inet); + for (int iEpoch=0; iEpoch 0 ) { + cout<<"sSm:"<at(inet); + for (int iEpoch=0; iEpoch 1 ) { + cout<<"sSm2:"<at(inet); + for (int iEpoch=0; iEpochat(inet); + for (int iEpoch = 0; iEpoch0) { + cout << "levels:" << iEpoch<<" "; + for (int iv = 0; iv 0 ) { + cout << "seasons:" << iEpoch<<" "; + for (int iv = 0; iv 1 ) { + cout << "seasons2:" << iEpoch<<" "; + for (int iv = 0; iv 10 1 +start 10 2 +Modern computers have at more then 2 cores, so e.g. on 6-core machine create and run the following script with 3 pairs of workers: +# start 10 1 0 +# start 10 2 0 +# start 20 1 5 +# start 20 2 5 +# start 30 1 10 +# start 30 2 10 +seedForChunks have to be the same withion one pair, chunk numbers have to be 1 and 2. +We have added here the third parameter: ibigOffset. The straddle should be equal or bigger than BIG_LOOP. +Each pair goes through BIG_LOOP (by default 3, change in code below if you want) of model fitting and prediction, +so 2 pairs, as above, will produce 6 forecasts to be ensembled later, in R. +By increasing number of pairs, e.g. to 6 on 12-core computer, one can reduce BIG_LOOP to 1, so reduce execution time, and still have 6 forecasts - +a decent number to ensemble (in a separate, supplied R script). + +There are three blocks of parameters below, one active (starting with //PARAMS--------------) and two inactive. +The active block is setup as in the final run of forecasting quarterly series. Similarly Monthly block. +The Daily block is more of a demo, allowing to run quickly forecast for Daily series, although with slightly worse performance (use another program ES_RNN_E.cc for it). It was not used for the final submission. +So, you need comment/uncomment to have one block of interest active. + + +*/ + +//#define USE_ODBC +//define USE_ODBC if you want to +// 1. run the program in backtesting mode (which means you also need to set LBACK>0 below. Read the comment below. +// 2. save forecasts to a datatabase. Mysql and SQL Server were tested. The table creation and some other scripts should be found in \sql directory of the source code. +// Of course setting up ODBC is not that simple, :-), e.g. you need to create DSN=slawek, that points to a database with the output table. +// Saving to the db is convenient, but not necessary - all forecasts are always saved to as csv files in automatically created subdirectory (sorry sometimes two directories, so you have to copy :-)) of OUTPUT_DIR +//If saving to database you need to modify run varaible, for each new run, otherwise you will get the table key error. + +#include "dynet/dynet.h" +#include "dynet/training.h" +#include "dynet/expr.h" +#include "dynet/io.h" +#include "dynet/model.h" +#include "dynet/nodes.h" +#include "dynet/expr.h" +#include "dynet/lstm.h" +#include "slstm.h" //my implementation of dilated LSTMs + + +#if defined USE_ODBC + #if defined _WINDOWS + #include + #endif + #include + #include +#endif + +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace dynet; + + + +string DATA_DIR = "f:\\progs\\data\\M4DataSet\\"; //with the competition data csvs +//string DATA_DIR="/home/uber/progs/data/M4DataSet/"; +string OUTPUT_DIR = "f:\\progs\\data\\M4\\"; +//string OUTPUT_DIR="/home/uber/progs/data/M4/"; + +int LBACK = 0; //LBACK 0 means final mode: learning on all data and forecasting. LBACK=1 would move back by OUTPUT_SIZE, and forecast last known OUTPUT_SIZE points, for backtesting. 
LBACK could be a larger integer, but then number of series shrinks. + + +//PARAMS-------------- +string VARIABLE = "Quarterly"; +const string run0 = "(1,2),(4,8), LR=1e-3/{7,3e-4f},{11,1e-4f}, EPOCHS=16, LVP=200 40*"; +const string runL = "alpha5L " + run0; +const string runH = "alpha5H " + run0; + +vector> dilations = { { 1,2 },{ 4,8 } };//Each vector represents one chunk of Dilateed LSTMS, connected in resnNet fashion +const float INITIAL_LEARNING_RATE = 1e-3f; +//else +const map LEARNING_RATES = { { 7,3e-4f },{ 11,1e-4f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; //multiplier for per-series parameters' learning rate. + +const float ALPHA = 0.05; +const float TAUL = ALPHA / 2; +const float TAUH = 1 - TAUL; +const float ALPHA_MULTIP = 2 / ALPHA; + +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; //whether to insert a tanh() layer between the RNN stack and the linear adaptor (output) layer + +const int NUM_OF_TRAIN_EPOCHS = 16; +const unsigned int STATE_HSIZE = 40; + +const int SEASONALITY = 4; +const unsigned int INPUT_SIZE = 4; +const int INPUT_SIZE_I = INPUT_SIZE; +const unsigned int OUTPUT_SIZE = 8; +const int OUTPUT_SIZE_I = OUTPUT_SIZE; +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = INPUT_SIZE_I + OUTPUT_SIZE_I + MIN_INP_SEQ_LEN + 2; +const int MAX_SERIES_LENGTH = 40 * SEASONALITY + MIN_SERIES_LENGTH; //we are chopping longer series, to last, max e.g. 40 years + +const float LEVEL_VARIABILITY_PENALTY = 200; //Multiplier for L" penalty against wigglines of level vector. + + +/* +string VARIABLE = "Monthly"; +const string run0 = "Res(1,3,6,12), LR=1e-3 {8,3e-4f},{13,1e-4f}, EPOCHS=14, LVP=50, 20*"; +const string runL = "alpha5L " + run0; +const string runH = "alpha5H " + run0; + +#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +vector> dilations = { { 1,3,6,12 } };//Each vector represents one chunk of Dilateed LSTMS, connected in resnNet fashion^M +const float INITIAL_LEARNING_RATE = 1e-3f; +const map LEARNING_RATES = { { 8,3e-4f },{ 13,1e-4f } }; //at which epoch we set them up to what^M +const float PER_SERIES_LR_MULTIP = 1; + +const int NUM_OF_TRAIN_EPOCHS = 14; +const unsigned int STATE_HSIZE = 50; + +const float LEVEL_VARIABILITY_PENALTY = 50; //Multiplier for L" penalty against wigglines of level vector. + +const int SEASONALITY = 12; +const unsigned int OUTPUT_SIZE = 18; +const unsigned int INPUT_SIZE = 12; +const int INPUT_SIZE_I = INPUT_SIZE; +const int OUTPUT_SIZE_I = OUTPUT_SIZE; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = INPUT_SIZE_I + OUTPUT_SIZE_I + MIN_INP_SEQ_LEN + 2; +const int MAX_SERIES_LENGTH = 40 * SEASONALITY + MIN_SERIES_LENGTH; //we are chopping longer series, to last, max e.g. 
40 years + +const float ALPHA = 0.05; +const float TAUL = ALPHA / 2; +const float TAUH = 1 - TAUL; +const float ALPHA_MULTIP = 2 / ALPHA; +*/ + +Expression squash(const Expression& x) { + return log(x); +} + +Expression expand(const Expression& x) { + return exp(x); +} + +string INPUT_PATH = DATA_DIR + VARIABLE + "-train.csv"; +string INFO_INPUT_PATH = DATA_DIR + "M4-info.csv"; + +#if defined _DEBUG + const int MAX_NUM_OF_SERIES = 40; +#else + const int MAX_NUM_OF_SERIES = -1; //use all series +#endif // _DEBUG + +const unsigned int NUM_OF_CATEGORIES = 6;//in data provided +const int BIG_LOOP = 3; +const int NUM_OF_CHUNKS = 2; +const float EPS=1e-6; +const int AVERAGING_LEVEL=5; +const bool USE_MEDIAN = false; +const int MIDDLE_POS_FOR_AVG = 2; //if using medians + +const float NOISE_STD=0.001; +const int FREQ_OF_TEST=1; +const float GRADIENT_CLIPPING=20; +const float C_STATE_PENALTY = 0; + +const float BIG_FLOAT=1e38;//numeric_limits::max(); +const bool PRINT_DIAGN=true; +const unsigned ATTENTION_HSIZE=STATE_HSIZE; + +const bool USE_AUTO_LEARNING_RATE=false; +//if USE_AUTO_LEARNING_RATE, and only if LBACK>0 +const float MIN_LEARNING_RATE = 0.0001f; +const float LR_RATIO = sqrt(10); +const float LR_TOLERANCE_MULTIP = 1.005; +const int L3_PERIOD = 2; +const int MIN_EPOCHS_BEFORE_CHANGING_LRATE = 2; + + +#if defined USE_ODBC + void HandleDiagnosticRecord(SQLHANDLE hHandle, + SQLSMALLINT hType, + RETCODE RetCode); + + #if defined _WINDOWS + WCHAR* pwszConnStr = L"DSN=slawek"; + #else + SQLCHAR* pwszConnStr = (SQLCHAR*) "DSN=slawek"; + #endif + #define TRYODBC(h, ht, x) { RETCODE rc = x;\ + if (rc != SQL_SUCCESS) \ + { \ + HandleDiagnosticRecord (h, ht, rc); \ + } \ + if (rc == SQL_ERROR) \ + { \ + fprintf(stderr, "Error in " #x "\n"); \ + if (hStmt) { \ + SQLFreeHandle(SQL_HANDLE_STMT, hStmt); \ + } \ + if (hDbc) { \ + SQLDisconnect(hDbc); \ + SQLFreeHandle(SQL_HANDLE_DBC, hDbc); \ + } \ + if (hEnv) { \ + SQLFreeHandle(SQL_HANDLE_ENV, hEnv); \ + } \ + exit(-1); \ + } \ + } + +#endif + +struct M4TS {//storing series data + vector < float> categories_vect; + vector vals; + vector testVals;//empty, unless LBACK>0 + float meanAbsSeasDiff; + int n; + + M4TS(string category, stringstream &line_stream) { + array categories = { 0,0,0,0,0,0 }; + if (category == "Demographic") + categories[0] = 1; + else if (category == "Finance") + categories[1] = 1; + else if (category == "Industry") + categories[2] = 1; + else if (category == "Macro") + categories[3] = 1; + else if (category == "Micro") + categories[4] = 1; + else if (category == "Other") + categories[5] = 1; + else { + cerr << "unknown category?"; + exit(-1); + } + for (int i = 0; i < NUM_OF_CATEGORIES; i++) + categories_vect.push_back(categories[i]); + + string tmp_str; + while(getline(line_stream, tmp_str, ',' )) { + string val_str; + for (const auto c : tmp_str) { + if (c != '\"' && c != '\r') //remove quotes and very occasional double end of line + val_str.push_back(c); + } + if (val_str.size() == 0) + break; + float val=(atof(val_str.c_str())); + vals.push_back(val); + } + + meanAbsSeasDiff = 0; + float sumf = 0; + for (int ip = SEASONALITY; ip0) + meanAbsSeasDiff = sumf / (vals.size() - SEASONALITY); + + if (LBACK > 0) { //extract last OUTPUT_SIZE points as the test values + if (vals.size() > LBACK*OUTPUT_SIZE_I) { + auto first = vals.begin() + vals.size() - LBACK*OUTPUT_SIZE_I; + auto pastLast = vals.begin() + vals.size() - (LBACK-1)*OUTPUT_SIZE_I; + vector input_vect(first, pastLast); //[first,pastLast) + testVals= input_vect; + 
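+			//(backtesting only: the held-out window was just copied to testVals; the resize below drops it from vals, so it is never seen in training)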
vals.resize(vals.size() - LBACK*OUTPUT_SIZE_I); //remove last LBACK*OUTPUT_SIZE elements + n = vals.size(); + } else + n = 0; + } else { + n = vals.size(); + } + if (n > MAX_SERIES_LENGTH) {//chop long series + vals.erase(vals.begin(), vals.begin() + (n-MAX_SERIES_LENGTH)); //remove some early data + n = vals.size(); + } + } + M4TS(){}; +}; + + +struct AdditionalParams {//Per series, important + Parameter levSm; + Parameter sSm; + array initSeasonality; +}; + +struct AdditionalParamsF {//Used for storing diagnostics + float levSm; + float sSm; + array initSeasonality; + vector levels; + vector seasons; +}; + +//loss function +Expression MSIS(const Expression& out_ex, const Expression& actuals_ex) { + vector losses; + for (unsigned int indx = 0; indx as_scalar(forecH.value())) + loss = loss + (actual - forecH)*ALPHA_MULTIP; + losses.push_back(loss); + } + Expression ret = sum(losses) / OUTPUT_SIZE; + #if defined _DEBUG + float retf = as_scalar(ret.value()); + if (retf>100) { + vector out_vect = as_vector(out_ex.value()); + vector actuals_vect = as_vector(actuals_ex.value()); + for (int i = 0; i0 and PERCENTILE!=50 +float wQuantLoss(vector& out_vect, vector& actuals_vect, float tau, int offset) {//used just for diagnostics, if if LBACK>0 and PERCENTILE!=50 + float sumf = 0; float suma = 0; + for (unsigned int indx = 0; indx forec) + sumf = sumf + (actual - forec)*tau; + else + sumf = sumf + (actual - forec)*(tau - 1); + } + return sumf / suma * 200; +} + +//MSIS operating on floats, used for validation +float errorFunc(vector& out_vect, vector& actuals_vect, float meanAbsSeasDiff) { + float sumf=0; + for (unsigned int indx = 0; indx forecH) + loss = loss + (actualf - forecH)*ALPHA_MULTIP; + sumf+=loss; + } + return sumf / (OUTPUT_SIZE*meanAbsSeasDiff); +} + + + + +int main(int argc, char** argv) { + dynet::initialize(argc, argv); + + int seedForChunks = 10; //Yes it runs, without any params, but it will work only on 1/NUM_OF_CHUNKS of all cases. The system is expected to run in NUM_OF_CHUNKS multiples. + int chunkNo = 1; + int ibigOffset = 0; + if (argc >= 3) { + seedForChunks = atoi(argv[1]); + chunkNo = atoi(argv[2]); + } + if (argc >= 4) + ibigOffset = atoi(argv[3]); + + if (chunkNo > NUM_OF_CHUNKS) { + cerr << "chunkNo > NUM_OF_CHUNKS"; + exit(-1); + } + else if (chunkNo <= 0) { + cerr << "chunkNo <= 0"; + exit(-1); + } + + cout<0) + std::cout<< " ibigOffset:"<< ibigOffset; //if continuing prematurely stopped run + if (LBACK>0) + std::cout<<" lback:"<tm_year+1900; + now_ts.month=now->tm_mon+1; + now_ts.day=now->tm_mday; + now_ts.hour=now->tm_hour; + now_ts.minute=now->tm_min; + now_ts.second=now->tm_sec; + now_ts.fraction=0; //reportedly needed + + const int OFFSET_TO_FIRST_ACTUAL=5; + string insertQuery_str = "insert into M72nn(run, LBack, ibig, series, epoch "; + for (int iq = 1; iq <= OUTPUT_SIZE_I; iq++) { + stringstream ss; + ss << iq; + string iq_str = ss.str(); + insertQuery_str = insertQuery_str +", actual"+iq_str+", forec" + iq_str; + } + insertQuery_str = insertQuery_str +", trainingError, variable, n, dateTimeOfPrediction) \ + values(? , ? , ? , ? , ? 
"; + for (int iq = 1; iq <= OUTPUT_SIZE_I; iq++) { + insertQuery_str = insertQuery_str + ",?,?"; + } + insertQuery_str = insertQuery_str + ",?,?,?,?)"; + #if defined _WINDOWS + wstring insertQuery(insertQuery_str.begin(), insertQuery_str.end()); + SQLWCHAR* sqlQuery = (SQLWCHAR*)insertQuery.c_str(); + #else + SQLCHAR* sqlQuery =(SQLCHAR*)insertQuery_str.c_str(); + #endif + + + SQLHENV hEnv = NULL; + SQLHDBC hDbc = NULL; + SQLHSTMT hStmt = NULL, hInsertStmt = NULL; + + if (SQLAllocHandle(SQL_HANDLE_ENV, SQL_NULL_HANDLE, &hEnv) == SQL_ERROR) { + fprintf(stderr, "Unable to allocate an environment handle\n"); + exit(-1); + } + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLSetEnvAttr(hEnv, + SQL_ATTR_ODBC_VERSION, + (SQLPOINTER)SQL_OV_ODBC3, + 0)); + + // Allocate a connection + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLAllocHandle(SQL_HANDLE_DBC, hEnv, &hDbc)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLDriverConnect(hDbc, + NULL, + pwszConnStr, + SQL_NTS, + NULL, + 0, + NULL, + SQL_DRIVER_COMPLETE)); + fprintf(stderr, "Connected!\n"); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLSetConnectAttr(hDbc, SQL_ATTR_AUTOCOMMIT, (SQLPOINTER)SQL_AUTOCOMMIT_OFF, SQL_IS_INTEGER)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLAllocHandle(SQL_HANDLE_STMT, hDbc, &hInsertStmt)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLPrepare(hInsertStmt, sqlQuery, SQL_NTS)); + + SQLLEN nullTerminatedStringOfRun = SQL_NTS; + SQLLEN nullTerminatedStringOfSeries = SQL_NTS; + SQLLEN nullTerminatedStringOfVariable = SQL_NTS; + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 2, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&LBACK, 0, NULL)); + + // variable, n, dateTimeOfPrediction + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL+2*OUTPUT_SIZE_I+2, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)VARIABLE.c_str(), 0, &nullTerminatedStringOfVariable)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE_I + 4, SQL_PARAM_INPUT, SQL_C_TYPE_TIMESTAMP, SQL_TYPE_TIMESTAMP, 0, 0, &now_ts, sizeof(TIMESTAMP_STRUCT), NULL)); +#endif + + random_device rd; // only used once to initialise (seed) engine + mt19937 rng(rd()); // random-number engine used (Mersenne-Twister) + mt19937 rngForChunks(seedForChunks); + + vector series_vect; + unordered_map allSeries_map(30000);//max series in one chunk would be 48/2=24k, for monthly series + unordered_map seriesCategories_map(120000);//100k series + + ifstream infoFile(INFO_INPUT_PATH); + string line; + getline(infoFile, line); //header + while (getline(infoFile, line)) { + //cout << string( line)<= MIN_SERIES_LENGTH) { + series_vect.push_back(series); + if (m4Obj.meanAbsSeasDiff==0) { + cout<<"Warning, flat series:"<0 && series_vect.size()>=MAX_NUM_OF_SERIES) + break; + } + + int series_len=(int)series_vect.size(); + int chunkSize= series_len/NUM_OF_CHUNKS; + std::cout << "num of series:" << series_vect.size() <<" size of chunk:"<< chunkSize< uniOnSeries(0, chunkSize -1); // closed interval [a, b] + + unordered_map, AVERAGING_LEVEL+1>> testResults_map((int)chunkSize*1.5); + set diagSeries; + + for (int ibig=0; ibig perfValid_vect; + int epochOfLastChangeOfLRate = -1; + +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&ibigDb, 0, NULL)); +#endif + + ParameterCollection pc; + ParameterCollection perSeriesPC; + + float learning_rate= 
INITIAL_LEARNING_RATE; + AdamTrainer trainer(pc, learning_rate, 0.9, 0.999, EPS); + trainer.clip_threshold = GRADIENT_CLIPPING; + AdamTrainer perSeriesTrainer(perSeriesPC, learning_rate*PER_SERIES_LR_MULTIP, 0.9, 0.999, EPS); + perSeriesTrainer.clip_threshold = GRADIENT_CLIPPING; + + #if defined USE_RESIDUAL_LSTM + vector rNNStack; + rNNStack.emplace_back(ResidualDilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, pc)); + for (int il = 1; il rNNStack; + rNNStack.emplace_back(AttentiveDilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, ATTENTION_HSIZE, pc)); + for (int il = 1; il rNNStack; + rNNStack.emplace_back(DilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, pc)); + for (int il = 1; il oneChunk_vect(start,end); + if (PRINT_DIAGN) { + for (int k = 0; k<10; k++) //diag + cout << oneChunk_vect[k] << " "; + cout << endl; + } + if (chunkNo == NUM_OF_CHUNKS) + cout<<"last chunk size:"<< oneChunk_vect.size()< additionalParams_map((int)oneChunk_vect.size()*1.5); //per series + unordered_map*> historyOfAdditionalParams_map((int)oneChunk_vect.size()*1.5); + for (auto iter = oneChunk_vect.begin(); iter != oneChunk_vect.end(); ++iter) {//setup + string series = *iter; + AdditionalParams addParams; + addParams.levSm = perSeriesPC.add_parameters({ 1 }, 0.5); //level smoothing + addParams.sSm = perSeriesPC.add_parameters({ 1 }, 0.5); //seasonality smoothing + for (int isea = 0; isea(); + } + + for (int iEpoch=0; iEpoch testLosses; //test losses of all series in this epoch + vector testAvgLosses; //test avg (over last few epochs) losses of all series in this epoch + vector testLossesL; //lower quantile loss + vector testAvgLossesL; //lower quantile loss + vector testLossesH; //higher quantile loss + vector testAvgLossesH; //higher quantile loss + vector trainingLosses; //training losses of all series in one epoch + vector forecLosses; vector levVarLosses; vector stateLosses; + #if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 5, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&iEpoch, 0, NULL)); + #endif + + for (auto iter = oneChunk_vect.begin() ; iter != oneChunk_vect.end(); ++iter) { + string series=*iter; + auto m4Obj = allSeries_map[series]; + + #if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 4, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)series.c_str(), 0, &nullTerminatedStringOfSeries)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE_I + 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&m4Obj.n, 0, NULL)); + #endif + + ComputationGraph cg; + for (int il=0; il season_exVect;//vector, because we do not know how long the series is + for (int iseas=0; iseas seas==1 + season_exVect.push_back(seas);//Expression is a simple struct, without any storage management, so the auto copy constructor works OK. 
+ } + season_exVect.push_back(season_exVect[0]); + + vector logDiffOfLevels_vect; + vector levels_exVect; + Expression lev=cdiv(input(cg, m4Obj.vals[0]), season_exVect[0]); + levels_exVect.push_back(lev); + for (int i=1; i 0) { + vector levelVarLoss_v; + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx=season_exVect.size()-SEASONALITY; + for (int i=0;i<(OUTPUT_SIZE_I-SEASONALITY);i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx+i]); + } + vector losses; + for (int i=INPUT_SIZE_I-1; i<(m4Obj.n- OUTPUT_SIZE_I); i++) { + vector::const_iterator firstE = season_exVect.begin() +i+1-INPUT_SIZE_I; + vector::const_iterator pastLastE = season_exVect.begin() +i+1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression inputSeasonality_ex=concatenate(inputSeasonality_exVect); + + vector::const_iterator first = m4Obj.vals.begin() +i+1-INPUT_SIZE_I; + vector::const_iterator pastLast = m4Obj.vals.begin() +i+1; //not including the last one + vector input_vect(first, pastLast); //[first,pastLast) + Expression input0_ex=input(cg,{INPUT_SIZE},input_vect); + Expression input1_ex=cdiv(input0_ex,inputSeasonality_ex); //deseasonalization + vector joinedInput_ex; + input1_ex= cdiv(input1_ex, levels_exVect[i]); + joinedInput_ex.emplace_back(noise(squash(input1_ex), NOISE_STD)); //normalization+noise + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality_ex=concatenate(outputSeasonality_exVect); + + first = m4Obj.vals.begin() +i+1; + pastLast = m4Obj.vals.begin() +i+1+OUTPUT_SIZE_I; + vector labels_vect(first, pastLast); //[first,pastLast) + Expression labels0_ex=input(cg,{OUTPUT_SIZE},labels_vect); + Expression labels1_ex=cdiv(labels0_ex,outputSeasonality_ex); //deseasonalization + labels1_ex= cdiv(labels1_ex, levels_exVect[i]);//normalization + Expression labels_ex=squash(labels1_ex); + + Expression loss_ex=MSIS(out_ex, labels_ex);//although out_ex has doubled size, labels_ex have normal size. NB, we do not have duplicated labels during training. 
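+			//MSIS (defined above) treats the first OUTPUT_SIZE outputs as the lower bound L and the next OUTPUT_SIZE as the
+			//upper bound U; per step the loss is roughly (U-L) + ALPHA_MULTIP*max(0,L-y) + ALPHA_MULTIP*max(0,y-U), averaged
+			//over the horizon, with ALPHA_MULTIP=2/ALPHA (40 for ALPHA=0.05), so narrow intervals that still cover the actuals are rewarded.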
+ //Expression loss_ex=pinBallLoss(out_ex, labels_ex); + if (i>=INPUT_SIZE_I+MIN_INP_SEQ_LEN) + losses.push_back(loss_ex); + } + + Expression forecLoss_ex= average(losses); + Expression loss_exp = forecLoss_ex; + + float levVarLoss=0; + if (LEVEL_VARIABILITY_PENALTY > 0) { + Expression levelVarLossP_ex = levelVarLoss_ex*LEVEL_VARIABILITY_PENALTY; + levVarLoss = as_scalar(levelVarLossP_ex.value()); + levVarLosses.push_back(levVarLoss); + loss_exp= loss_exp + levelVarLossP_ex; + } + + float cStateLoss=0; + if (C_STATE_PENALTY>0) { + vector cStateLosses_vEx; + for (int irnn = 0; irnn < rNNStack.size(); irnn++) + for (int it = 0; it maxAbs) { + maxAbs = abs(state[iv]); + timeOfMax = it; + layerOfMax = il; + chunkOfMax = irnn; + } + } + } //through layers/states + } //through time + } //through chunks + + cout << "levSm:" << as_scalar(levSm_ex.value()) << endl; + cout << "sSm:" << as_scalar(sSm_ex.value()) << endl; + cout << " min season=" << minSeason << endl; + cout << " min level=" << minLevel << endl; + cout << " max abs:" << maxAbs << " at time:" << timeOfMax << " at layer:" << layerOfMax << " and chunk:" << chunkOfMax << endl; + + //diagSeries.insert(series); + pc.reset_gradient(); + perSeriesPC.reset_gradient(); + } + + //saving per-series values for diagnostics purposes + AdditionalParamsF &histAdditionalParams= historyOfAdditionalParams_map[series]->at(iEpoch); + histAdditionalParams.levSm=as_scalar(levSm_ex.value()); + histAdditionalParams.sSm=as_scalar(sSm_ex.value()); + for (int isea=0; isea::const_iterator firstE = season_exVect.begin() + i + 1 - INPUT_SIZE_I; + vector::const_iterator pastLastE = season_exVect.begin() + i + 1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression inputSeasonality_ex = concatenate(inputSeasonality_exVect); + + vector::const_iterator first = m4Obj.vals.begin() + i + 1 - INPUT_SIZE_I; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1; //not including the last one + vector input_vect(first, pastLast); //[first,pastLast) + Expression input0_ex = input(cg, { INPUT_SIZE }, input_vect); + Expression input1_ex = cdiv(input0_ex, inputSeasonality_ex); //deseasonalization + vector joinedInput_ex; + input1_ex= cdiv(input1_ex, levels_exVect[i]);//normalization + joinedInput_ex.emplace_back(squash(input1_ex)); + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + for (int ios=0; ios out_vect = as_vector(out_ex.value()); + + if (LBACK > 0) { + float qLoss = errorFunc(out_vect, m4Obj.testVals, m4Obj.meanAbsSeasDiff); + testLosses.push_back(qLoss); + + qLoss = wQuantLoss(out_vect, m4Obj.testVals, TAUL, 0); + testLossesL.push_back(qLoss); + + qLoss = wQuantLoss(out_vect, m4Obj.testVals, TAUH, OUTPUT_SIZE); + testLossesH.push_back(qLoss); + } + + testResults_map[series][iEpoch%AVERAGING_LEVEL] = out_vect; + if (iEpoch >= AVERAGING_LEVEL) { + if (USE_MEDIAN) { + if (testResults_map[series][AVERAGING_LEVEL].size() == 0) + testResults_map[series][AVERAGING_LEVEL] = out_vect; //just to initialized, to make space. 
The values will be overwritten + for (int iii = 0; iii < OUTPUT_SIZE_I*2; iii++) { + vector temp_vect2; + for (int ii = 0; ii firstForec = testResults_map[series][0]; + testResults_map[series][AVERAGING_LEVEL] = firstForec; + for (int ii = 1; ii nextForec = testResults_map[series][ii]; + for (int iii = 0; iii 0) { + float qLoss = errorFunc(testResults_map[series][AVERAGING_LEVEL], m4Obj.testVals, m4Obj.meanAbsSeasDiff); + testAvgLosses.push_back(qLoss); + + qLoss = wQuantLoss(testResults_map[series][AVERAGING_LEVEL], m4Obj.testVals, TAUL, 0); + testAvgLossesL.push_back(qLoss); + + qLoss = wQuantLoss(testResults_map[series][AVERAGING_LEVEL], m4Obj.testVals, TAUH, OUTPUT_SIZE); + testAvgLossesH.push_back(qLoss); + + #if defined USE_ODBC //save + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE_I + 1, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&forecastLoss, 0, NULL)); + + for (int iv = 0; iv<2; iv++) { + if (iv == 0) + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 1, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)runL.c_str(), 0, &nullTerminatedStringOfRun)) + else + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 1, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)runH.c_str(), 0, &nullTerminatedStringOfRun)); + + for (int io = 0; io < OUTPUT_SIZE_I; io++) { + int ipos=OFFSET_TO_FIRST_ACTUAL + 1 + 2*io; + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, ipos, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&m4Obj.testVals[io], 0, NULL)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, ipos+1, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&testResults_map[series][AVERAGING_LEVEL][io + iv*OUTPUT_SIZE_I], 0, NULL)); + } + if (MAX_NUM_OF_SERIES<0) + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLExecute(hInsertStmt)); + } + #endif + } //lback>0 + } //time to average + }//last anchor point of the series + }//through TEST loop + }//through series + + + if (iEpoch % FREQ_OF_TEST == 0) { + float averageTrainingLoss = accumulate(trainingLosses.begin(), trainingLosses.end(), 0.0) / trainingLosses.size(); + + cout << ibig << " " << iEpoch << " loss:" << averageTrainingLoss * 100; + if (LEVEL_VARIABILITY_PENALTY > 0 || C_STATE_PENALTY > 0) { + float averageForecLoss = accumulate(forecLosses.begin(), forecLosses.end(), 0.0) / forecLosses.size(); + cout << " forecast loss:" << averageForecLoss*100; + } + if (LEVEL_VARIABILITY_PENALTY > 0) { + float averagelevVarLoss = accumulate(levVarLosses.begin(), levVarLosses.end(), 0.0) / levVarLosses.size(); + cout << " levVar loss:" << averagelevVarLoss * 100; + } + if (C_STATE_PENALTY > 0) { + float averageStateLoss = accumulate(stateLosses.begin(), stateLosses.end(), 0.0) / stateLosses.size(); + cout << " state loss:" << averageStateLoss * 100; + } + + float averageTestLoss=0; + if (LBACK > 0) { + float averageTestLoss = accumulate(testLosses.begin(), testLosses.end(), 0.0) / testLosses.size(); + float averageTestLossL = accumulate(testLossesL.begin(), testLossesL.end(), 0.0) / testLossesL.size(); + float averageTestLossH = accumulate(testLossesH.begin(), testLossesH.end(), 0.0) / testLossesH.size(); + cout<<" Test loss:" << averageTestLoss<<" L:"<< averageTestLossL<<" H:"<< averageTestLossH; + if (iEpoch >= AVERAGING_LEVEL) { + float averageTestAvgLoss = accumulate(testAvgLosses.begin(), testAvgLosses.end(), 0.0) / testAvgLosses.size();//of 
this epoch + float averageTestAvgLossL = accumulate(testAvgLossesL.begin(), testAvgLossesL.end(), 0.0) / testAvgLossesL.size();//of this epoch + float averageTestAvgLossH = accumulate(testAvgLossesH.begin(), testAvgLossesH.end(), 0.0) / testAvgLossesH.size();//of this epoch + cout << " avgLoss:" << averageTestAvgLoss<<" L:"<< averageTestAvgLossL<<" H:"<< averageTestAvgLossH<= 2) { + if (iEpoch < L3_PERIOD) + changeL2Rate = perfValid_vect[perfValid_vect.size() - 2] MIN_LEARNING_RATE && (iEpoch - epochOfLastChangeOfLRate) >= MIN_EPOCHS_BEFORE_CHANGING_LRATE) { + learning_rate /= LR_RATIO; + cout << "decreasing LR to:" << learning_rate << endl; + epochOfLastChangeOfLRate = iEpoch; + trainer.learning_rate = learning_rate; + } + } + #if defined USE_ODBC + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLEndTran( + SQL_HANDLE_DBC, + hDbc, + SQL_COMMIT)); + #endif + }//through epochs + + if (PRINT_DIAGN) {//some diagnostic info + set diagSeries; + for (int i = 0; i<1; i++) {//add a few normal ones + int irand = uniOnSeries(rng); + diagSeries.insert(oneChunk_vect[irand]); + } + for (auto series : diagSeries) { + cout << endl << series << endl; + array* historyOfAdditionalParams_ptrToArr = historyOfAdditionalParams_map[series]; + cout << "lSm:" << endl; + for (int iEpoch = 0; iEpochat(iEpoch).levSm << " "; + cout << endl; + cout << "sSm:" << endl; + for (int iEpoch = 0; iEpochat(iEpoch).sSm << " "; + cout << endl; + cout << "seasons:" << endl; + for (int isea = 0; iseaat(iEpoch).initSeasonality[isea] << " "; + cout << endl; + } + cout << endl; + for (int iEpoch = 0; iEpochat(iEpoch).levels.size()>0) { + cout << "levels:" << iEpoch << " "; + for (int iv = 0; ivat(iEpoch).levels.size(); iv++) + cout << historyOfAdditionalParams_ptrToArr->at(iEpoch).levels[iv] << ", "; + cout << endl; + cout << "seas:" << iEpoch << " "; + for (int iv = 0; ivat(iEpoch).seasons.size(); iv++) + cout << historyOfAdditionalParams_ptrToArr->at(iEpoch).seasons[iv] << ", "; + cout << endl; + } + } + } + } + + //save the forecast to outputFile + ofstream outputFile; + outputFile.open(outputPathL); + for (auto iter = oneChunk_vect.begin(); iter != oneChunk_vect.end(); ++iter) { + string series = *iter; + outputFile<< series; + for (int io=0; io +#include +#include +#include + +#if defined DEBUG + #define _DEBUG +#endif + +using namespace std; + +namespace dynet { + + // ResidualDilatedLSTMBuilder based on Vanilla LSTM + enum { _X2I, _H2I, _BI, _X2F, _H2F, _BF, _X2O, _H2O, _BO, _X2G, _H2G, _BG }; + enum { LN_GH, LN_BH, LN_GX, LN_BX, LN_GC, LN_BC }; + + ResidualDilatedLSTMBuilder::ResidualDilatedLSTMBuilder() : has_initial_state(false), layers(0), input_dim(0), hid(0), dropout_rate_h(0), ln_lstm(false), forget_bias(1.f), dropout_masks_valid(false) { } + + ResidualDilatedLSTMBuilder::ResidualDilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model, + bool ln_lstm, float forget_bias) : dilations(dilations), layers(unsigned(dilations.size())), + input_dim(input_dim), hid(hidden_dim), ln_lstm(ln_lstm), forget_bias(forget_bias), dropout_masks_valid(false) { + unsigned layer_input_dim = input_dim; + local_model = model.add_subcollection("ResidualDilated-lstm-builder"); + for (unsigned i = 0; i < layers; ++i) { + // i + Parameter p_x2i = local_model.add_parameters({ hidden_dim * 4, layer_input_dim }); + Parameter p_h2i = local_model.add_parameters({ hidden_dim * 4, hidden_dim }); + //Parameter p_c2i = model.add_parameters({hidden_dim, hidden_dim}); + Parameter p_bi = local_model.add_parameters({ 
hidden_dim * 4 }, ParameterInitConst(0.f)); + + layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next + + vector ps = { p_x2i, p_h2i, /*p_c2i,*/ p_bi }; + params.push_back(ps); + + if (ln_lstm) { + Parameter p_gh = model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(1.f)); + Parameter p_bh = model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(0.f)); + Parameter p_gx = model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(1.f)); + Parameter p_bx = model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(0.f)); + Parameter p_gc = model.add_parameters({ hidden_dim }, ParameterInitConst(1.f)); + Parameter p_bc = model.add_parameters({ hidden_dim }, ParameterInitConst(0.f)); + vector ln_ps = { p_gh, p_bh, p_gx, p_bx, p_gc, p_bc }; + ln_params.push_back(ln_ps); + } + } // layers + dropout_rate = 0.f; + dropout_rate_h = 0.f; + } + + void ResidualDilatedLSTMBuilder::new_graph_impl(ComputationGraph& cg, bool update) { + param_vars.clear(); + if (ln_lstm)ln_param_vars.clear(); + for (unsigned i = 0; i < layers; ++i) { + auto& p = params[i]; + vector vars; + for (unsigned j = 0; j < p.size(); ++j) { vars.push_back(update ? parameter(cg, p[j]) : const_parameter(cg, p[j])); } + param_vars.push_back(vars); + if (ln_lstm) { + auto& ln_p = ln_params[i]; + vector ln_vars; + for (unsigned j = 0; j < ln_p.size(); ++j) { ln_vars.push_back(update ? parameter(cg, ln_p[j]) : const_parameter(cg, ln_p[j])); } + ln_param_vars.push_back(ln_vars); + } + } + + _cg = &cg; + } + // layout: 0..layers = c + // layers+1..2*layers = h + void ResidualDilatedLSTMBuilder::start_new_sequence_impl(const vector& hinit) { + h.clear(); + c.clear(); + + if (hinit.size() > 0) { + DYNET_ARG_CHECK(layers * 2 == hinit.size(), + "ResidualDilatedLSTMBuilder must be initialized with 2 times as many expressions as layers " + "(hidden state, and cell for each layer). However, for " << layers << " layers, " << + hinit.size() << " expressions were passed in"); + h0.resize(layers); + c0.resize(layers); + for (unsigned i = 0; i < layers; ++i) { + c0[i] = hinit[i]; + h0[i] = hinit[i + layers]; + } + has_initial_state = true; + } + else { + has_initial_state = false; + } + + dropout_masks_valid = false; + } + + void ResidualDilatedLSTMBuilder::set_dropout_masks(unsigned batch_size) { + masks.clear(); + for (unsigned i = 0; i < layers; ++i) { + std::vector masks_i; + unsigned idim = (i == 0) ? input_dim : hid; + if (dropout_rate > 0.f || dropout_rate_h > 0.f) { + float retention_rate = 1.f - dropout_rate; + float retention_rate_h = 1.f - dropout_rate_h; + float scale = 1.f / retention_rate; + float scale_h = 1.f / retention_rate_h; + // in + masks_i.push_back(random_bernoulli(*_cg, Dim({ idim }, batch_size), retention_rate, scale)); + // h + masks_i.push_back(random_bernoulli(*_cg, Dim({ hid }, batch_size), retention_rate_h, scale_h)); + masks.push_back(masks_i); + } + } + dropout_masks_valid = true; + } + + ParameterCollection & ResidualDilatedLSTMBuilder::get_parameter_collection() { + return local_model; + } + + // TODO - Make this correct + // Copied c from the previous step (otherwise c.size()< h.size()) + // Also is creating a new step something we want? + // wouldn't overwriting the current one be better? 
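+	//Note on the builder itself: in add_input_impl (further below) layer i with dilation d=dilations[i] takes its recurrent
+	//state from d steps back (h[t-d][i], c[t-d][i]), falling back to h0/c0 or zeros while t<d; and in this Residual variant
+	//every layer above the first adds its input inside the output gate, ht = i_ot*(in + tanh(ct)), i.e. a skip connection between stacked layers.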
+ Expression ResidualDilatedLSTMBuilder::set_h_impl(int prev, const vector& h_new) { + DYNET_ARG_CHECK(h_new.empty() || h_new.size() == layers, + "ResidualDilatedLSTMBuilder::set_h expects as many inputs as layers, but got " << + h_new.size() << " inputs for " << layers << " layers"); + const unsigned t = h.size(); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = h_new[i]; + Expression c_i = c[t - 1][i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); + } + // Current implementation : s_new is either {new_c[0],...,new_c[n]} + // or {new_c[0],...,new_c[n],new_h[0],...,new_h[n]} + Expression ResidualDilatedLSTMBuilder::set_s_impl(int prev, const std::vector& s_new) { + DYNET_ARG_CHECK(s_new.size() == layers || s_new.size() == 2 * layers, + "ResidualDilatedLSTMBuilder::set_s expects either as many inputs or twice as many inputs as layers, but got " << s_new.size() << " inputs for " << layers << " layers"); + bool only_c = s_new.size() == layers; + const unsigned t = c.size(); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = only_c ? h[t - 1][i] : s_new[i + layers]; + Expression c_i = s_new[i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); + } + + Expression ResidualDilatedLSTMBuilder::add_input_impl(int prev, const Expression& x) { + h.push_back(vector(layers)); + c.push_back(vector(layers)); + vector& ht = h.back(); + vector& ct = c.back(); + Expression in = x; + if ((dropout_rate > 0.f || dropout_rate_h > 0.f) && !dropout_masks_valid) set_dropout_masks(x.dim().bd); + for (unsigned i = 0; i < layers; ++i) { + int dilation_offset = dilations[i] - 1; + const vector& vars = param_vars[i]; + + Expression i_h_tm1, i_c_tm1; + bool has_prev_state = (prev >= 0 || has_initial_state); + if (prev < dilation_offset) { + if (has_initial_state) { + // intial value for h and c at timestep 0 in layer i + // defaults to zero matrix input if not set in add_parameter_edges + i_h_tm1 = h0[i]; + i_c_tm1 = c0[i]; + } + else { + i_h_tm1 = zeros(*_cg, Dim({ vars[_BI].dim()[0] / 4 }, x.dim().bd)); + i_c_tm1 = i_h_tm1; + } + } + else { + i_h_tm1 = h[prev - dilation_offset][i]; + i_c_tm1 = c[prev - dilation_offset][i]; + } + // apply dropout according to https://arxiv.org/abs/1512.05287 (tied weights) + if (dropout_rate > 0.f) { + in = cmult(in, masks[i][0]); + } + if (has_prev_state && dropout_rate_h > 0.f) + i_h_tm1 = cmult(i_h_tm1, masks[i][1]); + // input + Expression tmp; + Expression i_ait; + Expression i_aft; + Expression i_aot; + Expression i_agt; + if (ln_lstm) { + const vector& ln_vars = ln_param_vars[i]; + if (has_prev_state) + tmp = vars[_BI] + layer_norm(vars[_X2I] * in, ln_vars[LN_GX], ln_vars[LN_BX]) + layer_norm(vars[_H2I] * i_h_tm1, ln_vars[LN_GH], ln_vars[LN_BH]); + else + tmp = vars[_BI] + layer_norm(vars[_X2I] * in, ln_vars[LN_GX], ln_vars[LN_BX]); + } + else { + if (has_prev_state) + tmp = affine_transform({ vars[_BI], vars[_X2I], in, vars[_H2I], i_h_tm1 }); + else + tmp = affine_transform({ vars[_BI], vars[_X2I], in }); + } + i_ait = pick_range(tmp, 0, hid); + i_aft = pick_range(tmp, hid, hid * 2); + i_aot = pick_range(tmp, hid * 2, hid * 3); + i_agt = pick_range(tmp, hid * 3, hid * 4); + Expression i_it = logistic(i_ait); + if (forget_bias != 0.0) + tmp = logistic(i_aft + forget_bias); + else + tmp = logistic(i_aft); + + Expression i_ft = tmp; + Expression i_ot = logistic(i_aot); + Expression i_gt = tanh(i_agt); + + ct[i] = 
has_prev_state ? (cmult(i_ft, i_c_tm1) + cmult(i_it, i_gt)) : cmult(i_it, i_gt); + if (ln_lstm) { + const vector& ln_vars = ln_param_vars[i]; + if (i==0) + in = ht[i] = cmult(i_ot, tanh(layer_norm(ct[i], ln_vars[LN_GC], ln_vars[LN_BC]))); + else + in = ht[i] = cmult(i_ot, in+tanh(layer_norm(ct[i], ln_vars[LN_GC], ln_vars[LN_BC]))); + } + else { + if (i==0) + in = ht[i] = cmult(i_ot, tanh(ct[i])); + else + in = ht[i] = cmult(i_ot, in+tanh(ct[i])); + } + } + return ht.back(); + } + + void ResidualDilatedLSTMBuilder::copy(const RNNBuilder & rnn) { + const ResidualDilatedLSTMBuilder & rnn_lstm = (const ResidualDilatedLSTMBuilder&)rnn; + DYNET_ARG_CHECK(params.size() == rnn_lstm.params.size(), + "Attempt to copy ResidualDilatedLSTMBuilder with different number of parameters " + "(" << params.size() << " != " << rnn_lstm.params.size() << ")"); + for (size_t i = 0; i < params.size(); ++i) + for (size_t j = 0; j < params[i].size(); ++j) + params[i][j] = rnn_lstm.params[i][j]; + for (size_t i = 0; i < ln_params.size(); ++i) + for (size_t j = 0; j < ln_params[i].size(); ++j) + ln_params[i][j] = rnn_lstm.ln_params[i][j]; + } + + void ResidualDilatedLSTMBuilder::set_dropout(float d) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d; + } + + void ResidualDilatedLSTMBuilder::set_dropout(float d, float d_h) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f && d_h >= 0.f && d_h <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d_h; + } + + void ResidualDilatedLSTMBuilder::disable_dropout() { + dropout_rate = 0.f; + dropout_rate_h = 0.f; + } + + + + + //enum { _X2I, _H2I, _BI, _X2F, _H2F, _BF, _X2O, _H2O, _BO, _X2G, _H2G, _BG }; + enum { _X2I_, _H2I_, _BI_, _XA1, _HA1, _SA1, _BA1, _A2, _B2 }; + + +//*************************** + + + + AttentiveDilatedLSTMBuilder::AttentiveDilatedLSTMBuilder() : has_initial_state(false), layers(0), input_dim(0), hid(0), dropout_rate_h(0), weightnoise_std(0), dropout_masks_valid(false) { } + + AttentiveDilatedLSTMBuilder::AttentiveDilatedLSTMBuilder(vector max_dilations, + unsigned input_dim, + unsigned hidden_dim, + unsigned attention_dim, + ParameterCollection& model) + : max_dilations(max_dilations), layers(unsigned(max_dilations.size())), + input_dim(input_dim), hid(hidden_dim), attention_dim(attention_dim), weightnoise_std(0), dropout_masks_valid(false) { + unsigned layer_input_dim = input_dim; + local_model = model.add_subcollection("compact-vanilla-lstm-builder"); + for (unsigned i = 0; i < layers; ++i) { + // i + Parameter p_Wx = local_model.add_parameters({ hidden_dim * 4, layer_input_dim }); + Parameter p_Wh = local_model.add_parameters({ hidden_dim * 4, hidden_dim }); + Parameter p_b = local_model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(0.f)); + + Parameter p_Wxa1 = local_model.add_parameters({ attention_dim, layer_input_dim }); + Parameter p_Wha1 = local_model.add_parameters({ attention_dim, hidden_dim }); + Parameter p_Wsa1 = local_model.add_parameters({ attention_dim, hidden_dim }); + Parameter p_ba1 = local_model.add_parameters({ attention_dim }, ParameterInitConst(0.f)); + + Parameter p_Wa2 = local_model.add_parameters({ max_dilations[i], attention_dim }); + Parameter p_ba2 = local_model.add_parameters({ max_dilations[i] }, ParameterInitConst(0.f)); + + layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next + + vector ps = { p_Wx, p_Wh, p_b, p_Wxa1, p_Wha1, p_Wsa1, p_ba1, p_Wa2, p_ba2 }; + 
params.push_back(ps); + + } // layers + dropout_rate = 0.f; + dropout_rate_h = 0.f; + } + + void AttentiveDilatedLSTMBuilder::new_graph_impl(ComputationGraph& cg, bool update) { + param_vars.clear(); + for (unsigned i = 0; i < layers; ++i) { + auto& p = params[i]; + vector vars; + for (unsigned j = 0; j < p.size(); ++j) { + vars.push_back(update ? parameter(cg, p[j]) : const_parameter(cg, p[j])); + } + param_vars.push_back(vars); + } + + _cg = &cg; + } + // layout: 0..layers = c + // layers+1..2*layers = h + void AttentiveDilatedLSTMBuilder::start_new_sequence_impl(const vector& hinit) { + h.clear(); + c.clear(); + + if (hinit.size() > 0) { + DYNET_ARG_CHECK(layers * 2 == hinit.size(), + "AttentiveDilatedLSTMBuilder must be initialized with 2 times as many expressions as layers " + "(hidden state, and cell for each layer). However, for " << layers << " layers, " << + hinit.size() << " expressions were passed in"); + h0.resize(layers); + c0.resize(layers); + for (unsigned i = 0; i < layers; ++i) { + c0[i] = hinit[i]; + h0[i] = hinit[i + layers]; + } + has_initial_state = true; + } + else { + has_initial_state = false; + } + + dropout_masks_valid = false; + } + + void AttentiveDilatedLSTMBuilder::set_dropout_masks(unsigned batch_size) { + masks.clear(); + for (unsigned i = 0; i < layers; ++i) { + std::vector masks_i; + unsigned idim = (i == 0) ? input_dim : hid; + if (dropout_rate > 0.f || dropout_rate_h > 0.f) { + float retention_rate = 1.f - dropout_rate; + float retention_rate_h = 1.f - dropout_rate_h; + float scale = 1.f / retention_rate; + float scale_h = 1.f / retention_rate_h; + // in + masks_i.push_back(random_bernoulli(*_cg, Dim({ idim }, batch_size), retention_rate, scale)); + // h + masks_i.push_back(random_bernoulli(*_cg, Dim({ hid }, batch_size), retention_rate_h, scale_h)); + masks.push_back(masks_i); + } + } + dropout_masks_valid = true; + } + + ParameterCollection & AttentiveDilatedLSTMBuilder::get_parameter_collection() { + return local_model; + } + + // TODO - Make this correct + // Copied c from the previous step (otherwise c.size()< h.size()) + // Also is creating a new step something we want? + // wouldn't overwriting the current one be better? + Expression AttentiveDilatedLSTMBuilder::set_h_impl(int prev, const vector& h_new) { + DYNET_ARG_CHECK(h_new.empty() || h_new.size() == layers, + "AttentiveDilatedLSTMBuilder::set_h expects as many inputs as layers, but got " << + h_new.size() << " inputs for " << layers << " layers"); + const unsigned t = unsigned(h.size()); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = h_new[i]; + Expression c_i = c[t - 1][i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); + } + // Current implementation : s_new is either {new_c[0],...,new_c[n]} + // or {new_c[0],...,new_c[n],new_h[0],...,new_h[n]} + Expression AttentiveDilatedLSTMBuilder::set_s_impl(int prev, const std::vector& s_new) { + DYNET_ARG_CHECK(s_new.size() == layers || s_new.size() == 2 * layers, + "AttentiveDilatedLSTMBuilder::set_s expects either as many inputs or twice as many inputs as layers, but got " << s_new.size() << " inputs for " << layers << " layers"); + bool only_c = s_new.size() == layers; + const unsigned t = unsigned(c.size()); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = only_c ? 
h[t - 1][i] : s_new[i + layers]; + Expression c_i = s_new[i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); + } + + Expression AttentiveDilatedLSTMBuilder::add_input_impl(int prev, const Expression& x) { + h.push_back(vector(layers)); + c.push_back(vector(layers)); + vector& ht = h.back(); + vector& ct = c.back(); + Expression in = x; + if ((dropout_rate > 0.f || dropout_rate_h > 0.f) && !dropout_masks_valid) set_dropout_masks(x.dim().bd); + for (unsigned i = 0; i < layers; ++i) { + int dilation_offset= max_dilations[i]-1; + const vector& vars = param_vars[i]; + Expression i_h_tm1, i_c_tm1; + if (prev < dilation_offset) { + if (has_initial_state) { + // initial value for h and c at timestep 0 in layer i + // defaults to zero matrix input if not set in add_parameter_edges + i_h_tm1 = h0[i]; + i_c_tm1 = c0[i]; + } + else { + i_h_tm1 = zeros(*_cg, Dim({ vars[_BI].dim()[0] / 4 }, x.dim().bd)); + i_c_tm1 = i_h_tm1; + } + } + else { + if (dilation_offset>0) { + //enum { _X2I, _H2I, _BI, _XA1, _HA1, _SA1, _BA1, _A2, _B2 }; + Expression weights_ex=vars[_XA1]*in+ vars[_HA1]*h[prev][i]+ vars[_SA1]*c[prev][i]+ vars[_BA1]; + weights_ex=tanh(weights_ex); + weights_ex=vars[_A2]* weights_ex+ vars[_B2]; + weights_ex =softmax(weights_ex); + #if defined _DEBUG + vector weights=as_vector(weights_ex.value()); + #endif + + unsigned indx=0; + Expression w_ex = pick(weights_ex, indx); + Expression avg_h= cmult(h[prev][i], w_ex); + for (indx=1; indx <= dilation_offset; indx++) {//dilation_offset==max_dilations[i]-1, so together with indx==0, we cover max_dilations[i] steps + w_ex = pick(weights_ex, indx); + avg_h = avg_h+cmult(h[prev- indx][i], w_ex); + } + i_h_tm1 = avg_h; + } else { + i_h_tm1 = h[prev- dilation_offset][i]; + } + i_c_tm1 = c[prev- dilation_offset][i]; + } + if (dropout_rate > 0.f || dropout_rate_h > 0.f) { + // apply dropout according to https://arxiv.org/abs/1512.05287 (tied weights) + Expression gates_t = vanilla_lstm_gates_dropout({ in }, i_h_tm1, vars[_X2I], vars[_H2I], vars[_BI], masks[i][0], masks[i][1], weightnoise_std); + ct[i] = vanilla_lstm_c(i_c_tm1, gates_t); + in = ht[i] = vanilla_lstm_h(ct[i], gates_t); + } + else { + Expression gates_t = vanilla_lstm_gates({ in }, i_h_tm1, vars[_X2I], vars[_H2I], vars[_BI], weightnoise_std); + ct[i] = vanilla_lstm_c(i_c_tm1, gates_t); + in = ht[i] = vanilla_lstm_h(ct[i], gates_t); + } + } + return ht.back(); + } + + void AttentiveDilatedLSTMBuilder::copy(const RNNBuilder & rnn) { + const AttentiveDilatedLSTMBuilder & rnn_lstm = (const AttentiveDilatedLSTMBuilder&)rnn; + DYNET_ARG_CHECK(params.size() == rnn_lstm.params.size(), + "Attempt to copy AttentiveDilatedLSTMBuilder with different number of parameters " + "(" << params.size() << " != " << rnn_lstm.params.size() << ")"); + for (size_t i = 0; i < params.size(); ++i) + for (size_t j = 0; j < params[i].size(); ++j) + params[i][j] = rnn_lstm.params[i][j]; + } + + void AttentiveDilatedLSTMBuilder::set_dropout(float d) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d; + } + + void AttentiveDilatedLSTMBuilder::set_dropout(float d, float d_h) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f && d_h >= 0.f && d_h <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d_h; + } + + void AttentiveDilatedLSTMBuilder::disable_dropout() { + dropout_rate = 0.f; + dropout_rate_h = 0.f; + } + void AttentiveDilatedLSTMBuilder::set_weightnoise(float std) { + 
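+		//std is only stored here; it is applied in add_input_impl, where it is passed as weightnoise_std to
+		//vanilla_lstm_gates()/vanilla_lstm_gates_dropout()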
DYNET_ARG_CHECK(std >= 0.f, "weight noise must have standard deviation >=0"); + weightnoise_std = std; + } + + //*/ + + DilatedLSTMBuilder::DilatedLSTMBuilder() : has_initial_state(false), layers(0), input_dim(0), hid(0), dropout_rate_h(0), weightnoise_std(0), dropout_masks_valid(false) { } + + DilatedLSTMBuilder::DilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model) + : dilations(dilations), layers(unsigned(dilations.size())), + input_dim(input_dim), hid(hidden_dim), weightnoise_std(0), dropout_masks_valid(false) { + unsigned layer_input_dim = input_dim; + local_model = model.add_subcollection("compact-vanilla-lstm-builder"); + for (unsigned i = 0; i < layers; ++i) { + // i + Parameter p_Wx = local_model.add_parameters({ hidden_dim * 4, layer_input_dim }); + Parameter p_Wh = local_model.add_parameters({ hidden_dim * 4, hidden_dim }); + Parameter p_b = local_model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(0.f)); + + layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next + + vector ps = { p_Wx, p_Wh, p_b }; + params.push_back(ps); + + } // layers + dropout_rate = 0.f; + dropout_rate_h = 0.f; + } + + void DilatedLSTMBuilder::new_graph_impl(ComputationGraph& cg, bool update) { + param_vars.clear(); + for (unsigned i = 0; i < layers; ++i) { + auto& p = params[i]; + vector vars; + for (unsigned j = 0; j < p.size(); ++j) { vars.push_back(update ? parameter(cg, p[j]) : const_parameter(cg, p[j])); } + param_vars.push_back(vars); + } + + _cg = &cg; + } + // layout: 0..layers = c + // layers+1..2*layers = h + void DilatedLSTMBuilder::start_new_sequence_impl(const vector& hinit) { + h.clear(); + c.clear(); + + if (hinit.size() > 0) { + DYNET_ARG_CHECK(layers * 2 == hinit.size(), + "DilatedLSTMBuilder must be initialized with 2 times as many expressions as layers " + "(hidden state, and cell for each layer). However, for " << layers << " layers, " << + hinit.size() << " expressions were passed in"); + h0.resize(layers); + c0.resize(layers); + for (unsigned i = 0; i < layers; ++i) { + c0[i] = hinit[i]; + h0[i] = hinit[i + layers]; + } + has_initial_state = true; + } else { + has_initial_state = false; + } + + dropout_masks_valid = false; + } + + void DilatedLSTMBuilder::set_dropout_masks(unsigned batch_size) { + masks.clear(); + for (unsigned i = 0; i < layers; ++i) { + std::vector masks_i; + unsigned idim = (i == 0) ? input_dim : hid; + if (dropout_rate > 0.f || dropout_rate_h > 0.f) { + float retention_rate = 1.f - dropout_rate; + float retention_rate_h = 1.f - dropout_rate_h; + float scale = 1.f / retention_rate; + float scale_h = 1.f / retention_rate_h; + // in + masks_i.push_back(random_bernoulli(*_cg, Dim({ idim }, batch_size), retention_rate, scale)); + // h + masks_i.push_back(random_bernoulli(*_cg, Dim({ hid }, batch_size), retention_rate_h, scale_h)); + masks.push_back(masks_i); + } + } + dropout_masks_valid = true; + } + + ParameterCollection & DilatedLSTMBuilder::get_parameter_collection() { + return local_model; + } + + // TODO - Make this correct + // Copied c from the previous step (otherwise c.size()< h.size()) + // Also is creating a new step something we want? + // wouldn't overwriting the current one be better? 
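+ // Example usage (an illustrative sketch only; it mirrors how ES_RNN.cc drives these builders through the
+ // standard DyNet RNNBuilder interface -- the dilations, dimensions and the `inputs` vector below are arbitrary):
+ //   ParameterCollection pc;
+ //   DilatedLSTMBuilder rnn({1, 3, 6, 12}, /*input_dim=*/20, /*hidden_dim=*/40, pc);
+ //   ComputationGraph cg;
+ //   rnn.new_graph(cg);         // attach the builder's parameters to the current graph
+ //   rnn.start_new_sequence();  // clear the h/c histories (no initial state supplied)
+ //   for (const Expression& x_t : inputs) {    // inputs: one Expression per time step, built by the caller
+ //     Expression h_t = rnn.add_input(x_t);    // layer i reads its recurrent state from dilations[i] steps back
+ //   }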
+ Expression DilatedLSTMBuilder::set_h_impl(int prev, const vector& h_new) { + DYNET_ARG_CHECK(h_new.empty() || h_new.size() == layers, + "DilatedLSTMBuilder::set_h expects as many inputs as layers, but got " << + h_new.size() << " inputs for " << layers << " layers"); + const unsigned t = unsigned(h.size()); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = h_new[i]; + Expression c_i = c[t - 1][i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); + } + // Current implementation : s_new is either {new_c[0],...,new_c[n]} + // or {new_c[0],...,new_c[n],new_h[0],...,new_h[n]} + Expression DilatedLSTMBuilder::set_s_impl(int prev, const std::vector& s_new) { + DYNET_ARG_CHECK(s_new.size() == layers || s_new.size() == 2 * layers, + "DilatedLSTMBuilder::set_s expects either as many inputs or twice as many inputs as layers, but got " << s_new.size() << " inputs for " << layers << " layers"); + bool only_c = s_new.size() == layers; + const unsigned t = unsigned(c.size()); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = only_c ? h[t - 1][i] : s_new[i + layers]; + Expression c_i = s_new[i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); + } + + Expression DilatedLSTMBuilder::add_input_impl(int prev, const Expression& x) { + h.push_back(vector(layers)); + c.push_back(vector(layers)); + vector& ht = h.back(); + vector& ct = c.back(); + Expression in = x; + if ((dropout_rate > 0.f || dropout_rate_h > 0.f) && !dropout_masks_valid) set_dropout_masks(x.dim().bd); + for (unsigned i = 0; i < layers; ++i) { + int dilation_offset = dilations[i] - 1; + const vector& vars = param_vars[i]; + Expression i_h_tm1, i_c_tm1; + if (prev < dilation_offset) { + if (has_initial_state) { + // initial value for h and c at timestep 0 in layer i + // defaults to zero matrix input if not set in add_parameter_edges + i_h_tm1 = h0[i]; + i_c_tm1 = c0[i]; + } else { + i_h_tm1 = zeros(*_cg, Dim({ vars[_BI].dim()[0] / 4 }, x.dim().bd)); + i_c_tm1 = i_h_tm1; + } + } else { // t > 0 + i_h_tm1 = h[prev - dilation_offset][i]; + i_c_tm1 = c[prev - dilation_offset][i]; + } + if (dropout_rate > 0.f || dropout_rate_h > 0.f) { + // apply dropout according to https://arxiv.org/abs/1512.05287 (tied weights) + Expression gates_t = vanilla_lstm_gates_dropout({ in }, i_h_tm1, vars[_X2I], vars[_H2I], vars[_BI], masks[i][0], masks[i][1], weightnoise_std); + ct[i] = vanilla_lstm_c(i_c_tm1, gates_t); + in = ht[i] = vanilla_lstm_h(ct[i], gates_t); + } else { + Expression gates_t = vanilla_lstm_gates({ in }, i_h_tm1, vars[_X2I], vars[_H2I], vars[_BI], weightnoise_std); + ct[i] = vanilla_lstm_c(i_c_tm1, gates_t); + in = ht[i] = vanilla_lstm_h(ct[i], gates_t); + } + } + return ht.back(); + } + + void DilatedLSTMBuilder::copy(const RNNBuilder & rnn) { + const DilatedLSTMBuilder & rnn_lstm = (const DilatedLSTMBuilder&)rnn; + DYNET_ARG_CHECK(params.size() == rnn_lstm.params.size(), + "Attempt to copy DilatedLSTMBuilder with different number of parameters " + "(" << params.size() << " != " << rnn_lstm.params.size() << ")"); + for (size_t i = 0; i < params.size(); ++i) + for (size_t j = 0; j < params[i].size(); ++j) + params[i][j] = rnn_lstm.params[i][j]; + } + + void DilatedLSTMBuilder::set_dropout(float d) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d; + } + + void 
DilatedLSTMBuilder::set_dropout(float d, float d_h) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f && d_h >= 0.f && d_h <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d_h; + } + + void DilatedLSTMBuilder::disable_dropout() { + dropout_rate = 0.f; + dropout_rate_h = 0.f; + } + void DilatedLSTMBuilder::set_weightnoise(float std) { + DYNET_ARG_CHECK(std >= 0.f, "weight noise must have standard deviation >=0"); + weightnoise_std = std; + } + +} // namespace dynet diff --git a/118 - slaweks17/c++/slstm.h b/118 - slaweks17/c++/slstm.h new file mode 100644 index 0000000..adb63a7 --- /dev/null +++ b/118 - slaweks17/c++/slstm.h @@ -0,0 +1,394 @@ +/** +* file slstm.h +* header for my implementation of dilated LSTMs, based on Dynet LSTM builders + - DilatedLSTMBuilder - standard Dilated LSTM (https://papers.nips.cc/paper/6613-dilated-recurrent-neural-networks.pdf) + - ResidualDilatedLSTMBuilder - Dilated LSTM with special Residual shortcuts, after https://arxiv.org/abs/1701.03360 + - AttentiveDilatedLSTMBuilder - Dilated LSTM with Attention mechanism, as in the second stage of https://arxiv.org/abs/1704.02971 +* +Slawek Smyl, Mar-May 2018 +*/ + +#ifndef DYNET_SLSTMS_H_ +#define DYNET_SLSTMS_H_ + +#include "dynet/dynet.h" +#include "dynet/rnn.h" +#include "dynet/expr.h" + +using namespace std; + +namespace dynet { + + //basd on VanillaLSTMBuilder + struct ResidualDilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + ResidualDilatedLSTMBuilder(); + /** + * \brief Constructor for the ResidualDilatedLSTMBuilder + * + * \param dilations Vector of dilations + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + * \param ln_lstm Whether to use layer normalization + * \param forget_bias value(float) to use as bias for the forget gate(default = 1.0) + */ + explicit ResidualDilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model, + bool ln_lstm = false, + float forget_bias = 1.f); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. 
+ * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. + * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + * \param d_h Dropout rate \f$d_h\f$ for the output \f$h_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + /** + * \brief Get parameters in ResidualDilatedLSTMBuilder + * \return list of points to ParameterStorage objects + */ + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + // first index is layer, then ... + std::vector> ln_params; + + // first index is layer, then ... + std::vector> param_vars; + // first index is layer, then ... + std::vector> ln_param_vars; + + // first index is layer, then ... 
+ std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + float dropout_rate_h; + bool ln_lstm; + float forget_bias; + bool dropout_masks_valid; + vector dilations; //one int per layer + + private: + ComputationGraph* _cg; // Pointer to current cg + + }; + + + struct DilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + DilatedLSTMBuilder(); + /** + * \brief Constructor for the DilatedLSTMBuilder + * + * \param dilations Vector of dilations + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + */ + explicit DilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 
+ * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + + void set_weightnoise(float std); + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + + // first index is layer, then ... + std::vector> param_vars; + + // first index is layer, then ... + std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + float dropout_rate_h; + float weightnoise_std; + vector dilations; //one int per layer + + bool dropout_masks_valid; + private: + ComputationGraph* _cg; // Pointer to current cg + + }; + + + struct AttentiveDilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + AttentiveDilatedLSTMBuilder(); + /** + * \brief Constructor for the AttentiveDilatedLSTMBuilder + * + * \param max_dilations Vector, maximum dilations (per layer) + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + */ + explicit AttentiveDilatedLSTMBuilder(vector max_dilations, + unsigned input_dim, + unsigned hidden_dim, + unsigned attention_dim, + ParameterCollection& model); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? 
c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. + * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + + void set_weightnoise(float std); + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + + // first index is layer, then ... + std::vector> param_vars; + + // first index is layer, then ... 
+ std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + unsigned attention_dim; + float dropout_rate_h; + float weightnoise_std; + vector max_dilations; //one int per layer + + bool dropout_masks_valid; + private: + ComputationGraph* _cg; // Pointer to current cg + + }; +} // namespace dynet + +#endif diff --git a/118 - slaweks17/c++/windows_VisualStudio/M4.sln b/118 - slaweks17/c++/windows_VisualStudio/M4.sln new file mode 100644 index 0000000..035373c --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/M4.sln @@ -0,0 +1,58 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 14 +VisualStudioVersion = 14.0.25420.1 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "M41", "M41\M41.vcxproj", "{928301A0-F01A-48F6-A499-851B3CE8BD4E}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "M42", "M42\M42.vcxproj", "{A16B5466-E680-43F6-A884-A4A01EB78E50}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "M43", "M43\M43.vcxproj", "{BE951571-3F3A-4048-BAA3-0C05F38CFF42}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "M44", "M44\M44.vcxproj", "{7A192E0C-8F58-4D65-998E-3A7010AB5F87}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + RelWithDebug|x64 = RelWithDebug|x64 + RelWithDebug|x86 = RelWithDebug|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {928301A0-F01A-48F6-A499-851B3CE8BD4E}.Debug|x64.ActiveCfg = Debug|x64 + {928301A0-F01A-48F6-A499-851B3CE8BD4E}.Debug|x64.Build.0 = Debug|x64 + {928301A0-F01A-48F6-A499-851B3CE8BD4E}.Debug|x86.ActiveCfg = Debug|Win32 + {928301A0-F01A-48F6-A499-851B3CE8BD4E}.Debug|x86.Build.0 = Debug|Win32 + {928301A0-F01A-48F6-A499-851B3CE8BD4E}.RelWithDebug|x64.ActiveCfg = RelWithDebug|x64 + {928301A0-F01A-48F6-A499-851B3CE8BD4E}.RelWithDebug|x64.Build.0 = RelWithDebug|x64 + {928301A0-F01A-48F6-A499-851B3CE8BD4E}.RelWithDebug|x86.ActiveCfg = RelWithDebug|Win32 + {928301A0-F01A-48F6-A499-851B3CE8BD4E}.RelWithDebug|x86.Build.0 = RelWithDebug|Win32 + {A16B5466-E680-43F6-A884-A4A01EB78E50}.Debug|x64.ActiveCfg = Debug|x64 + {A16B5466-E680-43F6-A884-A4A01EB78E50}.Debug|x64.Build.0 = Debug|x64 + {A16B5466-E680-43F6-A884-A4A01EB78E50}.Debug|x86.ActiveCfg = Debug|Win32 + {A16B5466-E680-43F6-A884-A4A01EB78E50}.Debug|x86.Build.0 = Debug|Win32 + {A16B5466-E680-43F6-A884-A4A01EB78E50}.RelWithDebug|x64.ActiveCfg = RelWithDebug|x64 + {A16B5466-E680-43F6-A884-A4A01EB78E50}.RelWithDebug|x64.Build.0 = RelWithDebug|x64 + {A16B5466-E680-43F6-A884-A4A01EB78E50}.RelWithDebug|x86.ActiveCfg = RelWithDebug|Win32 + {A16B5466-E680-43F6-A884-A4A01EB78E50}.RelWithDebug|x86.Build.0 = RelWithDebug|Win32 + {BE951571-3F3A-4048-BAA3-0C05F38CFF42}.Debug|x64.ActiveCfg = Debug|x64 + {BE951571-3F3A-4048-BAA3-0C05F38CFF42}.Debug|x64.Build.0 = Debug|x64 + {BE951571-3F3A-4048-BAA3-0C05F38CFF42}.Debug|x86.ActiveCfg = Debug|Win32 + {BE951571-3F3A-4048-BAA3-0C05F38CFF42}.Debug|x86.Build.0 = Debug|Win32 + {BE951571-3F3A-4048-BAA3-0C05F38CFF42}.RelWithDebug|x64.ActiveCfg = RelWithDebug|x64 + {BE951571-3F3A-4048-BAA3-0C05F38CFF42}.RelWithDebug|x64.Build.0 = RelWithDebug|x64 + 
{BE951571-3F3A-4048-BAA3-0C05F38CFF42}.RelWithDebug|x86.ActiveCfg = RelWithDebug|Win32 + {BE951571-3F3A-4048-BAA3-0C05F38CFF42}.RelWithDebug|x86.Build.0 = RelWithDebug|Win32 + {7A192E0C-8F58-4D65-998E-3A7010AB5F87}.Debug|x64.ActiveCfg = Debug|x64 + {7A192E0C-8F58-4D65-998E-3A7010AB5F87}.Debug|x64.Build.0 = Debug|x64 + {7A192E0C-8F58-4D65-998E-3A7010AB5F87}.Debug|x86.ActiveCfg = Debug|Win32 + {7A192E0C-8F58-4D65-998E-3A7010AB5F87}.Debug|x86.Build.0 = Debug|Win32 + {7A192E0C-8F58-4D65-998E-3A7010AB5F87}.RelWithDebug|x64.ActiveCfg = RelWithDebug|x64 + {7A192E0C-8F58-4D65-998E-3A7010AB5F87}.RelWithDebug|x64.Build.0 = RelWithDebug|x64 + {7A192E0C-8F58-4D65-998E-3A7010AB5F87}.RelWithDebug|x86.ActiveCfg = RelWithDebug|Win32 + {7A192E0C-8F58-4D65-998E-3A7010AB5F87}.RelWithDebug|x86.Build.0 = RelWithDebug|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/118 - slaweks17/c++/windows_VisualStudio/M41/ES_RNN.cc b/118 - slaweks17/c++/windows_VisualStudio/M41/ES_RNN.cc new file mode 100644 index 0000000..43dc358 --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/M41/ES_RNN.cc @@ -0,0 +1,1193 @@ +/*ES-RNN: ES-RNN Exponential Smoothing Recurrent Neural Network hybrid. Point forecast. +Slawek Smyl, Jan-May 2017. + +Dilated LSTMs, with optional shortcuts, attention. +It is meant to be used for the Monthly and Quarterly series of the M4 competition, because the DE (Diversified Ensemble) version is too slow. +The program uses and requires the Dynet NN library (https://github.com/clab/dynet); it can be compiled and run on Windows, Linux, and Mac. + +It has to be invoked in pairs of executables, passing at least two integers: seedForChunks, chunkNo, +so e.g. create a script with the following lines on Windows +start 10 1 +start 10 2 +Modern computers have more than 2 cores, so e.g. on a 6-core machine create and run the following script with 3 pairs of workers: +# start 10 1 0 +# start 10 2 0 +# start 20 1 5 +# start 20 2 5 +# start 30 1 10 +# start 30 2 10 +seedForChunks has to be the same within one pair; chunk numbers have to be 1 and 2. +We have added here a third parameter: ibigOffset. The stride between the ibigOffset values of consecutive pairs should be equal to or bigger than BIG_LOOP. +Each pair goes through BIG_LOOP (by default 3; change it in the code below if you want) rounds of model fitting and prediction, +so 2 pairs, as above, will produce 6 forecasts to be ensembled later, in R. +By increasing the number of pairs, e.g. to 6 on a 12-core computer, one can reduce BIG_LOOP to 1, and so reduce execution time, and still have 6 forecasts - +a decent number to ensemble (in a separate R script). + +There are three blocks of parameters below, one active (starting with //PARAMS--------------) and two inactive. +The active block is set up as in the final run of forecasting the Quarterly series; similarly for the Monthly block. +The Daily block is more of a demo, allowing one to quickly run forecasts for the Daily series, although with slightly worse performance (use the other program, ES_RNN_E.cc, for it). It was not used for the final submission. +So, you need to comment/uncomment so that only the block of interest is active. + + +*/ + +//#define USE_ODBC +//define USE_ODBC if you want to +// 1. run the program in backtesting mode (which means you also need to set LBACK>0 below; read the comment below). +// 2. save forecasts to a database. Mysql and SQL Server were tested. The table creation and some other scripts can be found in the \sql directory of the source code. +// Of course setting up ODBC is not that simple, :-), e.g.
you need to create DSN=slawek, that points to a database with the output table. +// Saving to the db is convenient, but not necessary - all forecasts are always saved to as csv files in automatically created subdirectory (sorry sometimes two directories, so you have to copy :-)) of OUTPUT_DIR +//If saving to database you need to modify run varaible, for each new run, otherwise you will get the table key error. + +#include "dynet/dynet.h" +#include "dynet/training.h" +#include "dynet/expr.h" +#include "dynet/io.h" +#include "dynet/model.h" +#include "dynet/nodes.h" +#include "dynet/expr.h" +#include "dynet/lstm.h" +#include "slstm.h" //my implementation of dilated LSTMs + +#if defined USE_ODBC + #if defined _WINDOWS + #include + #endif + #include + #include +#endif + +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace dynet; + + + +string DATA_DIR = "f:\\progs\\data\\M4DataSet\\"; //with the competition data csvs +//string DATA_DIR="/home/uber/progs/data/M4DataSet/"; +string OUTPUT_DIR = "f:\\progs\\data\\M4\\"; +//string OUTPUT_DIR="/home/uber/progs/data/M4/"; + +int LBACK = 0; //LBACK 0 means final mode: learning on all data and forecasting. LBACK=1 would move back by OUTPUT_SIZE, and forecast last known OUTPUT_SIZE points, for backtesting. LBACK could be a larger integer, but then number of series shrinks. + + +//PARAMS-------------- +string VARIABLE = "Quarterly"; +const string run = "50/45 (1,2),(4,8), LR=0.001/{10,1e-4f}, EPOCHS=15, LVP=80 40*"; +const float PERCENTILE = 50; //we always use Pinball loss, although on normalized values. When forecasting point value, we actually forecast median, so PERCENTILE=50 +const float TRAINING_PERCENTILE = 45; //the program has a tendency for positive bias. So, we can reduce it by running smaller TRAINING_PERCENTILE. + +vector> dilations={{1,2},{4,8}};//Each vector represents one chunk of Dilateed LSTMS, connected in standard resnNet fashion +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM //so for Quarterly series, we do not use either the more advanced residual connections nor attention. +const bool ADD_NL_LAYER=false; //whether to insert a tanh() layer between the RNN stack and the linear adaptor (output) layer + +const float INITIAL_LEARNING_RATE = 0.001f; +const map LEARNING_RATES = { { 10,1e-4f } }; //at which epoch we set them up to what +const float PER_SERIES_LR_MULTIP = 1; //multiplier for per-series parameters' learning rate. + +const int NUM_OF_TRAIN_EPOCHS = 15; +const unsigned int STATE_HSIZE = 40; + +const int SEASONALITY = 4; +const unsigned int INPUT_SIZE = 4; +const int INPUT_SIZE_I= INPUT_SIZE; +const unsigned int OUTPUT_SIZE = 8; +const int OUTPUT_SIZE_I= OUTPUT_SIZE; +const int MIN_INP_SEQ_LEN = 0; +const float LEVEL_VARIABILITY_PENALTY = 80; //Multiplier for L" penalty against wigglines of level vector. Important. +const int MIN_SERIES_LENGTH = INPUT_SIZE_I + OUTPUT_SIZE_I+ MIN_INP_SEQ_LEN+2; +const int MAX_SERIES_LENGTH = 40 * SEASONALITY + MIN_SERIES_LENGTH; //we are chopping longer series, to last, max e.g. 40 years + + + +/* +string VARIABLE = "Monthly"; +const string run = "50/49 Res (1,3,6,12), LR=5e-4 {12,1e-4f}, EPOCHS=10, 20*"; +const float PERCENTILE = 50; //we always use Pinball loss, although on normalized values. When forecasting point value, we actually forecast median, so PERCENTILE=50 +const float TRAINING_PERCENTILE = 49; //the program has a tendency for positive bias. 
So, we can reduce it by running smaller TRAINING_PERCENTILE + +#define USE_RESIDUAL_LSTM //so for Monthly we use only one block, so no standard resNet shortcuts, but instead but of the special residual shortcuts, after https://arxiv.org/abs/1701.03360. +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; //whether to insert a tanh() layer between the RNN stack and the linear adaptor (output) layer + +vector> dilations={{1,3,6,12}};//so for Monthly we use only one block, so no standard resNet shortcut +const float INITIAL_LEARNING_RATE = 5e-4; +const map LEARNING_RATES = { { 12,1e-4f } }; //at which epoch we set them up to what +const float PER_SERIES_LR_MULTIP = 1; //multiplier for per-series parameters' learning rate. + +const int NUM_OF_TRAIN_EPOCHS = 10; +const unsigned int STATE_HSIZE = 50; + +const float LEVEL_VARIABILITY_PENALTY = 50; //Multiplier for L" penalty against wigglines of level vector. + +const int SEASONALITY = 12; +const unsigned int OUTPUT_SIZE = 18; +const unsigned int INPUT_SIZE = 12; +const int INPUT_SIZE_I= INPUT_SIZE; +const int OUTPUT_SIZE_I= OUTPUT_SIZE; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = INPUT_SIZE_I + OUTPUT_SIZE_I+ MIN_INP_SEQ_LEN+2; +const int MAX_SERIES_LENGTH = 20 * SEASONALITY + MIN_SERIES_LENGTH; //we are chopping longer series, to last, max e.g. 40 years +*/ + + +/* +string VARIABLE = "Daily"; +const string run = "50/49 NL LRMult=1.5, 3/5 (1,7,28) LR=3e-4 {9,1e-4f} EPOCHS=15, LVP=100 HSIZE=40 20w"; + +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = true; + +const int PERCENTILE = 50; //we always use Pinball loss. When forecasting point value, we actually forecast median, so PERCENTILE=50 +const int TRAINING_PERCENTILE = 49; //the program has a tendency for positive bias. So, we can reduce it by running smaller TRAINING_PERCENTILE + +const int SEASONALITY_NUM = 1; //0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 7; +const int SEASONALITY2 = 0; +vector> dilations = { { 1,7,28 } }; + +const float INITIAL_LEARNING_RATE = 3e-4; +const map LEARNING_RATES = { { 9,1e-4f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1.5; +const int NUM_OF_TRAIN_EPOCHS = 15; + +float LEVEL_VARIABILITY_PENALTY = 100; //Multiplier for L" penalty against wigglines of level vector. +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 7; +const int INPUT_SIZE_I = INPUT_SIZE; +const unsigned int OUTPUT_SIZE = 14; +const int OUTPUT_SIZE_I = OUTPUT_SIZE; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). 
Total length may be truncated by LBACK +const int MAX_SERIES_LENGTH = 20 * SEASONALITY + MIN_SERIES_LENGTH; //we are chopping longer series, to max of last 20 years +*/ + +Expression squash(const Expression& x) { + return log(x); +} + +Expression expand(const Expression& x) { + return exp(x); +} + +string INPUT_PATH = DATA_DIR + VARIABLE + "-train.csv"; +string INFO_INPUT_PATH = DATA_DIR + "M4-info.csv"; + +#if defined _DEBUG + const int MAX_NUM_OF_SERIES = 40; +#else + const int MAX_NUM_OF_SERIES = -1; //use all series +#endif // _DEBUG + +const unsigned int NUM_OF_CATEGORIES = 6;//in data provided +const int BIG_LOOP = 3; +const int NUM_OF_CHUNKS = 2; +const float EPS=1e-6; +const int AVERAGING_LEVEL=5; +const bool USE_MEDIAN = false; +const int MIDDLE_POS_FOR_AVG = 2; //if using medians + +const float NOISE_STD=0.001; +const int FREQ_OF_TEST=1; +const float GRADIENT_CLIPPING=20; +const float C_STATE_PENALTY = 0; + +const float BIG_FLOAT=1e38;//numeric_limits::max(); +const bool PRINT_DIAGN=true; +const float TAU = PERCENTILE / 100.; +const float TRAINING_TAU = TRAINING_PERCENTILE / 100.; +const unsigned ATTENTION_HSIZE=STATE_HSIZE; + +const bool USE_AUTO_LEARNING_RATE=false; +//if USE_AUTO_LEARNING_RATE, and only if LBACK>0 +const float MIN_LEARNING_RATE = 0.0001f; +const float LR_RATIO = sqrt(10); +const float LR_TOLERANCE_MULTIP = 1.005; +const int L3_PERIOD = 2; +const int MIN_EPOCHS_BEFORE_CHANGING_LRATE = 2; + + +#if defined USE_ODBC + void HandleDiagnosticRecord(SQLHANDLE hHandle, + SQLSMALLINT hType, + RETCODE RetCode); + + #if defined _WINDOWS + WCHAR* pwszConnStr = L"DSN=slawek"; + #else + SQLCHAR* pwszConnStr = (SQLCHAR*) "DSN=slawek"; + #endif + #define TRYODBC(h, ht, x) { RETCODE rc = x;\ + if (rc != SQL_SUCCESS) \ + { \ + HandleDiagnosticRecord (h, ht, rc); \ + } \ + if (rc == SQL_ERROR) \ + { \ + fprintf(stderr, "Error in " #x "\n"); \ + if (hStmt) { \ + SQLFreeHandle(SQL_HANDLE_STMT, hStmt); \ + } \ + if (hDbc) { \ + SQLDisconnect(hDbc); \ + SQLFreeHandle(SQL_HANDLE_DBC, hDbc); \ + } \ + if (hEnv) { \ + SQLFreeHandle(SQL_HANDLE_ENV, hEnv); \ + } \ + exit(-1); \ + } \ + } + +#endif + +struct M4TS {//storing series data + vector < float> categories_vect; + vector vals; + vector testVals;//empty, unless LBACK>0 + int n; + + M4TS(string category, stringstream &line_stream) { + array categories = { 0,0,0,0,0,0 }; + if (category == "Demographic") + categories[0] = 1; + else if (category == "Finance") + categories[1] = 1; + else if (category == "Industry") + categories[2] = 1; + else if (category == "Macro") + categories[3] = 1; + else if (category == "Micro") + categories[4] = 1; + else if (category == "Other") + categories[5] = 1; + else { + cerr << "unknown category?"; + exit(-1); + } + for (int i = 0; i < NUM_OF_CATEGORIES; i++) + categories_vect.push_back(categories[i]); + + string tmp_str; + while(getline(line_stream, tmp_str, ',' )) { + string val_str; + for (const auto c : tmp_str) { + if (c != '\"' && c != '\r') //remove quotes and very occasional double end of line + val_str.push_back(c); + } + if (val_str.size() == 0) + break; + float val=(atof(val_str.c_str())); + vals.push_back(val); + } + if (LBACK > 0) { //extract last OUTPUT_SIZE points as the test values + if (vals.size() > LBACK*OUTPUT_SIZE_I) { + auto first = vals.begin() + vals.size() - LBACK*OUTPUT_SIZE_I; + auto pastLast = vals.begin() + vals.size() - (LBACK-1)*OUTPUT_SIZE_I; + vector input_vect(first, pastLast); //[first,pastLast) + testVals= input_vect; + vals.resize(vals.size() - LBACK*OUTPUT_SIZE_I); 
//remove last LBACK*OUTPUT_SIZE elements + n = vals.size(); + } else + n = 0; + } else { + n = vals.size(); + } + if (n > MAX_SERIES_LENGTH) { //chop long series + vals.erase(vals.begin(), vals.begin() + (n-MAX_SERIES_LENGTH)); //remove some early data + n = vals.size(); + } + } + M4TS(){}; +}; + + +struct AdditionalParams {//Per series, important + Parameter levSm; + Parameter sSm; + array initSeasonality; +}; + +struct AdditionalParamsF {//Used for storing diagnostics + float levSm; + float sSm; + array initSeasonality; + vector levels; + vector seasons; +}; + + +Expression pinBallLoss(const Expression& out_ex, const Expression& actuals_ex) {//used by Dynet, learning loss function + vector losses; + for (unsigned int indx = 0; indx as_scalar(forec.value())) + losses.push_back((actual - forec)*TRAINING_TAU); + else + losses.push_back((actual - forec)*(TRAINING_TAU - 1)); + } + return sum(losses) / OUTPUT_SIZE * 2; +} + + +//weighted quantile Loss, used just for diagnostics, if if LBACK>0 and PERCENTILE!=50 +float wQuantLoss(vector& out_vect, vector& actuals_vect) { + float sumf = 0; float suma=0; + for (unsigned int indx = 0; indx forec) + sumf = sumf + (actual - forec)*TAU; + else + sumf = sumf + (actual - forec)*(TAU - 1); + } + return sumf / suma * 200; +} + +//used just for diagnostics, if LBACK>0 and PERCENTILE==50 +float sMAPE(vector& out_vect, vector& actuals_vect) { + float sumf = 0; + for (unsigned int indx = 0; indx& out_vect, vector& actuals_vect) { + if (PERCENTILE==50) + return sMAPE(out_vect, actuals_vect); + else + return wQuantLoss(out_vect, actuals_vect); +} + +int main(int argc, char** argv) { + dynet::initialize(argc, argv); + + int seedForChunks = 10; //Yes it runs, without any params, but it will work only on 1/NUM_OF_CHUNKS of all cases. The system is expected to run in NUM_OF_CHUNKS multiples. + int chunkNo = 1; + int ibigOffset = 0; + if (argc >= 3) { + seedForChunks = atoi(argv[1]); + chunkNo = atoi(argv[2]); + } + if (argc >= 4) + ibigOffset = atoi(argv[3]); + + if (chunkNo > NUM_OF_CHUNKS) { + cerr << "chunkNo > NUM_OF_CHUNKS"; + exit(-1); + } + else if (chunkNo <= 0) { + cerr << "chunkNo <= 0"; + exit(-1); + } + + cout<0) + std::cout<< " ibigOffset:"<< ibigOffset; //if continuing prematurely stopped run + if (LBACK>0) + std::cout<<" lback:"<tm_year+1900; + now_ts.month=now->tm_mon+1; + now_ts.day=now->tm_mday; + now_ts.hour=now->tm_hour; + now_ts.minute=now->tm_min; + now_ts.second=now->tm_sec; + now_ts.fraction=0; //reportedly needed + + const int OFFSET_TO_FIRST_ACTUAL=5; + string insertQuery_str = "insert into M72nn(run, LBack, ibig, series, epoch "; + for (int iq = 1; iq <= OUTPUT_SIZE_I; iq++) { + stringstream ss; + ss << iq; + string iq_str = ss.str(); + insertQuery_str = insertQuery_str +", actual"+iq_str+", forec" + iq_str; + } + insertQuery_str = insertQuery_str +", trainingError, variable, n, dateTimeOfPrediction) \ + values(? , ? , ? , ? , ? 
"; + for (int iq = 1; iq <= OUTPUT_SIZE_I; iq++) { + insertQuery_str = insertQuery_str + ",?,?"; + } + insertQuery_str = insertQuery_str + ",?,?,?,?)"; + #if defined _WINDOWS + wstring insertQuery(insertQuery_str.begin(), insertQuery_str.end()); + SQLWCHAR* sqlQuery = (SQLWCHAR*)insertQuery.c_str(); + #else + SQLCHAR* sqlQuery =(SQLCHAR*)insertQuery_str.c_str(); + #endif + + + SQLHENV hEnv = NULL; + SQLHDBC hDbc = NULL; + SQLHSTMT hStmt = NULL, hInsertStmt = NULL; + + if (SQLAllocHandle(SQL_HANDLE_ENV, SQL_NULL_HANDLE, &hEnv) == SQL_ERROR) { + fprintf(stderr, "Unable to allocate an environment handle\n"); + exit(-1); + } + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLSetEnvAttr(hEnv, + SQL_ATTR_ODBC_VERSION, + (SQLPOINTER)SQL_OV_ODBC3, + 0)); + + // Allocate a connection + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLAllocHandle(SQL_HANDLE_DBC, hEnv, &hDbc)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLDriverConnect(hDbc, + NULL, + pwszConnStr, + SQL_NTS, + NULL, + 0, + NULL, + SQL_DRIVER_COMPLETE)); + fprintf(stderr, "Connected!\n"); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLSetConnectAttr(hDbc, SQL_ATTR_AUTOCOMMIT, (SQLPOINTER)SQL_AUTOCOMMIT_OFF, SQL_IS_INTEGER)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLAllocHandle(SQL_HANDLE_STMT, hDbc, &hInsertStmt)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLPrepare(hInsertStmt, sqlQuery, SQL_NTS)); + + SQLLEN nullTerminatedStringOfRun = SQL_NTS; + SQLLEN nullTerminatedStringOfSeries = SQL_NTS; + SQLLEN nullTerminatedStringOfVariable = SQL_NTS; + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 1, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)run.c_str(), 0, &nullTerminatedStringOfRun)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 2, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&LBACK, 0, NULL)); + + // variable, n, dateTimeOfPrediction + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL+2*OUTPUT_SIZE_I+2, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)VARIABLE.c_str(), 0, &nullTerminatedStringOfVariable)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE_I + 4, SQL_PARAM_INPUT, SQL_C_TYPE_TIMESTAMP, SQL_TYPE_TIMESTAMP, 0, 0, &now_ts, sizeof(TIMESTAMP_STRUCT), NULL)); +#endif + + random_device rd; // only used once to initialise (seed) engine + mt19937 rng(rd()); // random-number engine used (Mersenne-Twister) + mt19937 rngForChunks(seedForChunks); + + vector series_vect; + unordered_map allSeries_map(30000);//max series in one chunk would be 48/2=24k, for monthly series + unordered_map seriesCategories_map(120000);//100k series + + ifstream infoFile(INFO_INPUT_PATH); + string line; + getline(infoFile, line); //header + while (getline(infoFile, line)) { + //cout << string( line)<= MIN_SERIES_LENGTH) { + series_vect.push_back(series); + allSeries_map[series] = m4Obj; + } + if (MAX_NUM_OF_SERIES>0 && series_vect.size()>=MAX_NUM_OF_SERIES) + break; + } + + int series_len=(int)series_vect.size(); + int chunkSize= series_len/NUM_OF_CHUNKS; + std::cout << "num of series:" << series_vect.size() <<" size of chunk:"<< chunkSize< uniOnSeries(0, chunkSize -1); // closed interval [a, b] + + unordered_map, AVERAGING_LEVEL+1>> testResults_map((int)chunkSize*1.5); + set diagSeries; + + for (int ibig=0; ibig perfValid_vect; + int epochOfLastChangeOfLRate = -1; + +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 3, 
SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&ibigDb, 0, NULL)); +#endif + + ParameterCollection pc; + ParameterCollection perSeriesPC; + + float learning_rate= INITIAL_LEARNING_RATE; + AdamTrainer trainer(pc, learning_rate, 0.9, 0.999, EPS); + trainer.clip_threshold = GRADIENT_CLIPPING; + AdamTrainer perSeriesTrainer(perSeriesPC, learning_rate*PER_SERIES_LR_MULTIP, 0.9, 0.999, EPS); + perSeriesTrainer.clip_threshold = GRADIENT_CLIPPING; + + #if defined USE_RESIDUAL_LSTM + vector rNNStack; + rNNStack.emplace_back(ResidualDilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, pc)); + for (int il = 1; il rNNStack; + rNNStack.emplace_back(AttentiveDilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, ATTENTION_HSIZE, pc)); + for (int il = 1; il rNNStack; + rNNStack.emplace_back(DilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, pc)); + for (int il = 1; il oneChunk_vect(start,end); + if (PRINT_DIAGN) { + for (int k = 0; k<10; k++) //diag + cout << oneChunk_vect[k] << " "; + cout << endl; + } + if (chunkNo == NUM_OF_CHUNKS) + cout<<"last chunk size:"<< oneChunk_vect.size()< additionalParams_map((int)oneChunk_vect.size()*1.5); //per series + unordered_map*> historyOfAdditionalParams_map((int)oneChunk_vect.size()*1.5); + for (auto iter = oneChunk_vect.begin(); iter != oneChunk_vect.end(); ++iter) {//setup + string series = *iter; + AdditionalParams addParams; + addParams.levSm = perSeriesPC.add_parameters({ 1 }, 0.5); //level smoothing + addParams.sSm = perSeriesPC.add_parameters({ 1 }, 0.5); //seasonality smoothing + for (int isea = 0; isea(); + } + + for (int iEpoch=0; iEpoch testLosses; //test losses of all series in this epoch + vector testAvgLosses; //test avg (over last few epochs) losses of all series in this epoch + vector trainingLosses; //training losses of all series in one epoch + vector forecLosses; vector levVarLosses; vector stateLosses; + #if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 5, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&iEpoch, 0, NULL)); + #endif + + for (auto iter = oneChunk_vect.begin() ; iter != oneChunk_vect.end(); ++iter) { + string series=*iter; + auto m4Obj = allSeries_map[series]; + + #if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 4, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)series.c_str(), 0, &nullTerminatedStringOfSeries)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE_I + 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&m4Obj.n, 0, NULL)); + #endif + + ComputationGraph cg; + for (int il=0; il season_exVect;//vector, because we do not know how long the series is + for (int iseas=0; iseas seas==1 + season_exVect.push_back(seas);//Expression is a simple struct, without any storage management, so the auto copy constructor works OK. 
+ } + season_exVect.push_back(season_exVect[0]); + + vector logDiffOfLevels_vect; + vector levels_exVect; + Expression lev=cdiv(input(cg, m4Obj.vals[0]), season_exVect[0]); + levels_exVect.push_back(lev); + for (int i=1; i 0) { + vector levelVarLoss_v; + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx=season_exVect.size()-SEASONALITY; + for (int i=0;i<(OUTPUT_SIZE_I-SEASONALITY);i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx+i]); + } + vector losses; + for (int i=INPUT_SIZE_I-1; i<(m4Obj.n- OUTPUT_SIZE_I); i++) { + vector::const_iterator firstE = season_exVect.begin() +i+1-INPUT_SIZE_I; + vector::const_iterator pastLastE = season_exVect.begin() +i+1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression inputSeasonality_ex=concatenate(inputSeasonality_exVect); + + vector::const_iterator first = m4Obj.vals.begin() +i+1-INPUT_SIZE_I; + vector::const_iterator pastLast = m4Obj.vals.begin() +i+1; //not including the last one + vector input_vect(first, pastLast); //[first,pastLast) + Expression input0_ex=input(cg,{INPUT_SIZE},input_vect); + Expression input1_ex=cdiv(input0_ex,inputSeasonality_ex); //deseasonalization + vector joinedInput_ex; + input1_ex= cdiv(input1_ex, levels_exVect[i]); + joinedInput_ex.emplace_back(noise(squash(input1_ex), NOISE_STD)); //normalization+noise + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality_ex=concatenate(outputSeasonality_exVect); + + first = m4Obj.vals.begin() +i+1; + pastLast = m4Obj.vals.begin() +i+1+OUTPUT_SIZE_I; + vector labels_vect(first, pastLast); //[first,pastLast) + Expression labels0_ex=input(cg,{OUTPUT_SIZE},labels_vect); + Expression labels1_ex=cdiv(labels0_ex,outputSeasonality_ex); //deseasonalization + labels1_ex= cdiv(labels1_ex, levels_exVect[i]);//normalization + Expression labels_ex=squash(labels1_ex); + + Expression loss_ex=pinBallLoss(out_ex, labels_ex); + if (i>=INPUT_SIZE_I+MIN_INP_SEQ_LEN) + losses.push_back(loss_ex); + } + + Expression forecLoss_ex= average(losses); + Expression loss_exp = forecLoss_ex; + + float levVarLoss=0; + if (LEVEL_VARIABILITY_PENALTY > 0) { + Expression levelVarLossP_ex = levelVarLoss_ex*LEVEL_VARIABILITY_PENALTY; + levVarLoss = as_scalar(levelVarLossP_ex.value()); + levVarLosses.push_back(levVarLoss); + loss_exp= loss_exp + levelVarLossP_ex; + } + + float cStateLoss=0; + if (C_STATE_PENALTY>0) { + vector cStateLosses_vEx; + for (int irnn = 0; irnn < rNNStack.size(); irnn++) + for (int it = 0; it maxAbs) { + maxAbs = abs(state[iv]); + timeOfMax = it; + layerOfMax = il; + chunkOfMax = irnn; + } + } + } //through layers/states + } //through time + } //through chunks + + cout << "levSm:" << as_scalar(levSm_ex.value()) << endl; + cout << "sSm:" << as_scalar(sSm_ex.value()) << endl; + cout << " min season=" << minSeason << endl; + cout << " min level=" << minLevel << endl; + cout << " max abs:" << maxAbs << " at time:" << timeOfMax << " at layer:" << layerOfMax << " and chunk:" << chunkOfMax << endl; + + //diagSeries.insert(series); + pc.reset_gradient(); + perSeriesPC.reset_gradient(); + } + + //saving per-series values for diagnostics purposes + AdditionalParamsF &histAdditionalParams= historyOfAdditionalParams_map[series]->at(iEpoch); + 
histAdditionalParams.levSm=as_scalar(levSm_ex.value()); + histAdditionalParams.sSm=as_scalar(sSm_ex.value()); + for (int isea=0; isea::const_iterator firstE = season_exVect.begin() + i + 1 - INPUT_SIZE_I; + vector::const_iterator pastLastE = season_exVect.begin() + i + 1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression inputSeasonality_ex = concatenate(inputSeasonality_exVect); + + vector::const_iterator first = m4Obj.vals.begin() + i + 1 - INPUT_SIZE_I; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1; //not including the last one + vector input_vect(first, pastLast); //[first,pastLast) + Expression input0_ex = input(cg, { INPUT_SIZE }, input_vect); + Expression input1_ex = cdiv(input0_ex, inputSeasonality_ex); //deseasonalization + vector joinedInput_ex; + input1_ex= cdiv(input1_ex, levels_exVect[i]);//normalization + joinedInput_ex.emplace_back(squash(input1_ex)); + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality_ex = concatenate(outputSeasonality_exVect); + + Expression out_ex; + if (ADD_NL_LAYER) { + out_ex=MLPW_ex*rnn_ex+MLPB_ex; + out_ex = adapterW_ex*tanh(out_ex)+adapterB_ex; + } else + out_ex=adapterW_ex*rnn_ex+adapterB_ex; + + out_ex = cmult(expand(out_ex), outputSeasonality_ex)*levels_exVect[i];//back to original scale + vector out_vect = as_vector(out_ex.value()); + + if (LBACK > 0) { + float qLoss = errorFunc(out_vect, m4Obj.testVals); + testLosses.push_back(qLoss); + } + + testResults_map[series][iEpoch%AVERAGING_LEVEL] = out_vect; + if (iEpoch >= AVERAGING_LEVEL) { + if (USE_MEDIAN) { + if (testResults_map[series][AVERAGING_LEVEL].size() == 0) + testResults_map[series][AVERAGING_LEVEL] = out_vect; //just to initialized, to make space. 
The values will be overwritten + for (int iii = 0; iii < OUTPUT_SIZE_I; iii++) { + vector temp_vect2; + for (int ii = 0; ii firstForec = testResults_map[series][0]; + testResults_map[series][AVERAGING_LEVEL] = firstForec; + for (int ii = 1; ii nextForec = testResults_map[series][ii]; + for (int iii = 0; iii 0) { + float qLoss = errorFunc(testResults_map[series][AVERAGING_LEVEL], m4Obj.testVals); + testAvgLosses.push_back(qLoss); + + #if defined USE_ODBC //save + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE_I + 1, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&forecastLoss, 0, NULL)); + + for (int io = 0; io < OUTPUT_SIZE_I; io++) { + int ipos=OFFSET_TO_FIRST_ACTUAL + 1 + 2*io; + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, ipos, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&m4Obj.testVals[io], 0, NULL)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, ipos+1, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&testResults_map[series][AVERAGING_LEVEL][io], 0, NULL)); + } + if (MAX_NUM_OF_SERIES<0) + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLExecute(hInsertStmt)); + #endif + } + } //time to average + }//last anchor point of the series + }//through TEST loop + }//through series + + + if (iEpoch % FREQ_OF_TEST == 0) { + float averageTrainingLoss = accumulate(trainingLosses.begin(), trainingLosses.end(), 0.0) / trainingLosses.size(); + + cout << ibig << " " << iEpoch << " loss:" << averageTrainingLoss * 100; + if (LEVEL_VARIABILITY_PENALTY > 0 || C_STATE_PENALTY > 0) { + float averageForecLoss = accumulate(forecLosses.begin(), forecLosses.end(), 0.0) / forecLosses.size(); + cout << " forecast loss:" << averageForecLoss*100; + } + if (LEVEL_VARIABILITY_PENALTY > 0) { + float averagelevVarLoss = accumulate(levVarLosses.begin(), levVarLosses.end(), 0.0) / levVarLosses.size(); + cout << " levVar loss:" << averagelevVarLoss * 100; + } + if (C_STATE_PENALTY > 0) { + float averageStateLoss = accumulate(stateLosses.begin(), stateLosses.end(), 0.0) / stateLosses.size(); + cout << " state loss:" << averageStateLoss * 100; + } + + float averageTestLoss=0; + if (LBACK > 0) { + float averageTestLoss = accumulate(testLosses.begin(), testLosses.end(), 0.0) / testLosses.size(); + cout<<" Test loss:" << averageTestLoss; + if (iEpoch >= AVERAGING_LEVEL) { + float averageTestAvgLoss = accumulate(testAvgLosses.begin(), testAvgLosses.end(), 0.0) / testAvgLosses.size();//of this epoch + cout << " avgLoss:" << averageTestAvgLoss; + } + if (USE_AUTO_LEARNING_RATE) + perfValid_vect.push_back(averageTestLoss); + } + cout << endl; + } + + if (USE_AUTO_LEARNING_RATE) { + bool changeL2Rate = false; + if (iEpoch >= 2) { + if (iEpoch < L3_PERIOD) + changeL2Rate = perfValid_vect[perfValid_vect.size() - 2] MIN_LEARNING_RATE && (iEpoch - epochOfLastChangeOfLRate) >= MIN_EPOCHS_BEFORE_CHANGING_LRATE) { + learning_rate /= LR_RATIO; + cout << "decreasing LR to:" << learning_rate << endl; + epochOfLastChangeOfLRate = iEpoch; + trainer.learning_rate = learning_rate; + } + } + #if defined USE_ODBC + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLEndTran( + SQL_HANDLE_DBC, + hDbc, + SQL_COMMIT)); + #endif + }//through epochs + + if (PRINT_DIAGN) {//some diagnostic info + set diagSeries; + for (int i = 0; i<1; i++) {//add a few normal ones + int irand = uniOnSeries(rng); + diagSeries.insert(oneChunk_vect[irand]); + } + for (auto series : diagSeries) { + cout << endl << series << endl; + 
array* historyOfAdditionalParams_ptrToArr = historyOfAdditionalParams_map[series]; + cout << "lSm:" << endl; + for (int iEpoch = 0; iEpochat(iEpoch).levSm << " "; + cout << endl; + cout << "sSm:" << endl; + for (int iEpoch = 0; iEpochat(iEpoch).sSm << " "; + cout << endl; + cout << "seasons:" << endl; + for (int isea = 0; iseaat(iEpoch).initSeasonality[isea] << " "; + cout << endl; + } + cout << endl; + for (int iEpoch = 0; iEpochat(iEpoch).levels.size()>0) { + cout << "levels:" << iEpoch << " "; + for (int iv = 0; ivat(iEpoch).levels.size(); iv++) + cout << historyOfAdditionalParams_ptrToArr->at(iEpoch).levels[iv] << ", "; + cout << endl; + cout << "seas:" << iEpoch << " "; + for (int iv = 0; ivat(iEpoch).seasons.size(); iv++) + cout << historyOfAdditionalParams_ptrToArr->at(iEpoch).seasons[iv] << ", "; + cout << endl; + } + } + } + } + + //save the forecast to outputFile + ofstream outputFile; + outputFile.open(outputPath); + for (auto iter = oneChunk_vect.begin(); iter != oneChunk_vect.end(); ++iter) { + string series = *iter; + outputFile<< series; + for (int io=0; io + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + RelWithDebug + Win32 + + + RelWithDebug + x64 + + + + + + + + + + + {928301A0-F01A-48F6-A499-851B3CE8BD4E} + Win32Proj + M41 + 8.1 + + + + Application + true + v140 + Unicode + + + Application + true + v140 + Unicode + + + Application + false + v140 + true + Unicode + + + Application + true + v140 + Unicode + Sequential + + + Application + true + v140 + Unicode + Sequential + + + Application + false + v140 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + true + + + true + + + true + + + true + + + false + + + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + + + + + NotUsing + Level1 + Disabled + WIN32;_WINDOWS;EIGEN_USE_MKL_ALL;EIGEN_FAST_MATH;NOMINMAX;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + E:\progs2\dynet;E:\progs\Eigen; + + + Console + true + E:\progs2\dynet\buildMKL\dynet\Debug + dynet.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + NotUsing + Level1 + MaxSpeed + WIN32;_WINDOWS;EIGEN_FAST_MATH;EIGEN_USE_MKL_ALL;NOMINMAX;NDEBUG_;CONSOLE;%(PreprocessorDefinitions) + E:\progs2\dynet;E:\progs\Eigen; + AnySuitable + true + Speed + AdvancedVectorExtensions + Default + MultiThreadedDLL + ProgramDatabase + true + false + + + Console + true + E:\progs2\dynet\buildMKL\dynet\RelWithDebInfo + dynet.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + true + true + + + + + + \ No newline at end of file diff --git a/118 - slaweks17/c++/windows_VisualStudio/M41/slstm.cpp b/118 - slaweks17/c++/windows_VisualStudio/M41/slstm.cpp new file mode 100644 index 0000000..3935604 --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/M41/slstm.cpp @@ -0,0 +1,729 @@ +/* +My implementation of dilated LSTMs, based on Dynet LSTM builders +- DilatedLSTMBuilder - standard Dilated 
LSTM (https://papers.nips.cc/paper/6613-dilated-recurrent-neural-networks.pdf) +- ResidualDilatedLSTMBuilder - Dilated LSTM with special Residual shortcuts, after https://arxiv.org/abs/1701.03360 +- AttentiveDilatedLSTMBuilder - Dilated LSTM with Attention mechanism, as in the second stage of https://arxiv.org/abs/1704.02971 +* +Slawek Smyl, Mar-May 2018 +*/ + +#include "slstm.h" +#include "dynet/lstm.h" +#include "dynet/param-init.h" + +#include +#include +#include +#include + +#if defined DEBUG + #define _DEBUG +#endif + +using namespace std; + +namespace dynet { + + // ResidualDilatedLSTMBuilder based on Vanilla LSTM + enum { _X2I, _H2I, _BI, _X2F, _H2F, _BF, _X2O, _H2O, _BO, _X2G, _H2G, _BG }; + enum { LN_GH, LN_BH, LN_GX, LN_BX, LN_GC, LN_BC }; + + ResidualDilatedLSTMBuilder::ResidualDilatedLSTMBuilder() : has_initial_state(false), layers(0), input_dim(0), hid(0), dropout_rate_h(0), ln_lstm(false), forget_bias(1.f), dropout_masks_valid(false) { } + + ResidualDilatedLSTMBuilder::ResidualDilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model, + bool ln_lstm, float forget_bias) : dilations(dilations), layers(unsigned(dilations.size())), + input_dim(input_dim), hid(hidden_dim), ln_lstm(ln_lstm), forget_bias(forget_bias), dropout_masks_valid(false) { + unsigned layer_input_dim = input_dim; + local_model = model.add_subcollection("ResidualDilated-lstm-builder"); + for (unsigned i = 0; i < layers; ++i) { + // i + Parameter p_x2i = local_model.add_parameters({ hidden_dim * 4, layer_input_dim }); + Parameter p_h2i = local_model.add_parameters({ hidden_dim * 4, hidden_dim }); + //Parameter p_c2i = model.add_parameters({hidden_dim, hidden_dim}); + Parameter p_bi = local_model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(0.f)); + + layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next + + vector ps = { p_x2i, p_h2i, /*p_c2i,*/ p_bi }; + params.push_back(ps); + + if (ln_lstm) { + Parameter p_gh = model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(1.f)); + Parameter p_bh = model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(0.f)); + Parameter p_gx = model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(1.f)); + Parameter p_bx = model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(0.f)); + Parameter p_gc = model.add_parameters({ hidden_dim }, ParameterInitConst(1.f)); + Parameter p_bc = model.add_parameters({ hidden_dim }, ParameterInitConst(0.f)); + vector ln_ps = { p_gh, p_bh, p_gx, p_bx, p_gc, p_bc }; + ln_params.push_back(ln_ps); + } + } // layers + dropout_rate = 0.f; + dropout_rate_h = 0.f; + } + + void ResidualDilatedLSTMBuilder::new_graph_impl(ComputationGraph& cg, bool update) { + param_vars.clear(); + if (ln_lstm)ln_param_vars.clear(); + for (unsigned i = 0; i < layers; ++i) { + auto& p = params[i]; + vector vars; + for (unsigned j = 0; j < p.size(); ++j) { vars.push_back(update ? parameter(cg, p[j]) : const_parameter(cg, p[j])); } + param_vars.push_back(vars); + if (ln_lstm) { + auto& ln_p = ln_params[i]; + vector ln_vars; + for (unsigned j = 0; j < ln_p.size(); ++j) { ln_vars.push_back(update ? 
parameter(cg, ln_p[j]) : const_parameter(cg, ln_p[j])); } + ln_param_vars.push_back(ln_vars); + } + } + + _cg = &cg; + } + // layout: 0..layers = c + // layers+1..2*layers = h + void ResidualDilatedLSTMBuilder::start_new_sequence_impl(const vector& hinit) { + h.clear(); + c.clear(); + + if (hinit.size() > 0) { + DYNET_ARG_CHECK(layers * 2 == hinit.size(), + "ResidualDilatedLSTMBuilder must be initialized with 2 times as many expressions as layers " + "(hidden state, and cell for each layer). However, for " << layers << " layers, " << + hinit.size() << " expressions were passed in"); + h0.resize(layers); + c0.resize(layers); + for (unsigned i = 0; i < layers; ++i) { + c0[i] = hinit[i]; + h0[i] = hinit[i + layers]; + } + has_initial_state = true; + } + else { + has_initial_state = false; + } + + dropout_masks_valid = false; + } + + void ResidualDilatedLSTMBuilder::set_dropout_masks(unsigned batch_size) { + masks.clear(); + for (unsigned i = 0; i < layers; ++i) { + std::vector masks_i; + unsigned idim = (i == 0) ? input_dim : hid; + if (dropout_rate > 0.f || dropout_rate_h > 0.f) { + float retention_rate = 1.f - dropout_rate; + float retention_rate_h = 1.f - dropout_rate_h; + float scale = 1.f / retention_rate; + float scale_h = 1.f / retention_rate_h; + // in + masks_i.push_back(random_bernoulli(*_cg, Dim({ idim }, batch_size), retention_rate, scale)); + // h + masks_i.push_back(random_bernoulli(*_cg, Dim({ hid }, batch_size), retention_rate_h, scale_h)); + masks.push_back(masks_i); + } + } + dropout_masks_valid = true; + } + + ParameterCollection & ResidualDilatedLSTMBuilder::get_parameter_collection() { + return local_model; + } + + // TODO - Make this correct + // Copied c from the previous step (otherwise c.size()< h.size()) + // Also is creating a new step something we want? + // wouldn't overwriting the current one be better? + Expression ResidualDilatedLSTMBuilder::set_h_impl(int prev, const vector& h_new) { + DYNET_ARG_CHECK(h_new.empty() || h_new.size() == layers, + "ResidualDilatedLSTMBuilder::set_h expects as many inputs as layers, but got " << + h_new.size() << " inputs for " << layers << " layers"); + const unsigned t = h.size(); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = h_new[i]; + Expression c_i = c[t - 1][i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); + } + // Current implementation : s_new is either {new_c[0],...,new_c[n]} + // or {new_c[0],...,new_c[n],new_h[0],...,new_h[n]} + Expression ResidualDilatedLSTMBuilder::set_s_impl(int prev, const std::vector& s_new) { + DYNET_ARG_CHECK(s_new.size() == layers || s_new.size() == 2 * layers, + "ResidualDilatedLSTMBuilder::set_s expects either as many inputs or twice as many inputs as layers, but got " << s_new.size() << " inputs for " << layers << " layers"); + bool only_c = s_new.size() == layers; + const unsigned t = c.size(); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = only_c ? 
h[t - 1][i] : s_new[i + layers]; + Expression c_i = s_new[i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); + } + + Expression ResidualDilatedLSTMBuilder::add_input_impl(int prev, const Expression& x) { + h.push_back(vector(layers)); + c.push_back(vector(layers)); + vector& ht = h.back(); + vector& ct = c.back(); + Expression in = x; + if ((dropout_rate > 0.f || dropout_rate_h > 0.f) && !dropout_masks_valid) set_dropout_masks(x.dim().bd); + for (unsigned i = 0; i < layers; ++i) { + int dilation_offset = dilations[i] - 1; + const vector& vars = param_vars[i]; + + Expression i_h_tm1, i_c_tm1; + bool has_prev_state = (prev >= 0 || has_initial_state); + if (prev < dilation_offset) { + if (has_initial_state) { + // intial value for h and c at timestep 0 in layer i + // defaults to zero matrix input if not set in add_parameter_edges + i_h_tm1 = h0[i]; + i_c_tm1 = c0[i]; + } + else { + i_h_tm1 = zeros(*_cg, Dim({ vars[_BI].dim()[0] / 4 }, x.dim().bd)); + i_c_tm1 = i_h_tm1; + } + } + else { + i_h_tm1 = h[prev - dilation_offset][i]; + i_c_tm1 = c[prev - dilation_offset][i]; + } + // apply dropout according to https://arxiv.org/abs/1512.05287 (tied weights) + if (dropout_rate > 0.f) { + in = cmult(in, masks[i][0]); + } + if (has_prev_state && dropout_rate_h > 0.f) + i_h_tm1 = cmult(i_h_tm1, masks[i][1]); + // input + Expression tmp; + Expression i_ait; + Expression i_aft; + Expression i_aot; + Expression i_agt; + if (ln_lstm) { + const vector& ln_vars = ln_param_vars[i]; + if (has_prev_state) + tmp = vars[_BI] + layer_norm(vars[_X2I] * in, ln_vars[LN_GX], ln_vars[LN_BX]) + layer_norm(vars[_H2I] * i_h_tm1, ln_vars[LN_GH], ln_vars[LN_BH]); + else + tmp = vars[_BI] + layer_norm(vars[_X2I] * in, ln_vars[LN_GX], ln_vars[LN_BX]); + } + else { + if (has_prev_state) + tmp = affine_transform({ vars[_BI], vars[_X2I], in, vars[_H2I], i_h_tm1 }); + else + tmp = affine_transform({ vars[_BI], vars[_X2I], in }); + } + i_ait = pick_range(tmp, 0, hid); + i_aft = pick_range(tmp, hid, hid * 2); + i_aot = pick_range(tmp, hid * 2, hid * 3); + i_agt = pick_range(tmp, hid * 3, hid * 4); + Expression i_it = logistic(i_ait); + if (forget_bias != 0.0) + tmp = logistic(i_aft + forget_bias); + else + tmp = logistic(i_aft); + + Expression i_ft = tmp; + Expression i_ot = logistic(i_aot); + Expression i_gt = tanh(i_agt); + + ct[i] = has_prev_state ? 
(cmult(i_ft, i_c_tm1) + cmult(i_it, i_gt)) : cmult(i_it, i_gt); + if (ln_lstm) { + const vector& ln_vars = ln_param_vars[i]; + if (i==0) + in = ht[i] = cmult(i_ot, tanh(layer_norm(ct[i], ln_vars[LN_GC], ln_vars[LN_BC]))); + else + in = ht[i] = cmult(i_ot, in+tanh(layer_norm(ct[i], ln_vars[LN_GC], ln_vars[LN_BC]))); + } + else { + if (i==0) + in = ht[i] = cmult(i_ot, tanh(ct[i])); + else + in = ht[i] = cmult(i_ot, in+tanh(ct[i])); + } + } + return ht.back(); + } + + void ResidualDilatedLSTMBuilder::copy(const RNNBuilder & rnn) { + const ResidualDilatedLSTMBuilder & rnn_lstm = (const ResidualDilatedLSTMBuilder&)rnn; + DYNET_ARG_CHECK(params.size() == rnn_lstm.params.size(), + "Attempt to copy ResidualDilatedLSTMBuilder with different number of parameters " + "(" << params.size() << " != " << rnn_lstm.params.size() << ")"); + for (size_t i = 0; i < params.size(); ++i) + for (size_t j = 0; j < params[i].size(); ++j) + params[i][j] = rnn_lstm.params[i][j]; + for (size_t i = 0; i < ln_params.size(); ++i) + for (size_t j = 0; j < ln_params[i].size(); ++j) + ln_params[i][j] = rnn_lstm.ln_params[i][j]; + } + + void ResidualDilatedLSTMBuilder::set_dropout(float d) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d; + } + + void ResidualDilatedLSTMBuilder::set_dropout(float d, float d_h) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f && d_h >= 0.f && d_h <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d_h; + } + + void ResidualDilatedLSTMBuilder::disable_dropout() { + dropout_rate = 0.f; + dropout_rate_h = 0.f; + } + + + + + //enum { _X2I, _H2I, _BI, _X2F, _H2F, _BF, _X2O, _H2O, _BO, _X2G, _H2G, _BG }; + enum { _X2I_, _H2I_, _BI_, _XA1, _HA1, _SA1, _BA1, _A2, _B2 }; + + +//*************************** + + + + AttentiveDilatedLSTMBuilder::AttentiveDilatedLSTMBuilder() : has_initial_state(false), layers(0), input_dim(0), hid(0), dropout_rate_h(0), weightnoise_std(0), dropout_masks_valid(false) { } + + AttentiveDilatedLSTMBuilder::AttentiveDilatedLSTMBuilder(vector max_dilations, + unsigned input_dim, + unsigned hidden_dim, + unsigned attention_dim, + ParameterCollection& model) + : max_dilations(max_dilations), layers(unsigned(max_dilations.size())), + input_dim(input_dim), hid(hidden_dim), attention_dim(attention_dim), weightnoise_std(0), dropout_masks_valid(false) { + unsigned layer_input_dim = input_dim; + local_model = model.add_subcollection("compact-vanilla-lstm-builder"); + for (unsigned i = 0; i < layers; ++i) { + // i + Parameter p_Wx = local_model.add_parameters({ hidden_dim * 4, layer_input_dim }); + Parameter p_Wh = local_model.add_parameters({ hidden_dim * 4, hidden_dim }); + Parameter p_b = local_model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(0.f)); + + Parameter p_Wxa1 = local_model.add_parameters({ attention_dim, layer_input_dim }); + Parameter p_Wha1 = local_model.add_parameters({ attention_dim, hidden_dim }); + Parameter p_Wsa1 = local_model.add_parameters({ attention_dim, hidden_dim }); + Parameter p_ba1 = local_model.add_parameters({ attention_dim }, ParameterInitConst(0.f)); + + Parameter p_Wa2 = local_model.add_parameters({ max_dilations[i], attention_dim }); + Parameter p_ba2 = local_model.add_parameters({ max_dilations[i] }, ParameterInitConst(0.f)); + + layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next + + vector ps = { p_Wx, p_Wh, p_b, p_Wxa1, p_Wha1, p_Wsa1, p_ba1, p_Wa2, p_ba2 }; + 
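+ //p_Wxa1, p_Wha1, p_Wsa1 and p_ba1 project the layer input, previous hidden state and previous cell
+ //state into an attention_dim-sized vector; p_Wa2/p_ba2 then turn it into max_dilations[i] scores,
+ //which add_input_impl() passes through tanh and softmax to weight the last max_dilations[i] hidden
+ //states of this layer.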
params.push_back(ps); + + } // layers + dropout_rate = 0.f; + dropout_rate_h = 0.f; + } + + void AttentiveDilatedLSTMBuilder::new_graph_impl(ComputationGraph& cg, bool update) { + param_vars.clear(); + for (unsigned i = 0; i < layers; ++i) { + auto& p = params[i]; + vector vars; + for (unsigned j = 0; j < p.size(); ++j) { + vars.push_back(update ? parameter(cg, p[j]) : const_parameter(cg, p[j])); + } + param_vars.push_back(vars); + } + + _cg = &cg; + } + // layout: 0..layers = c + // layers+1..2*layers = h + void AttentiveDilatedLSTMBuilder::start_new_sequence_impl(const vector& hinit) { + h.clear(); + c.clear(); + + if (hinit.size() > 0) { + DYNET_ARG_CHECK(layers * 2 == hinit.size(), + "AttentiveDilatedLSTMBuilder must be initialized with 2 times as many expressions as layers " + "(hidden state, and cell for each layer). However, for " << layers << " layers, " << + hinit.size() << " expressions were passed in"); + h0.resize(layers); + c0.resize(layers); + for (unsigned i = 0; i < layers; ++i) { + c0[i] = hinit[i]; + h0[i] = hinit[i + layers]; + } + has_initial_state = true; + } + else { + has_initial_state = false; + } + + dropout_masks_valid = false; + } + + void AttentiveDilatedLSTMBuilder::set_dropout_masks(unsigned batch_size) { + masks.clear(); + for (unsigned i = 0; i < layers; ++i) { + std::vector masks_i; + unsigned idim = (i == 0) ? input_dim : hid; + if (dropout_rate > 0.f || dropout_rate_h > 0.f) { + float retention_rate = 1.f - dropout_rate; + float retention_rate_h = 1.f - dropout_rate_h; + float scale = 1.f / retention_rate; + float scale_h = 1.f / retention_rate_h; + // in + masks_i.push_back(random_bernoulli(*_cg, Dim({ idim }, batch_size), retention_rate, scale)); + // h + masks_i.push_back(random_bernoulli(*_cg, Dim({ hid }, batch_size), retention_rate_h, scale_h)); + masks.push_back(masks_i); + } + } + dropout_masks_valid = true; + } + + ParameterCollection & AttentiveDilatedLSTMBuilder::get_parameter_collection() { + return local_model; + } + + // TODO - Make this correct + // Copied c from the previous step (otherwise c.size()< h.size()) + // Also is creating a new step something we want? + // wouldn't overwriting the current one be better? + Expression AttentiveDilatedLSTMBuilder::set_h_impl(int prev, const vector& h_new) { + DYNET_ARG_CHECK(h_new.empty() || h_new.size() == layers, + "AttentiveDilatedLSTMBuilder::set_h expects as many inputs as layers, but got " << + h_new.size() << " inputs for " << layers << " layers"); + const unsigned t = unsigned(h.size()); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = h_new[i]; + Expression c_i = c[t - 1][i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); + } + // Current implementation : s_new is either {new_c[0],...,new_c[n]} + // or {new_c[0],...,new_c[n],new_h[0],...,new_h[n]} + Expression AttentiveDilatedLSTMBuilder::set_s_impl(int prev, const std::vector& s_new) { + DYNET_ARG_CHECK(s_new.size() == layers || s_new.size() == 2 * layers, + "AttentiveDilatedLSTMBuilder::set_s expects either as many inputs or twice as many inputs as layers, but got " << s_new.size() << " inputs for " << layers << " layers"); + bool only_c = s_new.size() == layers; + const unsigned t = unsigned(c.size()); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = only_c ? 
h[t - 1][i] : s_new[i + layers]; + Expression c_i = s_new[i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); + } + + Expression AttentiveDilatedLSTMBuilder::add_input_impl(int prev, const Expression& x) { + h.push_back(vector(layers)); + c.push_back(vector(layers)); + vector& ht = h.back(); + vector& ct = c.back(); + Expression in = x; + if ((dropout_rate > 0.f || dropout_rate_h > 0.f) && !dropout_masks_valid) set_dropout_masks(x.dim().bd); + for (unsigned i = 0; i < layers; ++i) { + int dilation_offset= max_dilations[i]-1; + const vector& vars = param_vars[i]; + Expression i_h_tm1, i_c_tm1; + if (prev < dilation_offset) { + if (has_initial_state) { + // initial value for h and c at timestep 0 in layer i + // defaults to zero matrix input if not set in add_parameter_edges + i_h_tm1 = h0[i]; + i_c_tm1 = c0[i]; + } + else { + i_h_tm1 = zeros(*_cg, Dim({ vars[_BI].dim()[0] / 4 }, x.dim().bd)); + i_c_tm1 = i_h_tm1; + } + } + else { + if (dilation_offset>0) { + //enum { _X2I, _H2I, _BI, _XA1, _HA1, _SA1, _BA1, _A2, _B2 }; + Expression weights_ex=vars[_XA1]*in+ vars[_HA1]*h[prev][i]+ vars[_SA1]*c[prev][i]+ vars[_BA1]; + weights_ex=tanh(weights_ex); + weights_ex=vars[_A2]* weights_ex+ vars[_B2]; + weights_ex =softmax(weights_ex); + #if defined _DEBUG + vector weights=as_vector(weights_ex.value()); + #endif + + unsigned indx=0; + Expression w_ex = pick(weights_ex, indx); + Expression avg_h= cmult(h[prev][i], w_ex); + for (indx=1; indx <= dilation_offset; indx++) {//dilation_offset==max_dilations[i]-1, so together with indx==0, we cover max_dilations[i] steps + w_ex = pick(weights_ex, indx); + avg_h = avg_h+cmult(h[prev- indx][i], w_ex); + } + i_h_tm1 = avg_h; + } else { + i_h_tm1 = h[prev- dilation_offset][i]; + } + i_c_tm1 = c[prev- dilation_offset][i]; + } + if (dropout_rate > 0.f || dropout_rate_h > 0.f) { + // apply dropout according to https://arxiv.org/abs/1512.05287 (tied weights) + Expression gates_t = vanilla_lstm_gates_dropout({ in }, i_h_tm1, vars[_X2I], vars[_H2I], vars[_BI], masks[i][0], masks[i][1], weightnoise_std); + ct[i] = vanilla_lstm_c(i_c_tm1, gates_t); + in = ht[i] = vanilla_lstm_h(ct[i], gates_t); + } + else { + Expression gates_t = vanilla_lstm_gates({ in }, i_h_tm1, vars[_X2I], vars[_H2I], vars[_BI], weightnoise_std); + ct[i] = vanilla_lstm_c(i_c_tm1, gates_t); + in = ht[i] = vanilla_lstm_h(ct[i], gates_t); + } + } + return ht.back(); + } + + void AttentiveDilatedLSTMBuilder::copy(const RNNBuilder & rnn) { + const AttentiveDilatedLSTMBuilder & rnn_lstm = (const AttentiveDilatedLSTMBuilder&)rnn; + DYNET_ARG_CHECK(params.size() == rnn_lstm.params.size(), + "Attempt to copy AttentiveDilatedLSTMBuilder with different number of parameters " + "(" << params.size() << " != " << rnn_lstm.params.size() << ")"); + for (size_t i = 0; i < params.size(); ++i) + for (size_t j = 0; j < params[i].size(); ++j) + params[i][j] = rnn_lstm.params[i][j]; + } + + void AttentiveDilatedLSTMBuilder::set_dropout(float d) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d; + } + + void AttentiveDilatedLSTMBuilder::set_dropout(float d, float d_h) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f && d_h >= 0.f && d_h <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d_h; + } + + void AttentiveDilatedLSTMBuilder::disable_dropout() { + dropout_rate = 0.f; + dropout_rate_h = 0.f; + } + void AttentiveDilatedLSTMBuilder::set_weightnoise(float std) { + 
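+ //weightnoise_std is forwarded as the last argument of vanilla_lstm_gates()/vanilla_lstm_gates_dropout()
+ //in add_input_impl(), where Dynet applies Gaussian weight noise with this standard deviation during
+ //the gate computation as a simple regularizer; 0 (the default) disables it.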
DYNET_ARG_CHECK(std >= 0.f, "weight noise must have standard deviation >=0"); + weightnoise_std = std; + } + + //*/ + + DilatedLSTMBuilder::DilatedLSTMBuilder() : has_initial_state(false), layers(0), input_dim(0), hid(0), dropout_rate_h(0), weightnoise_std(0), dropout_masks_valid(false) { } + + DilatedLSTMBuilder::DilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model) + : dilations(dilations), layers(unsigned(dilations.size())), + input_dim(input_dim), hid(hidden_dim), weightnoise_std(0), dropout_masks_valid(false) { + unsigned layer_input_dim = input_dim; + local_model = model.add_subcollection("compact-vanilla-lstm-builder"); + for (unsigned i = 0; i < layers; ++i) { + // i + Parameter p_Wx = local_model.add_parameters({ hidden_dim * 4, layer_input_dim }); + Parameter p_Wh = local_model.add_parameters({ hidden_dim * 4, hidden_dim }); + Parameter p_b = local_model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(0.f)); + + layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next + + vector ps = { p_Wx, p_Wh, p_b }; + params.push_back(ps); + + } // layers + dropout_rate = 0.f; + dropout_rate_h = 0.f; + } + + void DilatedLSTMBuilder::new_graph_impl(ComputationGraph& cg, bool update) { + param_vars.clear(); + for (unsigned i = 0; i < layers; ++i) { + auto& p = params[i]; + vector vars; + for (unsigned j = 0; j < p.size(); ++j) { vars.push_back(update ? parameter(cg, p[j]) : const_parameter(cg, p[j])); } + param_vars.push_back(vars); + } + + _cg = &cg; + } + // layout: 0..layers = c + // layers+1..2*layers = h + void DilatedLSTMBuilder::start_new_sequence_impl(const vector& hinit) { + h.clear(); + c.clear(); + + if (hinit.size() > 0) { + DYNET_ARG_CHECK(layers * 2 == hinit.size(), + "DilatedLSTMBuilder must be initialized with 2 times as many expressions as layers " + "(hidden state, and cell for each layer). However, for " << layers << " layers, " << + hinit.size() << " expressions were passed in"); + h0.resize(layers); + c0.resize(layers); + for (unsigned i = 0; i < layers; ++i) { + c0[i] = hinit[i]; + h0[i] = hinit[i + layers]; + } + has_initial_state = true; + } else { + has_initial_state = false; + } + + dropout_masks_valid = false; + } + + void DilatedLSTMBuilder::set_dropout_masks(unsigned batch_size) { + masks.clear(); + for (unsigned i = 0; i < layers; ++i) { + std::vector masks_i; + unsigned idim = (i == 0) ? input_dim : hid; + if (dropout_rate > 0.f || dropout_rate_h > 0.f) { + float retention_rate = 1.f - dropout_rate; + float retention_rate_h = 1.f - dropout_rate_h; + float scale = 1.f / retention_rate; + float scale_h = 1.f / retention_rate_h; + // in + masks_i.push_back(random_bernoulli(*_cg, Dim({ idim }, batch_size), retention_rate, scale)); + // h + masks_i.push_back(random_bernoulli(*_cg, Dim({ hid }, batch_size), retention_rate_h, scale_h)); + masks.push_back(masks_i); + } + } + dropout_masks_valid = true; + } + + ParameterCollection & DilatedLSTMBuilder::get_parameter_collection() { + return local_model; + } + + // TODO - Make this correct + // Copied c from the previous step (otherwise c.size()< h.size()) + // Also is creating a new step something we want? + // wouldn't overwriting the current one be better? 
+ Expression DilatedLSTMBuilder::set_h_impl(int prev, const vector& h_new) { + DYNET_ARG_CHECK(h_new.empty() || h_new.size() == layers, + "DilatedLSTMBuilder::set_h expects as many inputs as layers, but got " << + h_new.size() << " inputs for " << layers << " layers"); + const unsigned t = unsigned(h.size()); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = h_new[i]; + Expression c_i = c[t - 1][i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); + } + // Current implementation : s_new is either {new_c[0],...,new_c[n]} + // or {new_c[0],...,new_c[n],new_h[0],...,new_h[n]} + Expression DilatedLSTMBuilder::set_s_impl(int prev, const std::vector& s_new) { + DYNET_ARG_CHECK(s_new.size() == layers || s_new.size() == 2 * layers, + "DilatedLSTMBuilder::set_s expects either as many inputs or twice as many inputs as layers, but got " << s_new.size() << " inputs for " << layers << " layers"); + bool only_c = s_new.size() == layers; + const unsigned t = unsigned(c.size()); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = only_c ? h[t - 1][i] : s_new[i + layers]; + Expression c_i = s_new[i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); + } + + Expression DilatedLSTMBuilder::add_input_impl(int prev, const Expression& x) { + h.push_back(vector(layers)); + c.push_back(vector(layers)); + vector& ht = h.back(); + vector& ct = c.back(); + Expression in = x; + if ((dropout_rate > 0.f || dropout_rate_h > 0.f) && !dropout_masks_valid) set_dropout_masks(x.dim().bd); + for (unsigned i = 0; i < layers; ++i) { + int dilation_offset = dilations[i] - 1; + const vector& vars = param_vars[i]; + Expression i_h_tm1, i_c_tm1; + if (prev < dilation_offset) { + if (has_initial_state) { + // initial value for h and c at timestep 0 in layer i + // defaults to zero matrix input if not set in add_parameter_edges + i_h_tm1 = h0[i]; + i_c_tm1 = c0[i]; + } else { + i_h_tm1 = zeros(*_cg, Dim({ vars[_BI].dim()[0] / 4 }, x.dim().bd)); + i_c_tm1 = i_h_tm1; + } + } else { // t > 0 + i_h_tm1 = h[prev - dilation_offset][i]; + i_c_tm1 = c[prev - dilation_offset][i]; + } + if (dropout_rate > 0.f || dropout_rate_h > 0.f) { + // apply dropout according to https://arxiv.org/abs/1512.05287 (tied weights) + Expression gates_t = vanilla_lstm_gates_dropout({ in }, i_h_tm1, vars[_X2I], vars[_H2I], vars[_BI], masks[i][0], masks[i][1], weightnoise_std); + ct[i] = vanilla_lstm_c(i_c_tm1, gates_t); + in = ht[i] = vanilla_lstm_h(ct[i], gates_t); + } else { + Expression gates_t = vanilla_lstm_gates({ in }, i_h_tm1, vars[_X2I], vars[_H2I], vars[_BI], weightnoise_std); + ct[i] = vanilla_lstm_c(i_c_tm1, gates_t); + in = ht[i] = vanilla_lstm_h(ct[i], gates_t); + } + } + return ht.back(); + } + + void DilatedLSTMBuilder::copy(const RNNBuilder & rnn) { + const DilatedLSTMBuilder & rnn_lstm = (const DilatedLSTMBuilder&)rnn; + DYNET_ARG_CHECK(params.size() == rnn_lstm.params.size(), + "Attempt to copy DilatedLSTMBuilder with different number of parameters " + "(" << params.size() << " != " << rnn_lstm.params.size() << ")"); + for (size_t i = 0; i < params.size(); ++i) + for (size_t j = 0; j < params[i].size(); ++j) + params[i][j] = rnn_lstm.params[i][j]; + } + + void DilatedLSTMBuilder::set_dropout(float d) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d; + } + + void 
DilatedLSTMBuilder::set_dropout(float d, float d_h) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f && d_h >= 0.f && d_h <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d_h; + } + + void DilatedLSTMBuilder::disable_dropout() { + dropout_rate = 0.f; + dropout_rate_h = 0.f; + } + void DilatedLSTMBuilder::set_weightnoise(float std) { + DYNET_ARG_CHECK(std >= 0.f, "weight noise must have standard deviation >=0"); + weightnoise_std = std; + } + +} // namespace dynet diff --git a/118 - slaweks17/c++/windows_VisualStudio/M41/slstm.h b/118 - slaweks17/c++/windows_VisualStudio/M41/slstm.h new file mode 100644 index 0000000..adb63a7 --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/M41/slstm.h @@ -0,0 +1,394 @@ +/** +* file slstm.h +* header for my implementation of dilated LSTMs, based on Dynet LSTM builders + - DilatedLSTMBuilder - standard Dilated LSTM (https://papers.nips.cc/paper/6613-dilated-recurrent-neural-networks.pdf) + - ResidualDilatedLSTMBuilder - Dilated LSTM with special Residual shortcuts, after https://arxiv.org/abs/1701.03360 + - AttentiveDilatedLSTMBuilder - Dilated LSTM with Attention mechanism, as in the second stage of https://arxiv.org/abs/1704.02971 +* +Slawek Smyl, Mar-May 2018 +*/ + +#ifndef DYNET_SLSTMS_H_ +#define DYNET_SLSTMS_H_ + +#include "dynet/dynet.h" +#include "dynet/rnn.h" +#include "dynet/expr.h" + +using namespace std; + +namespace dynet { + + //basd on VanillaLSTMBuilder + struct ResidualDilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + ResidualDilatedLSTMBuilder(); + /** + * \brief Constructor for the ResidualDilatedLSTMBuilder + * + * \param dilations Vector of dilations + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + * \param ln_lstm Whether to use layer normalization + * \param forget_bias value(float) to use as bias for the forget gate(default = 1.0) + */ + explicit ResidualDilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model, + bool ln_lstm = false, + float forget_bias = 1.f); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. 
+ * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. + * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + * \param d_h Dropout rate \f$d_h\f$ for the output \f$h_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + /** + * \brief Get parameters in ResidualDilatedLSTMBuilder + * \return list of points to ParameterStorage objects + */ + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + // first index is layer, then ... + std::vector> ln_params; + + // first index is layer, then ... + std::vector> param_vars; + // first index is layer, then ... + std::vector> ln_param_vars; + + // first index is layer, then ... 
+ std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + float dropout_rate_h; + bool ln_lstm; + float forget_bias; + bool dropout_masks_valid; + vector dilations; //one int per layer + + private: + ComputationGraph* _cg; // Pointer to current cg + + }; + + + struct DilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + DilatedLSTMBuilder(); + /** + * \brief Constructor for the DilatedLSTMBuilder + * + * \param dilations Vector of dilations + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + */ + explicit DilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 
+ * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + + void set_weightnoise(float std); + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + + // first index is layer, then ... + std::vector> param_vars; + + // first index is layer, then ... + std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + float dropout_rate_h; + float weightnoise_std; + vector dilations; //one int per layer + + bool dropout_masks_valid; + private: + ComputationGraph* _cg; // Pointer to current cg + + }; + + + struct AttentiveDilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + AttentiveDilatedLSTMBuilder(); + /** + * \brief Constructor for the AttentiveDilatedLSTMBuilder + * + * \param max_dilations Vector, maximum dilations (per layer) + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + */ + explicit AttentiveDilatedLSTMBuilder(vector max_dilations, + unsigned input_dim, + unsigned hidden_dim, + unsigned attention_dim, + ParameterCollection& model); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? 
c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. + * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + + void set_weightnoise(float std); + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + + // first index is layer, then ... + std::vector> param_vars; + + // first index is layer, then ... 
+ std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + unsigned attention_dim; + float dropout_rate_h; + float weightnoise_std; + vector max_dilations; //one int per layer + + bool dropout_masks_valid; + private: + ComputationGraph* _cg; // Pointer to current cg + + }; +} // namespace dynet + +#endif diff --git a/118 - slaweks17/c++/windows_VisualStudio/M42/ES_RNN_PI.cc b/118 - slaweks17/c++/windows_VisualStudio/M42/ES_RNN_PI.cc new file mode 100644 index 0000000..268c654 --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/M42/ES_RNN_PI.cc @@ -0,0 +1,1246 @@ +/*ES-RNN: ES-RNN Exponential Smoothing Recurrent Neural Network hybrid. Prediction intervals. +Slawek Smyl, Jan-May 2017. + +Dilated LSTMs, with optional shortcuts, attention. +It is meant to be used for Monthly and Quarterly series of M4 competition, becasue the DE (Diversified Ensemble) version is too slow. +The program uses and requires Dynet NN library(https://github.com/clab/dynet); can be compiled and run on Windows, Linux, and Mac. + +It has to be invoked in pair of executables, passing at least two integers: seedForChunks, chunkNo +so e.g. create a script with following lines on Windows +start 10 1 +start 10 2 +Modern computers have at more then 2 cores, so e.g. on 6-core machine create and run the following script with 3 pairs of workers: +# start 10 1 0 +# start 10 2 0 +# start 20 1 5 +# start 20 2 5 +# start 30 1 10 +# start 30 2 10 +seedForChunks have to be the same withion one pair, chunk numbers have to be 1 and 2. +We have added here the third parameter: ibigOffset. The straddle should be equal or bigger than BIG_LOOP. +Each pair goes through BIG_LOOP (by default 3, change in code below if you want) of model fitting and prediction, +so 2 pairs, as above, will produce 6 forecasts to be ensembled later, in R. +By increasing number of pairs, e.g. to 6 on 12-core computer, one can reduce BIG_LOOP to 1, so reduce execution time, and still have 6 forecasts - +a decent number to ensemble (in a separate, supplied R script). + +There are three blocks of parameters below, one active (starting with //PARAMS--------------) and two inactive. +The active block is setup as in the final run of forecasting quarterly series. Similarly Monthly block. +The Daily block is more of a demo, allowing to run quickly forecast for Daily series, although with slightly worse performance (use another program ES_RNN_E.cc for it). It was not used for the final submission. +So, you need comment/uncomment to have one block of interest active. + + +*/ + +//#define USE_ODBC +//define USE_ODBC if you want to +// 1. run the program in backtesting mode (which means you also need to set LBACK>0 below. Read the comment below. +// 2. save forecasts to a datatabase. Mysql and SQL Server were tested. The table creation and some other scripts should be found in \sql directory of the source code. +// Of course setting up ODBC is not that simple, :-), e.g. you need to create DSN=slawek, that points to a database with the output table. 
+// Saving to the db is convenient, but not necessary - all forecasts are always saved to as csv files in automatically created subdirectory (sorry sometimes two directories, so you have to copy :-)) of OUTPUT_DIR +//If saving to database you need to modify run varaible, for each new run, otherwise you will get the table key error. + +#include "dynet/dynet.h" +#include "dynet/training.h" +#include "dynet/expr.h" +#include "dynet/io.h" +#include "dynet/model.h" +#include "dynet/nodes.h" +#include "dynet/expr.h" +#include "dynet/lstm.h" +#include "slstm.h" //my implementation of dilated LSTMs + + +#if defined USE_ODBC + #if defined _WINDOWS + #include + #endif + #include + #include +#endif + +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace dynet; + + + +string DATA_DIR = "f:\\progs\\data\\M4DataSet\\"; //with the competition data csvs +//string DATA_DIR="/home/uber/progs/data/M4DataSet/"; +string OUTPUT_DIR = "f:\\progs\\data\\M4\\"; +//string OUTPUT_DIR="/home/uber/progs/data/M4/"; + +int LBACK = 0; //LBACK 0 means final mode: learning on all data and forecasting. LBACK=1 would move back by OUTPUT_SIZE, and forecast last known OUTPUT_SIZE points, for backtesting. LBACK could be a larger integer, but then number of series shrinks. + + +//PARAMS-------------- +string VARIABLE = "Quarterly"; +const string run0 = "(1,2),(4,8), LR=1e-3/{7,3e-4f},{11,1e-4f}, EPOCHS=16, LVP=200 40*"; +const string runL = "alpha5L " + run0; +const string runH = "alpha5H " + run0; + +vector> dilations = { { 1,2 },{ 4,8 } };//Each vector represents one chunk of Dilateed LSTMS, connected in resnNet fashion +const float INITIAL_LEARNING_RATE = 1e-3f; +//else +const map LEARNING_RATES = { { 7,3e-4f },{ 11,1e-4f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; //multiplier for per-series parameters' learning rate. + +const float ALPHA = 0.05; +const float TAUL = ALPHA / 2; +const float TAUH = 1 - TAUL; +const float ALPHA_MULTIP = 2 / ALPHA; + +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; //whether to insert a tanh() layer between the RNN stack and the linear adaptor (output) layer + +const int NUM_OF_TRAIN_EPOCHS = 16; +const unsigned int STATE_HSIZE = 40; + +const int SEASONALITY = 4; +const unsigned int INPUT_SIZE = 4; +const int INPUT_SIZE_I = INPUT_SIZE; +const unsigned int OUTPUT_SIZE = 8; +const int OUTPUT_SIZE_I = OUTPUT_SIZE; +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = INPUT_SIZE_I + OUTPUT_SIZE_I + MIN_INP_SEQ_LEN + 2; +const int MAX_SERIES_LENGTH = 40 * SEASONALITY + MIN_SERIES_LENGTH; //we are chopping longer series, to last, max e.g. 40 years + +const float LEVEL_VARIABILITY_PENALTY = 200; //Multiplier for L" penalty against wigglines of level vector. 
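+//Added illustration, not called anywhere in the program: TAUL and TAUH are the two target quantiles
+//(2.5% and 97.5% for ALPHA=0.05) and ALPHA_MULTIP=2/ALPHA is the coverage-penalty multiplier. The sketch
+//below shows, on plain floats, the per-step interval penalty that the Expression-based MSIS() loss
+//further down builds on the Dynet graph: interval width plus 2/ALPHA times any coverage violation.
+//The function name is illustrative only.
+static float intervalPenaltySketch(float forecL, float forecH, float actual, float alphaMultip) {
+	float penalty = forecH - forecL;                                 //narrower intervals cost less
+	if (actual < forecL) penalty += (forecL - actual) * alphaMultip; //actual fell below the lower quantile
+	if (actual > forecH) penalty += (actual - forecH) * alphaMultip; //actual rose above the upper quantile
+	return penalty;
+}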
+ + +/* +string VARIABLE = "Monthly"; +const string run0 = "Res(1,3,6,12), LR=1e-3 {8,3e-4f},{13,1e-4f}, EPOCHS=14, LVP=50, 20*"; +const string runL = "alpha5L " + run0; +const string runH = "alpha5H " + run0; + +#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +vector> dilations = { { 1,3,6,12 } };//Each vector represents one chunk of Dilateed LSTMS, connected in resnNet fashion^M +const float INITIAL_LEARNING_RATE = 1e-3f; +const map LEARNING_RATES = { { 8,3e-4f },{ 13,1e-4f } }; //at which epoch we set them up to what^M +const float PER_SERIES_LR_MULTIP = 1; + +const int NUM_OF_TRAIN_EPOCHS = 14; +const unsigned int STATE_HSIZE = 50; + +const float LEVEL_VARIABILITY_PENALTY = 50; //Multiplier for L" penalty against wigglines of level vector. + +const int SEASONALITY = 12; +const unsigned int OUTPUT_SIZE = 18; +const unsigned int INPUT_SIZE = 12; +const int INPUT_SIZE_I = INPUT_SIZE; +const int OUTPUT_SIZE_I = OUTPUT_SIZE; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = INPUT_SIZE_I + OUTPUT_SIZE_I + MIN_INP_SEQ_LEN + 2; +const int MAX_SERIES_LENGTH = 40 * SEASONALITY + MIN_SERIES_LENGTH; //we are chopping longer series, to last, max e.g. 40 years + +const float ALPHA = 0.05; +const float TAUL = ALPHA / 2; +const float TAUH = 1 - TAUL; +const float ALPHA_MULTIP = 2 / ALPHA; +*/ + +Expression squash(const Expression& x) { + return log(x); +} + +Expression expand(const Expression& x) { + return exp(x); +} + +string INPUT_PATH = DATA_DIR + VARIABLE + "-train.csv"; +string INFO_INPUT_PATH = DATA_DIR + "M4-info.csv"; + +#if defined _DEBUG + const int MAX_NUM_OF_SERIES = 40; +#else + const int MAX_NUM_OF_SERIES = -1; //use all series +#endif // _DEBUG + +const unsigned int NUM_OF_CATEGORIES = 6;//in data provided +const int BIG_LOOP = 3; +const int NUM_OF_CHUNKS = 2; +const float EPS=1e-6; +const int AVERAGING_LEVEL=5; +const bool USE_MEDIAN = false; +const int MIDDLE_POS_FOR_AVG = 2; //if using medians + +const float NOISE_STD=0.001; +const int FREQ_OF_TEST=1; +const float GRADIENT_CLIPPING=20; +const float C_STATE_PENALTY = 0; + +const float BIG_FLOAT=1e38;//numeric_limits::max(); +const bool PRINT_DIAGN=true; +const unsigned ATTENTION_HSIZE=STATE_HSIZE; + +const bool USE_AUTO_LEARNING_RATE=false; +//if USE_AUTO_LEARNING_RATE, and only if LBACK>0 +const float MIN_LEARNING_RATE = 0.0001f; +const float LR_RATIO = sqrt(10); +const float LR_TOLERANCE_MULTIP = 1.005; +const int L3_PERIOD = 2; +const int MIN_EPOCHS_BEFORE_CHANGING_LRATE = 2; + + +#if defined USE_ODBC + void HandleDiagnosticRecord(SQLHANDLE hHandle, + SQLSMALLINT hType, + RETCODE RetCode); + + #if defined _WINDOWS + WCHAR* pwszConnStr = L"DSN=slawek"; + #else + SQLCHAR* pwszConnStr = (SQLCHAR*) "DSN=slawek"; + #endif + #define TRYODBC(h, ht, x) { RETCODE rc = x;\ + if (rc != SQL_SUCCESS) \ + { \ + HandleDiagnosticRecord (h, ht, rc); \ + } \ + if (rc == SQL_ERROR) \ + { \ + fprintf(stderr, "Error in " #x "\n"); \ + if (hStmt) { \ + SQLFreeHandle(SQL_HANDLE_STMT, hStmt); \ + } \ + if (hDbc) { \ + SQLDisconnect(hDbc); \ + SQLFreeHandle(SQL_HANDLE_DBC, hDbc); \ + } \ + if (hEnv) { \ + SQLFreeHandle(SQL_HANDLE_ENV, hEnv); \ + } \ + exit(-1); \ + } \ + } + +#endif + +struct M4TS {//storing series data + vector < float> categories_vect; + vector vals; + vector testVals;//empty, unless LBACK>0 + float meanAbsSeasDiff; + int n; + + M4TS(string category, stringstream &line_stream) { + array categories = { 0,0,0,0,0,0 }; + if (category == "Demographic") + categories[0] = 1; + else if 
(category == "Finance") + categories[1] = 1; + else if (category == "Industry") + categories[2] = 1; + else if (category == "Macro") + categories[3] = 1; + else if (category == "Micro") + categories[4] = 1; + else if (category == "Other") + categories[5] = 1; + else { + cerr << "unknown category?"; + exit(-1); + } + for (int i = 0; i < NUM_OF_CATEGORIES; i++) + categories_vect.push_back(categories[i]); + + string tmp_str; + while(getline(line_stream, tmp_str, ',' )) { + string val_str; + for (const auto c : tmp_str) { + if (c != '\"' && c != '\r') //remove quotes and very occasional double end of line + val_str.push_back(c); + } + if (val_str.size() == 0) + break; + float val=(atof(val_str.c_str())); + vals.push_back(val); + } + + meanAbsSeasDiff = 0; + float sumf = 0; + for (int ip = SEASONALITY; ip0) + meanAbsSeasDiff = sumf / (vals.size() - SEASONALITY); + + if (LBACK > 0) { //extract last OUTPUT_SIZE points as the test values + if (vals.size() > LBACK*OUTPUT_SIZE_I) { + auto first = vals.begin() + vals.size() - LBACK*OUTPUT_SIZE_I; + auto pastLast = vals.begin() + vals.size() - (LBACK-1)*OUTPUT_SIZE_I; + vector input_vect(first, pastLast); //[first,pastLast) + testVals= input_vect; + vals.resize(vals.size() - LBACK*OUTPUT_SIZE_I); //remove last LBACK*OUTPUT_SIZE elements + n = vals.size(); + } else + n = 0; + } else { + n = vals.size(); + } + if (n > MAX_SERIES_LENGTH) {//chop long series + vals.erase(vals.begin(), vals.begin() + (n-MAX_SERIES_LENGTH)); //remove some early data + n = vals.size(); + } + } + M4TS(){}; +}; + + +struct AdditionalParams {//Per series, important + Parameter levSm; + Parameter sSm; + array initSeasonality; +}; + +struct AdditionalParamsF {//Used for storing diagnostics + float levSm; + float sSm; + array initSeasonality; + vector levels; + vector seasons; +}; + +//loss function +Expression MSIS(const Expression& out_ex, const Expression& actuals_ex) { + vector losses; + for (unsigned int indx = 0; indx as_scalar(forecH.value())) + loss = loss + (actual - forecH)*ALPHA_MULTIP; + losses.push_back(loss); + } + Expression ret = sum(losses) / OUTPUT_SIZE; + #if defined _DEBUG + float retf = as_scalar(ret.value()); + if (retf>100) { + vector out_vect = as_vector(out_ex.value()); + vector actuals_vect = as_vector(actuals_ex.value()); + for (int i = 0; i0 and PERCENTILE!=50 +float wQuantLoss(vector& out_vect, vector& actuals_vect, float tau, int offset) {//used just for diagnostics, if if LBACK>0 and PERCENTILE!=50 + float sumf = 0; float suma = 0; + for (unsigned int indx = 0; indx forec) + sumf = sumf + (actual - forec)*tau; + else + sumf = sumf + (actual - forec)*(tau - 1); + } + return sumf / suma * 200; +} + +//MSIS operating on floats, used for validation +float errorFunc(vector& out_vect, vector& actuals_vect, float meanAbsSeasDiff) { + float sumf=0; + for (unsigned int indx = 0; indx forecH) + loss = loss + (actualf - forecH)*ALPHA_MULTIP; + sumf+=loss; + } + return sumf / (OUTPUT_SIZE*meanAbsSeasDiff); +} + + + + +int main(int argc, char** argv) { + dynet::initialize(argc, argv); + + int seedForChunks = 10; //Yes it runs, without any params, but it will work only on 1/NUM_OF_CHUNKS of all cases. The system is expected to run in NUM_OF_CHUNKS multiples. 
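+ //seedForChunks must be identical for the two members of a worker pair (chunkNo 1 and 2), so both
+ //make the same pseudo-random split of series into chunks; ibigOffset shifts the ibig index used in
+ //output file naming, so additional pairs (e.g. started with offsets 0, 5, 10) or a restarted run
+ //produce distinct files that can all be ensembled later in R.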
+ int chunkNo = 1; + int ibigOffset = 0; + if (argc >= 3) { + seedForChunks = atoi(argv[1]); + chunkNo = atoi(argv[2]); + } + if (argc >= 4) + ibigOffset = atoi(argv[3]); + + if (chunkNo > NUM_OF_CHUNKS) { + cerr << "chunkNo > NUM_OF_CHUNKS"; + exit(-1); + } + else if (chunkNo <= 0) { + cerr << "chunkNo <= 0"; + exit(-1); + } + + cout<0) + std::cout<< " ibigOffset:"<< ibigOffset; //if continuing prematurely stopped run + if (LBACK>0) + std::cout<<" lback:"<tm_year+1900; + now_ts.month=now->tm_mon+1; + now_ts.day=now->tm_mday; + now_ts.hour=now->tm_hour; + now_ts.minute=now->tm_min; + now_ts.second=now->tm_sec; + now_ts.fraction=0; //reportedly needed + + const int OFFSET_TO_FIRST_ACTUAL=5; + string insertQuery_str = "insert into M72nn(run, LBack, ibig, series, epoch "; + for (int iq = 1; iq <= OUTPUT_SIZE_I; iq++) { + stringstream ss; + ss << iq; + string iq_str = ss.str(); + insertQuery_str = insertQuery_str +", actual"+iq_str+", forec" + iq_str; + } + insertQuery_str = insertQuery_str +", trainingError, variable, n, dateTimeOfPrediction) \ + values(? , ? , ? , ? , ? "; + for (int iq = 1; iq <= OUTPUT_SIZE_I; iq++) { + insertQuery_str = insertQuery_str + ",?,?"; + } + insertQuery_str = insertQuery_str + ",?,?,?,?)"; + #if defined _WINDOWS + wstring insertQuery(insertQuery_str.begin(), insertQuery_str.end()); + SQLWCHAR* sqlQuery = (SQLWCHAR*)insertQuery.c_str(); + #else + SQLCHAR* sqlQuery =(SQLCHAR*)insertQuery_str.c_str(); + #endif + + + SQLHENV hEnv = NULL; + SQLHDBC hDbc = NULL; + SQLHSTMT hStmt = NULL, hInsertStmt = NULL; + + if (SQLAllocHandle(SQL_HANDLE_ENV, SQL_NULL_HANDLE, &hEnv) == SQL_ERROR) { + fprintf(stderr, "Unable to allocate an environment handle\n"); + exit(-1); + } + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLSetEnvAttr(hEnv, + SQL_ATTR_ODBC_VERSION, + (SQLPOINTER)SQL_OV_ODBC3, + 0)); + + // Allocate a connection + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLAllocHandle(SQL_HANDLE_DBC, hEnv, &hDbc)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLDriverConnect(hDbc, + NULL, + pwszConnStr, + SQL_NTS, + NULL, + 0, + NULL, + SQL_DRIVER_COMPLETE)); + fprintf(stderr, "Connected!\n"); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLSetConnectAttr(hDbc, SQL_ATTR_AUTOCOMMIT, (SQLPOINTER)SQL_AUTOCOMMIT_OFF, SQL_IS_INTEGER)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLAllocHandle(SQL_HANDLE_STMT, hDbc, &hInsertStmt)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLPrepare(hInsertStmt, sqlQuery, SQL_NTS)); + + SQLLEN nullTerminatedStringOfRun = SQL_NTS; + SQLLEN nullTerminatedStringOfSeries = SQL_NTS; + SQLLEN nullTerminatedStringOfVariable = SQL_NTS; + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 2, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&LBACK, 0, NULL)); + + // variable, n, dateTimeOfPrediction + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL+2*OUTPUT_SIZE_I+2, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)VARIABLE.c_str(), 0, &nullTerminatedStringOfVariable)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE_I + 4, SQL_PARAM_INPUT, SQL_C_TYPE_TIMESTAMP, SQL_TYPE_TIMESTAMP, 0, 0, &now_ts, sizeof(TIMESTAMP_STRUCT), NULL)); +#endif + + random_device rd; // only used once to initialise (seed) engine + mt19937 rng(rd()); // random-number engine used (Mersenne-Twister) + mt19937 rngForChunks(seedForChunks); + + vector series_vect; + unordered_map allSeries_map(30000);//max series in one chunk would be 48/2=24k, for monthly series + 
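+ //How the series get partitioned (a reading of the code that follows, stated here for orientation):
+ //eligible series ids are pushed into series_vect, the vector is shuffled with rngForChunks
+ //(seeded with seedForChunks, hence identical in every worker started with the same seed), and is
+ //then cut into NUM_OF_CHUNKS consecutive blocks of roughly chunkSize=series_len/NUM_OF_CHUNKS;
+ //this process trains only on the block selected by its chunkNo, with the last chunk absorbing
+ //any remainder.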
unordered_map seriesCategories_map(120000);//100k series + + ifstream infoFile(INFO_INPUT_PATH); + string line; + getline(infoFile, line); //header + while (getline(infoFile, line)) { + //cout << string( line)<= MIN_SERIES_LENGTH) { + series_vect.push_back(series); + if (m4Obj.meanAbsSeasDiff==0) { + cout<<"Warning, flat series:"<0 && series_vect.size()>=MAX_NUM_OF_SERIES) + break; + } + + int series_len=(int)series_vect.size(); + int chunkSize= series_len/NUM_OF_CHUNKS; + std::cout << "num of series:" << series_vect.size() <<" size of chunk:"<< chunkSize< uniOnSeries(0, chunkSize -1); // closed interval [a, b] + + unordered_map, AVERAGING_LEVEL+1>> testResults_map((int)chunkSize*1.5); + set diagSeries; + + for (int ibig=0; ibig perfValid_vect; + int epochOfLastChangeOfLRate = -1; + +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&ibigDb, 0, NULL)); +#endif + + ParameterCollection pc; + ParameterCollection perSeriesPC; + + float learning_rate= INITIAL_LEARNING_RATE; + AdamTrainer trainer(pc, learning_rate, 0.9, 0.999, EPS); + trainer.clip_threshold = GRADIENT_CLIPPING; + AdamTrainer perSeriesTrainer(perSeriesPC, learning_rate*PER_SERIES_LR_MULTIP, 0.9, 0.999, EPS); + perSeriesTrainer.clip_threshold = GRADIENT_CLIPPING; + + #if defined USE_RESIDUAL_LSTM + vector rNNStack; + rNNStack.emplace_back(ResidualDilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, pc)); + for (int il = 1; il rNNStack; + rNNStack.emplace_back(AttentiveDilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, ATTENTION_HSIZE, pc)); + for (int il = 1; il rNNStack; + rNNStack.emplace_back(DilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, pc)); + for (int il = 1; il oneChunk_vect(start,end); + if (PRINT_DIAGN) { + for (int k = 0; k<10; k++) //diag + cout << oneChunk_vect[k] << " "; + cout << endl; + } + if (chunkNo == NUM_OF_CHUNKS) + cout<<"last chunk size:"<< oneChunk_vect.size()< additionalParams_map((int)oneChunk_vect.size()*1.5); //per series + unordered_map*> historyOfAdditionalParams_map((int)oneChunk_vect.size()*1.5); + for (auto iter = oneChunk_vect.begin(); iter != oneChunk_vect.end(); ++iter) {//setup + string series = *iter; + AdditionalParams addParams; + addParams.levSm = perSeriesPC.add_parameters({ 1 }, 0.5); //level smoothing + addParams.sSm = perSeriesPC.add_parameters({ 1 }, 0.5); //seasonality smoothing + for (int isea = 0; isea(); + } + + for (int iEpoch=0; iEpoch testLosses; //test losses of all series in this epoch + vector testAvgLosses; //test avg (over last few epochs) losses of all series in this epoch + vector testLossesL; //lower quantile loss + vector testAvgLossesL; //lower quantile loss + vector testLossesH; //higher quantile loss + vector testAvgLossesH; //higher quantile loss + vector trainingLosses; //training losses of all series in one epoch + vector forecLosses; vector levVarLosses; vector stateLosses; + #if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 5, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&iEpoch, 0, NULL)); + #endif + + for (auto iter = oneChunk_vect.begin() ; iter != oneChunk_vect.end(); ++iter) { + string series=*iter; + auto m4Obj = allSeries_map[series]; + + #if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 4, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, 
(SQLCHAR*)series.c_str(), 0, &nullTerminatedStringOfSeries)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE_I + 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&m4Obj.n, 0, NULL)); + #endif + + ComputationGraph cg; + for (int il=0; il season_exVect;//vector, because we do not know how long the series is + for (int iseas=0; iseas seas==1 + season_exVect.push_back(seas);//Expression is a simple struct, without any storage management, so the auto copy constructor works OK. + } + season_exVect.push_back(season_exVect[0]); + + vector logDiffOfLevels_vect; + vector levels_exVect; + Expression lev=cdiv(input(cg, m4Obj.vals[0]), season_exVect[0]); + levels_exVect.push_back(lev); + for (int i=1; i 0) { + vector levelVarLoss_v; + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx=season_exVect.size()-SEASONALITY; + for (int i=0;i<(OUTPUT_SIZE_I-SEASONALITY);i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx+i]); + } + vector losses; + for (int i=INPUT_SIZE_I-1; i<(m4Obj.n- OUTPUT_SIZE_I); i++) { + vector::const_iterator firstE = season_exVect.begin() +i+1-INPUT_SIZE_I; + vector::const_iterator pastLastE = season_exVect.begin() +i+1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression inputSeasonality_ex=concatenate(inputSeasonality_exVect); + + vector::const_iterator first = m4Obj.vals.begin() +i+1-INPUT_SIZE_I; + vector::const_iterator pastLast = m4Obj.vals.begin() +i+1; //not including the last one + vector input_vect(first, pastLast); //[first,pastLast) + Expression input0_ex=input(cg,{INPUT_SIZE},input_vect); + Expression input1_ex=cdiv(input0_ex,inputSeasonality_ex); //deseasonalization + vector joinedInput_ex; + input1_ex= cdiv(input1_ex, levels_exVect[i]); + joinedInput_ex.emplace_back(noise(squash(input1_ex), NOISE_STD)); //normalization+noise + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality_ex=concatenate(outputSeasonality_exVect); + + first = m4Obj.vals.begin() +i+1; + pastLast = m4Obj.vals.begin() +i+1+OUTPUT_SIZE_I; + vector labels_vect(first, pastLast); //[first,pastLast) + Expression labels0_ex=input(cg,{OUTPUT_SIZE},labels_vect); + Expression labels1_ex=cdiv(labels0_ex,outputSeasonality_ex); //deseasonalization + labels1_ex= cdiv(labels1_ex, levels_exVect[i]);//normalization + Expression labels_ex=squash(labels1_ex); + + Expression loss_ex=MSIS(out_ex, labels_ex);//although out_ex has doubled size, labels_ex have normal size. NB, we do not have duplicated labels during training. 
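+ //What the MSIS() expression above computes, sketched on plain floats (an assumption based on the
+ //standard Mean Scaled Interval Score and the visible fragments of MSIS(); forecL/forecH are the
+ //lower/upper halves of the doubled network output, y is the label, ALPHA_MULTIP==2/ALPHA):
+ //  float intervalScore(float y, float forecL, float forecH) {
+ //    float s = forecH - forecL;                         //width of the prediction interval
+ //    if (y < forecL) s += (forecL - y) * ALPHA_MULTIP;  //penalty when the actual falls below it
+ //    if (y > forecH) s += (y - forecH) * ALPHA_MULTIP;  //penalty when the actual falls above it
+ //    return s;
+ //  }
+ //The per-step scores are averaged over OUTPUT_SIZE; the float errorFunc() used for validation
+ //additionally divides by meanAbsSeasDiff, which supplies the scaling part of MSIS.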
+ //Expression loss_ex=pinBallLoss(out_ex, labels_ex); + if (i>=INPUT_SIZE_I+MIN_INP_SEQ_LEN) + losses.push_back(loss_ex); + } + + Expression forecLoss_ex= average(losses); + Expression loss_exp = forecLoss_ex; + + float levVarLoss=0; + if (LEVEL_VARIABILITY_PENALTY > 0) { + Expression levelVarLossP_ex = levelVarLoss_ex*LEVEL_VARIABILITY_PENALTY; + levVarLoss = as_scalar(levelVarLossP_ex.value()); + levVarLosses.push_back(levVarLoss); + loss_exp= loss_exp + levelVarLossP_ex; + } + + float cStateLoss=0; + if (C_STATE_PENALTY>0) { + vector cStateLosses_vEx; + for (int irnn = 0; irnn < rNNStack.size(); irnn++) + for (int it = 0; it maxAbs) { + maxAbs = abs(state[iv]); + timeOfMax = it; + layerOfMax = il; + chunkOfMax = irnn; + } + } + } //through layers/states + } //through time + } //through chunks + + cout << "levSm:" << as_scalar(levSm_ex.value()) << endl; + cout << "sSm:" << as_scalar(sSm_ex.value()) << endl; + cout << " min season=" << minSeason << endl; + cout << " min level=" << minLevel << endl; + cout << " max abs:" << maxAbs << " at time:" << timeOfMax << " at layer:" << layerOfMax << " and chunk:" << chunkOfMax << endl; + + //diagSeries.insert(series); + pc.reset_gradient(); + perSeriesPC.reset_gradient(); + } + + //saving per-series values for diagnostics purposes + AdditionalParamsF &histAdditionalParams= historyOfAdditionalParams_map[series]->at(iEpoch); + histAdditionalParams.levSm=as_scalar(levSm_ex.value()); + histAdditionalParams.sSm=as_scalar(sSm_ex.value()); + for (int isea=0; isea::const_iterator firstE = season_exVect.begin() + i + 1 - INPUT_SIZE_I; + vector::const_iterator pastLastE = season_exVect.begin() + i + 1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression inputSeasonality_ex = concatenate(inputSeasonality_exVect); + + vector::const_iterator first = m4Obj.vals.begin() + i + 1 - INPUT_SIZE_I; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1; //not including the last one + vector input_vect(first, pastLast); //[first,pastLast) + Expression input0_ex = input(cg, { INPUT_SIZE }, input_vect); + Expression input1_ex = cdiv(input0_ex, inputSeasonality_ex); //deseasonalization + vector joinedInput_ex; + input1_ex= cdiv(input1_ex, levels_exVect[i]);//normalization + joinedInput_ex.emplace_back(squash(input1_ex)); + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + for (int ios=0; ios out_vect = as_vector(out_ex.value()); + + if (LBACK > 0) { + float qLoss = errorFunc(out_vect, m4Obj.testVals, m4Obj.meanAbsSeasDiff); + testLosses.push_back(qLoss); + + qLoss = wQuantLoss(out_vect, m4Obj.testVals, TAUL, 0); + testLossesL.push_back(qLoss); + + qLoss = wQuantLoss(out_vect, m4Obj.testVals, TAUH, OUTPUT_SIZE); + testLossesH.push_back(qLoss); + } + + testResults_map[series][iEpoch%AVERAGING_LEVEL] = out_vect; + if (iEpoch >= AVERAGING_LEVEL) { + if (USE_MEDIAN) { + if (testResults_map[series][AVERAGING_LEVEL].size() == 0) + testResults_map[series][AVERAGING_LEVEL] = out_vect; //just to initialized, to make space. 
The values will be overwritten + for (int iii = 0; iii < OUTPUT_SIZE_I*2; iii++) { + vector temp_vect2; + for (int ii = 0; ii firstForec = testResults_map[series][0]; + testResults_map[series][AVERAGING_LEVEL] = firstForec; + for (int ii = 1; ii nextForec = testResults_map[series][ii]; + for (int iii = 0; iii 0) { + float qLoss = errorFunc(testResults_map[series][AVERAGING_LEVEL], m4Obj.testVals, m4Obj.meanAbsSeasDiff); + testAvgLosses.push_back(qLoss); + + qLoss = wQuantLoss(testResults_map[series][AVERAGING_LEVEL], m4Obj.testVals, TAUL, 0); + testAvgLossesL.push_back(qLoss); + + qLoss = wQuantLoss(testResults_map[series][AVERAGING_LEVEL], m4Obj.testVals, TAUH, OUTPUT_SIZE); + testAvgLossesH.push_back(qLoss); + + #if defined USE_ODBC //save + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE_I + 1, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&forecastLoss, 0, NULL)); + + for (int iv = 0; iv<2; iv++) { + if (iv == 0) + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 1, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)runL.c_str(), 0, &nullTerminatedStringOfRun)) + else + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 1, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)runH.c_str(), 0, &nullTerminatedStringOfRun)); + + for (int io = 0; io < OUTPUT_SIZE_I; io++) { + int ipos=OFFSET_TO_FIRST_ACTUAL + 1 + 2*io; + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, ipos, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&m4Obj.testVals[io], 0, NULL)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, ipos+1, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&testResults_map[series][AVERAGING_LEVEL][io + iv*OUTPUT_SIZE_I], 0, NULL)); + } + if (MAX_NUM_OF_SERIES<0) + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLExecute(hInsertStmt)); + } + #endif + } //lback>0 + } //time to average + }//last anchor point of the series + }//through TEST loop + }//through series + + + if (iEpoch % FREQ_OF_TEST == 0) { + float averageTrainingLoss = accumulate(trainingLosses.begin(), trainingLosses.end(), 0.0) / trainingLosses.size(); + + cout << ibig << " " << iEpoch << " loss:" << averageTrainingLoss * 100; + if (LEVEL_VARIABILITY_PENALTY > 0 || C_STATE_PENALTY > 0) { + float averageForecLoss = accumulate(forecLosses.begin(), forecLosses.end(), 0.0) / forecLosses.size(); + cout << " forecast loss:" << averageForecLoss*100; + } + if (LEVEL_VARIABILITY_PENALTY > 0) { + float averagelevVarLoss = accumulate(levVarLosses.begin(), levVarLosses.end(), 0.0) / levVarLosses.size(); + cout << " levVar loss:" << averagelevVarLoss * 100; + } + if (C_STATE_PENALTY > 0) { + float averageStateLoss = accumulate(stateLosses.begin(), stateLosses.end(), 0.0) / stateLosses.size(); + cout << " state loss:" << averageStateLoss * 100; + } + + float averageTestLoss=0; + if (LBACK > 0) { + float averageTestLoss = accumulate(testLosses.begin(), testLosses.end(), 0.0) / testLosses.size(); + float averageTestLossL = accumulate(testLossesL.begin(), testLossesL.end(), 0.0) / testLossesL.size(); + float averageTestLossH = accumulate(testLossesH.begin(), testLossesH.end(), 0.0) / testLossesH.size(); + cout<<" Test loss:" << averageTestLoss<<" L:"<< averageTestLossL<<" H:"<< averageTestLossH; + if (iEpoch >= AVERAGING_LEVEL) { + float averageTestAvgLoss = accumulate(testAvgLosses.begin(), testAvgLosses.end(), 0.0) / testAvgLosses.size();//of 
this epoch + float averageTestAvgLossL = accumulate(testAvgLossesL.begin(), testAvgLossesL.end(), 0.0) / testAvgLossesL.size();//of this epoch + float averageTestAvgLossH = accumulate(testAvgLossesH.begin(), testAvgLossesH.end(), 0.0) / testAvgLossesH.size();//of this epoch + cout << " avgLoss:" << averageTestAvgLoss<<" L:"<< averageTestAvgLossL<<" H:"<< averageTestAvgLossH<= 2) { + if (iEpoch < L3_PERIOD) + changeL2Rate = perfValid_vect[perfValid_vect.size() - 2] MIN_LEARNING_RATE && (iEpoch - epochOfLastChangeOfLRate) >= MIN_EPOCHS_BEFORE_CHANGING_LRATE) { + learning_rate /= LR_RATIO; + cout << "decreasing LR to:" << learning_rate << endl; + epochOfLastChangeOfLRate = iEpoch; + trainer.learning_rate = learning_rate; + } + } + #if defined USE_ODBC + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLEndTran( + SQL_HANDLE_DBC, + hDbc, + SQL_COMMIT)); + #endif + }//through epochs + + if (PRINT_DIAGN) {//some diagnostic info + set diagSeries; + for (int i = 0; i<1; i++) {//add a few normal ones + int irand = uniOnSeries(rng); + diagSeries.insert(oneChunk_vect[irand]); + } + for (auto series : diagSeries) { + cout << endl << series << endl; + array* historyOfAdditionalParams_ptrToArr = historyOfAdditionalParams_map[series]; + cout << "lSm:" << endl; + for (int iEpoch = 0; iEpochat(iEpoch).levSm << " "; + cout << endl; + cout << "sSm:" << endl; + for (int iEpoch = 0; iEpochat(iEpoch).sSm << " "; + cout << endl; + cout << "seasons:" << endl; + for (int isea = 0; iseaat(iEpoch).initSeasonality[isea] << " "; + cout << endl; + } + cout << endl; + for (int iEpoch = 0; iEpochat(iEpoch).levels.size()>0) { + cout << "levels:" << iEpoch << " "; + for (int iv = 0; ivat(iEpoch).levels.size(); iv++) + cout << historyOfAdditionalParams_ptrToArr->at(iEpoch).levels[iv] << ", "; + cout << endl; + cout << "seas:" << iEpoch << " "; + for (int iv = 0; ivat(iEpoch).seasons.size(); iv++) + cout << historyOfAdditionalParams_ptrToArr->at(iEpoch).seasons[iv] << ", "; + cout << endl; + } + } + } + } + + //save the forecast to outputFile + ofstream outputFile; + outputFile.open(outputPathL); + for (auto iter = oneChunk_vect.begin(); iter != oneChunk_vect.end(); ++iter) { + string series = *iter; + outputFile<< series; + for (int io=0; io + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + RelWithDebug + Win32 + + + RelWithDebug + x64 + + + + + + + + + + + {A16B5466-E680-43F6-A884-A4A01EB78E50} + Win32Proj + M42 + 8.1 + + + + Application + true + v140 + Unicode + + + Application + true + v140 + Unicode + + + Application + false + v140 + true + Unicode + + + Application + true + v140 + Unicode + Sequential + + + Application + true + v140 + Unicode + Sequential + + + Application + false + v140 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + true + + + true + + + true + + + true + + + false + + + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + + + + + NotUsing + Level1 + Disabled + WIN32;_WINDOWS;EIGEN_USE_MKL_ALL;EIGEN_FAST_MATH;NOMINMAX;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + E:\progs2\dynet;E:\progs\Eigen; + + + Console + true + E:\progs2\dynet\buildMKL\dynet\Debug + dynet.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + NotUsing + Level1 + MaxSpeed + 
WIN32;_WINDOWS;EIGEN_FAST_MATH;EIGEN_USE_MKL_ALL;NOMINMAX;NDEBUG_;CONSOLE;%(PreprocessorDefinitions) + E:\progs2\dynet;E:\progs\Eigen; + AnySuitable + true + Speed + AdvancedVectorExtensions + Default + MultiThreadedDLL + ProgramDatabase + true + false + + + Console + true + E:\progs2\dynet\buildMKL\dynet\RelWithDebInfo + dynet.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + true + true + + + + + + \ No newline at end of file diff --git a/118 - slaweks17/c++/windows_VisualStudio/M42/M42.vcxproj.filters b/118 - slaweks17/c++/windows_VisualStudio/M42/M42.vcxproj.filters new file mode 100644 index 0000000..b8ac1c3 --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/M42/M42.vcxproj.filters @@ -0,0 +1,30 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + + + Header Files + + + \ No newline at end of file diff --git a/118 - slaweks17/c++/windows_VisualStudio/M42/slstm.h b/118 - slaweks17/c++/windows_VisualStudio/M42/slstm.h new file mode 100644 index 0000000..adb63a7 --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/M42/slstm.h @@ -0,0 +1,394 @@ +/** +* file slstm.h +* header for my implementation of dilated LSTMs, based on Dynet LSTM builders + - DilatedLSTMBuilder - standard Dilated LSTM (https://papers.nips.cc/paper/6613-dilated-recurrent-neural-networks.pdf) + - ResidualDilatedLSTMBuilder - Dilated LSTM with special Residual shortcuts, after https://arxiv.org/abs/1701.03360 + - AttentiveDilatedLSTMBuilder - Dilated LSTM with Attention mechanism, as in the second stage of https://arxiv.org/abs/1704.02971 +* +Slawek Smyl, Mar-May 2018 +*/ + +#ifndef DYNET_SLSTMS_H_ +#define DYNET_SLSTMS_H_ + +#include "dynet/dynet.h" +#include "dynet/rnn.h" +#include "dynet/expr.h" + +using namespace std; + +namespace dynet { + + //basd on VanillaLSTMBuilder + struct ResidualDilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + ResidualDilatedLSTMBuilder(); + /** + * \brief Constructor for the ResidualDilatedLSTMBuilder + * + * \param dilations Vector of dilations + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + * \param ln_lstm Whether to use layer normalization + * \param forget_bias value(float) to use as bias for the forget gate(default = 1.0) + */ + explicit ResidualDilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model, + bool ln_lstm = false, + float forget_bias = 1.f); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? 
c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. + * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + * \param d_h Dropout rate \f$d_h\f$ for the output \f$h_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + /** + * \brief Get parameters in ResidualDilatedLSTMBuilder + * \return list of points to ParameterStorage objects + */ + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + // first index is layer, then ... + std::vector> ln_params; + + // first index is layer, then ... + std::vector> param_vars; + // first index is layer, then ... + std::vector> ln_param_vars; + + // first index is layer, then ... 
+ std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + float dropout_rate_h; + bool ln_lstm; + float forget_bias; + bool dropout_masks_valid; + vector dilations; //one int per layer + + private: + ComputationGraph* _cg; // Pointer to current cg + + }; + + + struct DilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + DilatedLSTMBuilder(); + /** + * \brief Constructor for the DilatedLSTMBuilder + * + * \param dilations Vector of dilations + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + */ + explicit DilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 
+ * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + + void set_weightnoise(float std); + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + + // first index is layer, then ... + std::vector> param_vars; + + // first index is layer, then ... + std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + float dropout_rate_h; + float weightnoise_std; + vector dilations; //one int per layer + + bool dropout_masks_valid; + private: + ComputationGraph* _cg; // Pointer to current cg + + }; + + + struct AttentiveDilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + AttentiveDilatedLSTMBuilder(); + /** + * \brief Constructor for the AttentiveDilatedLSTMBuilder + * + * \param max_dilations Vector, maximum dilations (per layer) + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + */ + explicit AttentiveDilatedLSTMBuilder(vector max_dilations, + unsigned input_dim, + unsigned hidden_dim, + unsigned attention_dim, + ParameterCollection& model); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? 
c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. + * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + + void set_weightnoise(float std); + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + + // first index is layer, then ... + std::vector> param_vars; + + // first index is layer, then ... 
+ std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + unsigned attention_dim; + float dropout_rate_h; + float weightnoise_std; + vector max_dilations; //one int per layer + + bool dropout_masks_valid; + private: + ComputationGraph* _cg; // Pointer to current cg + + }; +} // namespace dynet + +#endif diff --git a/118 - slaweks17/c++/windows_VisualStudio/M43/ES_RNN_E.cc b/118 - slaweks17/c++/windows_VisualStudio/M43/ES_RNN_E.cc new file mode 100644 index 0000000..aaf4659 --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/M43/ES_RNN_E.cc @@ -0,0 +1,1665 @@ +/*ES-RNN-E: Exponential Smoothing Recurrent Neural Network hybrid, Ensemble of specialists. Point forecast. +Slawek Smyl, Jan-May 2017. + +Dilated LSTMs, with optional shortcuts, attention. Non-seasonal, single, or double seasonal. +It is meant to be used for all types of series from M4 competition, except Monthly and Quarterly (for performance reasons - it is slower). +The program uses and requires Dynet NN library(https://github.com/clab/dynet); can be compiled and run on Windows, Linux, and Mac. + +In contradistinction to ES-RNN, each executable uses all series, but in a similar manner repeating the whole learning process BIG_LOOP times (by default 3). +Invocation should pass BIG_LOOP offset +so e.g. create a script with following lines on Windows +start 0 +start 10 +start 20 +start 30 +on 4-core computer. +In this setup, learning and fitting would be repeated 4*3 times, probably unnecessarily too many, 6-8 independent runs should be enough for a good ensemble. +Therefore if running on say 8 core machine , one can extend the above script to 8 concurrent executions and reduce BIG_LOOP to 1. +(Creating final forecasts is done in a supplied R script) + +There are four blocks of parameters below, one active (starting with //PARAMS--------------) and three inactive. +These blocks are as they were during the final forecasting run. You need comment/uncomment to have one block of interest active. +*/ + + +//#define USE_ODBC +//define USE_ODBC if you want to +// 1. run the program in backtesting mode (which means you also need to set LBACK>0 below. Read the comment below. +// 2. save forecasts to a datatabase. Mysql and SQL Server were tested. The table creation and some other scripts should be found in \sql directory of the source code. +// Of course setting up ODBC is not that simple, :-), e.g. you need to create DSN=slawek, that points to a database with the output table. +// Saving to the db is convenient, but not necessary - all forecasts are always saved to as csv files in automatically created subdirectory (sorry sometimes two directories, so you have to copy :-)) of OUTPUT_DIR +//If saving to database you need to modify run varaible, for each new run, otherwise you will get the table key error. 
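+//Invocation note: unlike the chunked executable earlier in this patch (which takes seedForChunks,
+//chunkNo and an optional ibigOffset), this program reads at most one command-line argument, the
+//ibigOffset (see the argc handling in main() below). The "start 0 / start 10 / start 20 / start 30"
+//script above therefore passes only that offset, which shifts the numbering of the BIG_LOOP
+//repetitions across concurrently running processes.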
+ +#include "dynet/dynet.h" +#include "dynet/training.h" +#include "dynet/expr.h" +#include "dynet/io.h" +#include "dynet/model.h" +#include "dynet/nodes.h" +#include "dynet/expr.h" +#include "dynet/lstm.h" +#include "slstm.h" //my implementation of dilated LSTMs + + +#if defined USE_ODBC + #if defined _WINDOWS + #include + #endif + #include + #include +#endif + +#include +#include +#include +//#include +#include +#include +#include +#include + +using namespace std; +using namespace dynet; + + +string DATA_DIR = "f:\\progs\\data\\M4DataSet\\"; //with the competition data csvs +//string DATA_DIR="/home/uber/progs/data/M4DataSet/"; +string OUTPUT_DIR = "f:\\progs\\data\\M4\\"; +//string OUTPUT_DIR="/home/uber/progs/data/M4/"; + +int LBACK = 0; //LBACK 0 means final mode: learning on all data and forecasting. LBACK=1 would move back by OUTPUT_SIZE, and forecast last known OUTPUT_SIZE points, for backtesting. LBACK could be a larger integer, but then number of series shrinks. + + +//PARAMS-------------- +string VARIABLE = "Hourly"; +const string run = "50/49 Att 4/5 1,4)(24,168) LR=0.01,{7,5e-3f},{18,1e-3f},{22,3e-4f} EPOCHS=27, LVP=10, CSP=1"; + +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +const float PERCENTILE = 50; //we always use Pinball loss. When forecasting point value, we actually forecast median, so PERCENTILE=50 +const float TRAINING_PERCENTILE = 49; //the program has a tendency for positive bias. So, we can reduce it by running smaller TRAINING_PERCENTILE + +const int SEASONALITY_NUM = 2;//0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 24; +const int SEASONALITY2 = 168; +vector> dilations = { { 1,4 },{ 24, 168 } }; + +const float INITIAL_LEARNING_RATE = 0.01f; +const map LEARNING_RATES = { { 7,5e-3f },{ 18,1e-3f },{ 22,3e-4f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 27; + +float LEVEL_VARIABILITY_PENALTY = 10; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 1; + +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 24; +const unsigned int OUTPUT_SIZE = 48; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK +const int MAX_SERIES_LENGTH = 53 * SEASONALITY2 + MIN_SERIES_LENGTH; //==all +const int TOPN = 4; + + +/* +string VARIABLE = "Weekly"; +const string run = "50/47 Att 3/5 (1,52) LR=1e-3 {11,3e-4f}, {17,1e-4f} EPOCHS=23, LVP=100 6y"; + +const int PERCENTILE = 50; //we always use Pinball loss. When forecasting point value, we actually forecast median, so PERCENTILE=50 +const int TRAINING_PERCENTILE = 47; //the program has a tendency for positive bias. 
So, we can reduce it by running smaller TRAINING_PERCENTILE + +//#define USE_RESIDUAL_LSTM +#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +const int SEASONALITY_NUM = 0; //0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 52; +const int SEASONALITY2 = 0; +vector> dilations = { { 1, 52 } }; + +const float INITIAL_LEARNING_RATE = 1e-3; +const map LEARNING_RATES = { { 11,3e-4f },{ 17,1e-4f } }; //at which epoch we manually set them up to what +const int NUM_OF_TRAIN_EPOCHS = 23; + +float LEVEL_VARIABILITY_PENALTY = 100; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; +const float PER_SERIES_LR_MULTIP = 1; + +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 10; +const unsigned int OUTPUT_SIZE = 13; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK + //#Min. 1st Qu. Median Mean 3rd Qu. Max. + //#81 380 935 1023 1604 2598 +const int MAX_SERIES_LENGTH = 6 * SEASONALITY + MIN_SERIES_LENGTH; //==all +const int TOPN = 3; +*/ + +/* +string VARIABLE = "Daily"; +const string run = "Final 50/49 730 4/5 (1,3)(7,14) LR=3e-4 {9,1e-4f} EPOCHS=13, LVP=100 13w"; +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +const int PERCENTILE = 50; //we always use Pinball loss. When forecasting point value, we actually forecast median, so PERCENTILE=50 +const int TRAINING_PERCENTILE = 49; //the program has a tendency for positive bias. So, we can reduce it by running smaller TRAINING_PERCENTILE + +const int SEASONALITY_NUM = 1; //0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 7; +const int SEASONALITY2 = 0; +vector> dilations = { { 1,3 },{ 7, 14 } }; + +const float INITIAL_LEARNING_RATE = 3e-4; +const map LEARNING_RATES = { { 9,1e-4f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 13; + +float LEVEL_VARIABILITY_PENALTY = 100; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; + +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 7; +const unsigned int OUTPUT_SIZE = 14; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK + //#Min. 1st Qu. Median Mean 3rd Qu. Max. + //##93 323 2940 2357 4197 9919 +const int MAX_SERIES_LENGTH = 13 * SEASONALITY + MIN_SERIES_LENGTH; +const int TOPN = 4; +*/ + +/* +string VARIABLE = "Yearly"; +const string run = "50 Att 4/5 (1,6) LR=1e-4 EPOCHS=12, 60*"; + +//#define USE_RESIDUAL_LSTM +#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +const float PERCENTILE = 50; //we always use Pinball loss. 
When forecasting point value, we actually forecast median, so PERCENTILE=50 +const float TRAINING_PERCENTILE = 50; + +const int SEASONALITY_NUM = 0; //0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 0; +const int SEASONALITY2 = 0; +vector> dilations = { { 1,6 } }; + +const float INITIAL_LEARNING_RATE = 1e-4; +const map LEARNING_RATES = { { 15,1e-5 } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 12; + +float LEVEL_VARIABILITY_PENALTY = 0; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; + +const unsigned int STATE_HSIZE = 30; + +const unsigned int INPUT_SIZE = 4; +const unsigned int OUTPUT_SIZE = 6; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK + //#Min. 1st Qu. Median Mean 3rd Qu. Max. + //#13.00 20.00 29.00 31.32 40.00 835.00 +const int MAX_SERIES_LENGTH = 60 + MIN_SERIES_LENGTH; +const int TOPN = 4; +*/ + +//end of VARIABLE-specific params + +const int BIG_LOOP = 3; +const int NUM_OF_NETS = 5; +const unsigned int ATTENTION_HSIZE = STATE_HSIZE; + + +#if defined _DEBUG + const int MAX_NUM_OF_SERIES = 20; +#else + const int MAX_NUM_OF_SERIES = -1; +#endif // _DEBUG + +const unsigned int NUM_OF_CATEGORIES = 6; +const int AVERAGING_LEVEL = 5; +const float EPS=1e-6; + +const float NOISE_STD=0.001; +const int FREQ_OF_TEST=1; +const float GRADIENT_CLIPPING=50; +const float BIG_FLOAT=1e38;//numeric_limits::max(); +const bool PRINT_DIAGN = false; +const float TAU = PERCENTILE / 100.; +const float TRAINING_TAU = TRAINING_PERCENTILE / 100.; + +string INPUT_PATH = DATA_DIR + VARIABLE + "-train.csv"; +string INFO_INPUT_PATH = DATA_DIR + "M4-info.csv"; + + +Expression squash(const Expression& x) { + return log(x); +} +float squash(float x) { + return log(x); +} + +Expression expand(const Expression& x) { + return exp(x); +} +float expand(float x) { + return exp(x); +} + + +#if defined USE_ODBC + void HandleDiagnosticRecord(SQLHANDLE hHandle, + SQLSMALLINT hType, + RETCODE RetCode); + + #if defined _WINDOWS + WCHAR* pwszConnStr = L"DSN=slawek"; + #else + SQLCHAR* pwszConnStr = (SQLCHAR*) "DSN=slawek"; + #endif + #define TRYODBC(h, ht, x) { RETCODE rc = x;\ + if (rc != SQL_SUCCESS) \ + { \ + HandleDiagnosticRecord (h, ht, rc); \ + } \ + if (rc == SQL_ERROR) \ + { \ + fprintf(stderr, "Error in " #x "\n"); \ + if (hStmt) { \ + SQLFreeHandle(SQL_HANDLE_STMT, hStmt); \ + } \ + if (hDbc) { \ + SQLDisconnect(hDbc); \ + SQLFreeHandle(SQL_HANDLE_DBC, hDbc); \ + } \ + if (hEnv) { \ + SQLFreeHandle(SQL_HANDLE_ENV, hEnv); \ + } \ + exit(-1); \ + } \ + } + +#endif + +struct M4TS {//storing series data + vector < float> categories_vect; + vector vals; + vector testVals;//empty, unless LBACK>0 + int n; + + M4TS(string category, stringstream &line_stream) { + array categories = { 0,0,0,0,0,0 }; + if (category == "Demographic") + categories[0] = 1; + else if (category == "Finance") + categories[1] = 1; + else if (category == "Industry") + categories[2] = 1; + else if (category == "Macro") + categories[3] = 1; + else if (category == "Micro") + categories[4] = 1; + else if (category == "Other") + categories[5] = 1; + else { + cerr << "unknown category?"; + exit(-1); + } + for (int i = 0; i < NUM_OF_CATEGORIES; i++) + 
categories_vect.push_back(categories[i]); + + string tmp_str; + while(getline(line_stream, tmp_str, ',' )) { + string val_str; + for (const auto c : tmp_str) { + if (c != '\"' && c != '\r') //remove quotes and very occasional double end of line + val_str.push_back(c); + } + if (val_str.size() == 0) + break; + float val=(atof(val_str.c_str())); + vals.push_back(val); + } + if (LBACK > 0) { //extract last OUTPUT_SIZE points as the test values + if (vals.size() > LBACK*OUTPUT_SIZE) { + auto first = vals.begin() + vals.size() - LBACK*OUTPUT_SIZE; + auto pastLast = vals.begin() + vals.size() - (LBACK-1)*OUTPUT_SIZE; + vector input_vect(first, pastLast); //[first,pastLast) + testVals= input_vect; + vals.resize(vals.size() - LBACK*OUTPUT_SIZE); //remove last LBACK*OUTPUT_SIZE elements + n = vals.size(); + } else + n = 0; + } else { + n = vals.size(); + } + if (n > MAX_SERIES_LENGTH) {//chop long series + vals.erase(vals.begin(), vals.begin() + (n-MAX_SERIES_LENGTH)); //remove some early data + n = vals.size(); + } + } + M4TS(){}; +}; + +#if defined USE_ODBC +void HandleDiagnosticRecord(SQLHANDLE hHandle, + SQLSMALLINT hType, + RETCODE RetCode); +#endif + + + +struct AdditionalParams {//Per series, important + Parameter levSm; + Parameter sSm; + array initSeasonality; + Parameter sSm2; + array initSeasonality2; +}; +struct AdditionalParamsF {//Used for storing diagnostics + float levSm; + float sSm; + array initSeasonality; + float sSm2; + array initSeasonality2; + vector levels; + vector seasons; + vector seasons2; +}; + + +array perfToRanking (array perf_arr) { + array index; + + for (int itop=0; itop losses; + for (unsigned int indx = 0; indx as_scalar(forec.value())) + losses.push_back((actual - forec)*TRAINING_TAU); + else + losses.push_back((actual - forec)*(TRAINING_TAU - 1)); + } + return sum(losses) / OUTPUT_SIZE * 2; +} + + +// weighted quantile Loss, used just for diagnostics, if if LBACK>0 and PERCENTILE!=50 +float wQuantLoss(vector& out_vect, vector& actuals_vect) { + float sumf = 0; float suma=0; + for (unsigned int indx = 0; indx forec) + sumf = sumf + (actual - forec)*TAU; + else + sumf = sumf + (actual - forec)*(TAU - 1); + } + return sumf / suma * 200; +} + +//used just for diagnostics, if LBACK>0 and PERCENTILE==50 +float sMAPE(vector& out_vect, vector& actuals_vect) { + float sumf = 0; + for (unsigned int indx = 0; indx& out_vect, vector& actuals_vect) { + if (PERCENTILE==50) + return sMAPE(out_vect, actuals_vect); + else + return wQuantLoss(out_vect, actuals_vect); +} + +int main(int argc, char** argv) { + dynet::initialize(argc, argv); + + int ibigOffset = 0; + if (argc == 2) + ibigOffset = atoi(argv[1]); + + cout << VARIABLE<<" "< 0) { + cout<<"Warning. LEVEL_VARIABILITY_PENALTY has to be equal zero if SEASONALITY_NUM==0"<tm_year+1900; + now_ts.month=now->tm_mon+1; + now_ts.day=now->tm_mday; + now_ts.hour=now->tm_hour; + now_ts.minute=now->tm_min; + now_ts.second=now->tm_sec; + now_ts.fraction=0; //reportedly needed + + const int OFFSET_TO_FIRST_ACTUAL=5; + string insertQuery_str = "insert into M72nn(run, LBack, ibig, series, epoch "; + for (int iq = 1; iq <= OUTPUT_SIZE; iq++) { + stringstream ss; + ss << iq; + string iq_str = ss.str(); + insertQuery_str = insertQuery_str +", actual"+iq_str+", forec" + iq_str; + } + insertQuery_str = insertQuery_str +", trainingError, variable, n, dateTimeOfPrediction) \ + values(? , ? , ? , ? , ? 
"; + for (int iq = 1; iq <= OUTPUT_SIZE; iq++) { + insertQuery_str = insertQuery_str + ",?,?"; + } + insertQuery_str = insertQuery_str + ",?,?,?,?)"; + #if defined _WINDOWS + wstring insertQuery(insertQuery_str.begin(), insertQuery_str.end()); + SQLWCHAR* sqlQuery = (SQLWCHAR*)insertQuery.c_str(); + #else + SQLCHAR* sqlQuery =(SQLCHAR*)insertQuery_str.c_str(); + #endif + + SQLHENV hEnv = NULL; + SQLHDBC hDbc = NULL; + SQLHSTMT hStmt = NULL, hInsertStmt = NULL; + + if (SQLAllocHandle(SQL_HANDLE_ENV, SQL_NULL_HANDLE, &hEnv) == SQL_ERROR) { + fprintf(stderr, "Unable to allocate an environment handle\n"); + exit(-1); + } + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLSetEnvAttr(hEnv, + SQL_ATTR_ODBC_VERSION, + (SQLPOINTER)SQL_OV_ODBC3, + 0)); + + // Allocate a connection + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLAllocHandle(SQL_HANDLE_DBC, hEnv, &hDbc)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLDriverConnect(hDbc, + NULL, + pwszConnStr, + SQL_NTS, + NULL, + 0, + NULL, + SQL_DRIVER_COMPLETE)); + fprintf(stderr, "Connected!\n"); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLSetConnectAttr(hDbc, SQL_ATTR_AUTOCOMMIT, (SQLPOINTER)SQL_AUTOCOMMIT_OFF, SQL_IS_INTEGER)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLAllocHandle(SQL_HANDLE_STMT, hDbc, &hInsertStmt)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLPrepare(hInsertStmt, sqlQuery, SQL_NTS)); + + SQLLEN nullTerminatedStringOfRun = SQL_NTS; + SQLLEN nullTerminatedStringOfSeries = SQL_NTS; + SQLLEN nullTerminatedStringOfVariable = SQL_NTS; + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 1, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)run.c_str(), 0, &nullTerminatedStringOfRun)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 2, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&LBACK, 0, NULL)); + + // variable, n, dateTimeOfPrediction + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL+2*OUTPUT_SIZE+2, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)VARIABLE.c_str(), 0, &nullTerminatedStringOfVariable)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE + 4, SQL_PARAM_INPUT, SQL_C_TYPE_TIMESTAMP, SQL_TYPE_TIMESTAMP, 0, 0, &now_ts, sizeof(TIMESTAMP_STRUCT), NULL)); +#endif + + random_device rd; // only used once to initialise (seed) engine + mt19937 rng(rd()); // random-number engine used (Mersenne-Twister in this case) + + vector series_vect; + unordered_map allSeries_map(30000);//max series in one chunk would be 24k for yearly series + unordered_map seriesCategories_map(120000);//100k series + + ifstream infoFile(INFO_INPUT_PATH); + string line; + getline(infoFile, line); //header + while (getline(infoFile, line)) { + //cout << string( line)<= MIN_SERIES_LENGTH) { + series_vect.push_back(series); + allSeries_map[series] = m4Obj; + } + if (MAX_NUM_OF_SERIES>0 && series_vect.size()>=MAX_NUM_OF_SERIES) + break; + } + cout << "num of series:" << series_vect.size() << endl; + + unsigned int series_len=(unsigned int)series_vect.size(); + uniform_int_distribution uniOnSeries(0,series_len-1); // closed interval [a, b] + uniform_int_distribution uniOnNets(0,NUM_OF_NETS-1); // closed interval [a, b] + + unordered_map, AVERAGING_LEVEL+1>, NUM_OF_NETS>> testResults_map((int)series_len*1.5);//per series, etc... 
+ unordered_map> finalResults_map((int)series_len*1.5);//per series + set diagSeries; + + unordered_map> netRanking_map; + for (int ibig=0; ibig perfValid_vect; + int epochOfLastChangeOfLRate = -1; + +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&ibigDb, 0, NULL)); +#endif + + //create nets + array paramsCollection_arr;//per net + array perSeriesParamsCollection_arr;//per net + array trainers_arr; + array perSeriesTrainers_arr; + + + #if defined USE_RESIDUAL_LSTM + array, NUM_OF_NETS> rnnStack_arr; + #elif defined USE_ATTENTIVE_LSTM + array, NUM_OF_NETS> rnnStack_arr; + #else + array, NUM_OF_NETS> rnnStack_arr; + #endif + + array MLPW_parArr; + array MLPB_parArr; + array adapterW_parArr; + array adapterB_parArr; + + //this is not a history, this is the real stuff + unordered_map* > additionalParams_mapOfArr((int)series_len*1.5); //per series, per net + for (auto iter = series_vect.begin() ; iter != series_vect.end(); ++iter) { + string series=*iter; + additionalParams_mapOfArr[series]=new array(); + } + + for (int inet=0; inetclip_threshold = GRADIENT_CLIPPING; + perSeriesTrainers_arr[inet]=new AdamTrainer (perSeriesPC, INITIAL_LEARNING_RATE*PER_SERIES_LR_MULTIP, 0.9, 0.999, EPS); + perSeriesTrainers_arr[inet]->clip_threshold = GRADIENT_CLIPPING; + + auto& rNNStack=rnnStack_arr[inet]; + #if defined USE_RESIDUAL_LSTM + rNNStack.emplace_back(ResidualDilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, pc)); + for (int il = 1; il* additionalParams_arr=additionalParams_mapOfArr[series]; + additionalParams_arr->at(inet).levSm=perSeriesPC.add_parameters({1}, 0.5);//per series, per net + if (SEASONALITY_NUM > 0) { + additionalParams_arr->at(inet).sSm = perSeriesPC.add_parameters({ 1 }, 0.5); + for (int isea = 0; iseaat(inet).initSeasonality[isea] = perSeriesPC.add_parameters({ 1 }, 0.5); + } + if (SEASONALITY_NUM > 1) { + additionalParams_arr->at(inet).sSm2 = perSeriesPC.add_parameters({ 1 }, 0.5); + for (int isea = 0; iseaat(inet).initSeasonality2[isea] = perSeriesPC.add_parameters({ 1 }, 0.5); + } + } + }//seting up, through nets + + //history of params. Series->[NUM_OF_NETS,NUM_OF_TRAIN_EPOCHS] + unordered_map, NUM_OF_NETS>*> historyOfAdditionalParams_map((int)series_len*1.5); + for (auto iter = series_vect.begin() ; iter != series_vect.end(); ++iter) { + string series=*iter; + historyOfAdditionalParams_map[series]=new array, NUM_OF_NETS>(); + } + + //first assignment. Yes, we are using vector , so the very first time the duplicates are possible. 
But a set can't be sorted + array, NUM_OF_NETS> seriesAssignment;//every net has an array + for (int j=0; j> netPerf_map; + for (int inet=0; inetlearning_rate = LEARNING_RATES.at(iEpoch); + if (inet==0) + cout << "changing LR to:" << trainer->learning_rate << endl; + perSeriesTrainer->learning_rate = LEARNING_RATES.at(iEpoch)*PER_SERIES_LR_MULTIP; + } + + auto& rNNStack=rnnStack_arr[inet]; + Parameter& MLPW_par = MLPW_parArr[inet]; + Parameter& MLPB_par = MLPB_parArr[inet]; + Parameter& adapterW_par=adapterW_parArr[inet]; + Parameter& adapterB_par=adapterB_parArr[inet]; + + vector oneNetAssignments=seriesAssignment[inet]; + random_shuffle (oneNetAssignments.begin(), oneNetAssignments.end()); + + vector epochLosses; + vector forecLosses; vector levVarLosses; vector stateLosses; + for (auto iter = oneNetAssignments.begin() ; iter != oneNetAssignments.end(); ++iter) { + string series=*iter; + auto m4Obj=allSeries_map[series]; + + ComputationGraph cg; + for (int il=0; ilat(inet); + array& historyOfAdditionalParams_arr=historyOfAdditionalParams_map[series]->at(inet); + + Expression MLPW_ex,MLPB_ex; + if (ADD_NL_LAYER) { + MLPW_ex = parameter(cg, MLPW_par); + MLPB_ex = parameter(cg, MLPB_par); + } + Expression adapterW_ex=parameter(cg, adapterW_par); + Expression adapterB_ex=parameter(cg, adapterB_par); + + Expression levSmSerNet0_ex= parameter(cg, additionalParams.levSm); + Expression levSm_ex = logistic(levSmSerNet0_ex); + + vector season_exVect;//vector, because we do not know how long the series is + Expression sSm_ex; + if (SEASONALITY_NUM > 0) { + Expression sSmSerNet0_ex= parameter(cg, additionalParams.sSm); + sSm_ex = logistic(sSmSerNet0_ex); + + for (int isea = 0; isea season2_exVect;//vector, because we do not know how long the series is + Expression sSm2_ex; + if (SEASONALITY_NUM > 1) { + Expression sSm2SerNet0_ex= parameter(cg, additionalParams.sSm2); + sSm2_ex = logistic(sSm2SerNet0_ex); + + for (int isea = 0; isea logDiffOfLevels_vect; + vector levels_exVect; + if (SEASONALITY_NUM == 0) { + levels_exVect.push_back(input(cg, m4Obj.vals[0])); + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + } + else if (SEASONALITY_NUM == 2) { + Expression lev = cdiv(input(cg, m4Obj.vals[0]), season_exVect[0] * season2_exVect[0]); + levels_exVect.push_back(lev); + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + //if prediction horizon is larger than seasonality, so we need to repeat some of the seasonality factors + if (OUTPUT_SIZE>SEASONALITY2) { + unsigned long startSeasonalityIndx = season2_exVect.size() - SEASONALITY2; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY2); i++) + season2_exVect.push_back(season2_exVect[startSeasonalityIndx + i]); + } + } + else { + cerr<<"SEASONALITY_NUM="<< SEASONALITY_NUM; + exit(-1); + } + + Expression levelVarLoss_ex; + if (LEVEL_VARIABILITY_PENALTY > 0) { + vector levelVarLoss_v; + for (int i = 1; i losses;//losses of steps through single time series + for (int i=INPUT_SIZE-1; i<(m4Obj.n- OUTPUT_SIZE); i++) { + vector::const_iterator first = m4Obj.vals.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1; //not including the last one + vector 
input_vect(first, pastLast); //[first,pastLast) + + first = m4Obj.vals.begin() + i + 1; + pastLast = m4Obj.vals.begin() + i + 1 + OUTPUT_SIZE; + vector labels_vect(first, pastLast); //[first,pastLast) + + Expression input1_ex = input(cg, { INPUT_SIZE }, input_vect); + Expression labels1_ex = input(cg, { OUTPUT_SIZE }, labels_vect); + + if (SEASONALITY_NUM > 0 ) { + vector::const_iterator firstE = season_exVect.begin() +i+1-INPUT_SIZE; + vector::const_iterator pastLastE = season_exVect.begin() +i+1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality_ex=concatenate(inputSeasonality_exVect); + + firstE = season_exVect.begin() + i + 1; + pastLastE = season_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + outputSeasonality_ex = concatenate(outputSeasonality_exVect); + + input1_ex = cdiv(input1_ex, inputSeasonality_ex); // input deseasonalization + labels1_ex = cdiv(labels1_ex, outputSeasonality_ex); //output deseasonalization + } + if (SEASONALITY_NUM > 1) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1; //not including the last one + vector inputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality2_ex = concatenate(inputSeasonality2_exVect); + + firstE = season2_exVect.begin() + i + 1; + pastLastE = season2_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality2_ex = concatenate(outputSeasonality2_exVect); + + input1_ex = cdiv(input1_ex, inputSeasonality2_ex); //input deseasonalization + labels1_ex = cdiv(labels1_ex, outputSeasonality2_ex); //output deseasonalization + } + + vector joinedInput_ex; + joinedInput_ex.emplace_back(noise(squash(cdiv(input1_ex, levels_exVect[i])), NOISE_STD)); //input normalization+noise + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression labels_ex = squash(cdiv(labels1_ex, levels_exVect[i]));//output normalization + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il=INPUT_SIZE+MIN_INP_SEQ_LEN) + losses.push_back(loss_ex); + }//through points of a series + + Expression forecLoss_ex= average(losses); + Expression loss_exp = forecLoss_ex; + + float levVarLoss=0; + if (LEVEL_VARIABILITY_PENALTY > 0) { + Expression levelVarLossP_ex = levelVarLoss_ex*LEVEL_VARIABILITY_PENALTY; + levVarLoss = as_scalar(levelVarLossP_ex.value()); + levVarLosses.push_back(levVarLoss); + loss_exp= loss_exp + levelVarLossP_ex; + } + + float cStateLoss=0; + if (C_STATE_PENALTY>0) { + vector cStateLosses_vEx; + for (int irnn = 0; irnn < rNNStack.size(); irnn++) + for (int it = 0; itupdate();//update shared weights + perSeriesTrainer->update(); //update params of this series only + } catch (exception& e) { //long diagnostics for this unlikely event :-) + cerr<<"cought exception while doing "< maxAbs) { + maxAbs = abs(state[iv]); + timeOfMax=it; + layerOfMax=il; + chunkOfMax= irnn; + } + } + } //through layers/states + } //through time + } //through chunks + + cout << "levSm:" << as_scalar(levSm_ex.value()) << endl; + if (SEASONALITY_NUM > 0) + cout << "sSm:" << as_scalar(sSm_ex.value()) << endl; + if (SEASONALITY_NUM > 1) + cout << "sSm2:" << as_scalar(sSm2_ex.value()) << endl; + cout << "max abs:" << maxAbs <<" 
at time:"<< timeOfMax<<" at layer:"<< layerOfMax<<" and chunk:"<< chunkOfMax< 0) { + histAdditionalParams.sSm=as_scalar(sSm_ex.value()); + for (int isea = 0; isea 1) { + histAdditionalParams.sSm2 = as_scalar(sSm2_ex.value()); + for (int isea=0; isea 0 || C_STATE_PENALTY > 0) { + float averageForecLoss = accumulate(forecLosses.begin(), forecLosses.end(), 0.0) / forecLosses.size(); + cout << " forec loss:" << averageForecLoss * 100; + } + if (LEVEL_VARIABILITY_PENALTY > 0) { + float averagelevVarLoss = accumulate(levVarLosses.begin(), levVarLosses.end(), 0.0) / levVarLosses.size(); + cout << " levVar loss:" << averagelevVarLoss * 100; + } + if (C_STATE_PENALTY > 0) { + float averageStateLoss = accumulate(stateLosses.begin(), stateLosses.end(), 0.0) / stateLosses.size(); + cout << " state loss:" << averageStateLoss * 100; + } + cout<at(inet); + Expression MLPW_ex, MLPB_ex; + if (ADD_NL_LAYER) { + MLPW_ex = parameter(cg, MLPW_par); + MLPB_ex = parameter(cg, MLPB_par); + } + Expression adapterW_ex=parameter(cg, adapterW_par); + Expression adapterB_ex=parameter(cg, adapterB_par); + + Expression levSmSerNet0_ex = parameter(cg, additionalParams.levSm); + Expression levSm_ex = logistic(levSmSerNet0_ex); + + vector season_exVect;//vector, because we do not know how long the series is + Expression sSm_ex; + if (SEASONALITY_NUM > 0) { + Expression sSmSerNet0_ex= parameter(cg, additionalParams.sSm); + sSm_ex = logistic(sSmSerNet0_ex); + + for (int isea = 0; isea season2_exVect;//vector, because we do not know how long the series is + Expression sSm2_ex; + if (SEASONALITY_NUM > 1) { + Expression sSm2SerNet0_ex= parameter(cg, additionalParams.sSm2); + sSm2_ex = logistic(sSm2SerNet0_ex); + + for (int isea = 0; isea levels_exVect; + if (SEASONALITY_NUM == 0) { + levels_exVect.push_back(input(cg, m4Obj.vals[0])); + for (int i = 1; i0 then this is shortened, so it always contains data awe have right to access + Expression newLevel_ex = m4Obj.vals[i] * cdiv(levSm_ex, season_exVect[i]) + (1 - levSm_ex)*levels_exVect[i - 1]; + levels_exVect.push_back(newLevel_ex); + + Expression newSeason_ex = m4Obj.vals[i] * cdiv(sSm_ex, newLevel_ex) + (1 - sSm_ex)*season_exVect[i]; + season_exVect.push_back(newSeason_ex); + } + + //if prediction horizon is larger than seasonality, so we need to repeat some of the seasonality factors + if (OUTPUT_SIZE>SEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + } + else if (SEASONALITY_NUM == 2) { + Expression lev = cdiv(input(cg, m4Obj.vals[0]), season_exVect[0] * season2_exVect[0]); + levels_exVect.push_back(lev); + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + //if prediction horizon is larger than seasonality, so we need to repeat some of the seasonality factors + if (OUTPUT_SIZE>SEASONALITY2) { + unsigned long startSeasonalityIndx = season2_exVect.size() - SEASONALITY2; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY2); i++) + season2_exVect.push_back(season2_exVect[startSeasonalityIndx + i]); + } + } + else { + cerr<<"SEASONALITY_NUM="<< SEASONALITY_NUM; + exit(-1); + } + + + Expression inputSeasonality_ex; Expression inputSeasonality2_ex; + Expression outputSeasonality_ex; Expression outputSeasonality2_ex; + vector losses;//losses 
of steps through single time series + Expression out_ex;//we declare it here, bcause the last one will be the forecast + for (int i=INPUT_SIZE-1; i::const_iterator first = m4Obj.vals.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1; //not including the last one + vector input_vect(first, pastLast); //[first,pastLast) + Expression input1_ex = input(cg, { INPUT_SIZE }, input_vect); + + if (SEASONALITY_NUM > 0 ) { + vector::const_iterator firstE = season_exVect.begin() +i+1-INPUT_SIZE; + vector::const_iterator pastLastE = season_exVect.begin() +i+1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality_ex=concatenate(inputSeasonality_exVect); + input1_ex = cdiv(input1_ex, inputSeasonality_ex); // input deseasonalization + } + if (SEASONALITY_NUM > 1) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1; //not including the last one + vector inputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality2_ex = concatenate(inputSeasonality2_exVect); + input1_ex = cdiv(input1_ex, inputSeasonality2_ex); //input deseasonalization + } + + vector joinedInput_ex; + joinedInput_ex.emplace_back(noise(squash(cdiv(input1_ex, levels_exVect[i])), NOISE_STD)); //input normalization+noise + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il::const_iterator first = m4Obj.vals.begin() + i + 1; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1 + OUTPUT_SIZE; + vector labels_vect(first, pastLast); //[first,pastLast) + Expression labels1_ex = input(cg, { OUTPUT_SIZE }, labels_vect); + + if (SEASONALITY_NUM > 0) { + vector::const_iterator firstE = season_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + outputSeasonality_ex = concatenate(outputSeasonality_exVect); + labels1_ex = cdiv(labels1_ex, outputSeasonality_ex); //output deseasonalization + } + if (SEASONALITY_NUM > 1) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality2_ex = concatenate(outputSeasonality2_exVect); + labels1_ex = cdiv(labels1_ex, outputSeasonality2_ex); //output deseasonalization + } + Expression labels_ex = squash(cdiv(labels1_ex, levels_exVect[i]));//output normalization + + Expression loss_ex = pinBallLoss(out_ex, labels_ex); + if (i>=INPUT_SIZE+MIN_INP_SEQ_LEN) + losses.push_back(loss_ex); //training area losses + } + + if (i==(m4Obj.n-1)) {//validation loss + out_ex=expand(out_ex)*levels_exVect[i];//back to original scale + if (SEASONALITY_NUM > 0 ) { + vector::const_iterator firstE = season_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + outputSeasonality_ex = concatenate(outputSeasonality_exVect); + out_ex = cmult(out_ex, outputSeasonality_ex);//reseasonalize + } + if (SEASONALITY_NUM > 1 ) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1; + 
vector::const_iterator pastLastE = season2_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality2_ex = concatenate(outputSeasonality2_exVect); + out_ex = cmult(out_ex, outputSeasonality2_ex);//reseasonalize + } + //we do not need the matching label here, because we do not bother calculate valid losses of each net across all series. + //We care about best and topn performance + } + }//end of going through all point of a series + + Expression loss_exp = average(losses); + float loss = as_scalar(cg.forward(loss_exp));//training loss of a single series + netPerf_map[series][inet]=loss; + + //unordered_map, AVERAGING_LEVEL+1>, NUM_OF_NETS>, BIG_LOOP>> testResults_map((int)series_len*1.5);//per series, big loop, etc... + //No epoch here, because this will just reflect the current (latest) situation - the last few epochs + vector out_vect=as_vector(out_ex.value()); + testResults_map[series][inet][iEpoch%AVERAGING_LEVEL]=out_vect; + if (iEpoch>=AVERAGING_LEVEL && iEpoch % FREQ_OF_TEST==0) { + vector firstForec=testResults_map[series][inet][0]; + testResults_map[series][inet][AVERAGING_LEVEL]=firstForec; + for (int ii=1; ii nextForec=testResults_map[series][inet][ii]; + for (int iii=0; iii0 && iEpoch % FREQ_OF_TEST==0) { + //now that we have saved outputs of all nets on all series, let's calc how best and topn combinations performed during current epoch. + vector bestEpochLosses; + vector bestEpochAvgLosses; + vector topnEpochLosses; + vector topnEpochAvgLosses; + + for (auto iter = series_vect.begin() ; iter != series_vect.end(); ++iter) { + string series=*iter; + auto m4Obj=allSeries_map[series]; + +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 4, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)series.c_str(), 0, &nullTerminatedStringOfSeries)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE + 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&m4Obj.n, 0, NULL)); +#endif + + float avgLoss; + vector avgLatest; + vector avgAvg; + + for (int itop=0; itop 0) { + float qLoss = errorFunc(testResults_map[series][inet][iEpoch%AVERAGING_LEVEL], m4Obj.testVals); + bestEpochLosses.push_back(qLoss); + } + avgLatest=testResults_map[series][inet][iEpoch%AVERAGING_LEVEL]; //used later for calculating topn loss + + if (iEpoch>=AVERAGING_LEVEL) { + if (LBACK > 0) { + float qLoss = errorFunc(testResults_map[series][inet][AVERAGING_LEVEL], m4Obj.testVals); + bestEpochAvgLosses.push_back(qLoss); + } + avgAvg=testResults_map[series][inet][AVERAGING_LEVEL]; + } + } else { + for (int iii=0; iii=AVERAGING_LEVEL) + avgAvg[iii]+=testResults_map[series][inet][AVERAGING_LEVEL][iii]; + } + } + }//through topn + + for (int iii=0; iii 0) { + float qLoss = errorFunc(avgLatest, m4Obj.testVals); + topnEpochLosses.push_back(qLoss); + } + + if (iEpoch>=AVERAGING_LEVEL) { + for (int iii = 0; iii 0) { +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE + 1, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&avgLoss, 0, NULL)); + + for (int iii=0; iii 0) { + float bestEpochLoss=accumulate( bestEpochLosses.begin(), bestEpochLosses.end(), 0.0)/bestEpochLosses.size(); + float topnEpochLoss=accumulate( topnEpochLosses.begin(), topnEpochLosses.end(), 0.0)/topnEpochLosses.size(); + cout<=AVERAGING_LEVEL) { + float 
bestEpochAvgLoss=accumulate( bestEpochAvgLosses.begin(), bestEpochAvgLosses.end(), 0.0)/bestEpochAvgLosses.size(); + float topnEpochAvgLoss=accumulate( topnEpochAvgLosses.begin(), topnEpochAvgLosses.end(), 0.0)/topnEpochAvgLosses.size(); + cout<<" bestAvg:"<> netRanking_map + netRanking_map[series]=perfToRanking(netPerf_map[series]); + + for (int itop=0; itop diagSeries; + for (int i=0; i<1; i++) {//add a few normal ones + int irand=uniOnSeries(rng); + diagSeries.insert(series_vect[irand]); + } + for(auto series : diagSeries) { + cout<at(inet); + for (int iEpoch=0; iEpoch 0 ) { + cout<<"sSm:"<at(inet); + for (int iEpoch=0; iEpoch 1 ) { + cout<<"sSm2:"<at(inet); + for (int iEpoch=0; iEpochat(inet); + for (int iEpoch = 0; iEpoch0) { + cout << "levels:" << iEpoch<<" "; + for (int iv = 0; iv 0 ) { + cout << "seasons:" << iEpoch<<" "; + for (int iv = 0; iv 1 ) { + cout << "seasons2:" << iEpoch<<" "; + for (int iv = 0; iv + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + + + + Source Files + + + \ No newline at end of file diff --git a/118 - slaweks17/c++/windows_VisualStudio/M43/M43.vcxproj b/118 - slaweks17/c++/windows_VisualStudio/M43/M43.vcxproj new file mode 100644 index 0000000..5da8187 --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/M43/M43.vcxproj @@ -0,0 +1,227 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + RelWithDebug + Win32 + + + RelWithDebug + x64 + + + + + + + + + + + {BE951571-3F3A-4048-BAA3-0C05F38CFF42} + Win32Proj + M43 + 8.1 + + + + Application + true + v140 + Unicode + + + Application + true + v140 + Unicode + + + Application + false + v140 + true + Unicode + + + Application + true + v140 + Unicode + Sequential + + + Application + true + v140 + Unicode + Sequential + + + Application + false + v140 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + true + + + true + + + true + + + true + + + false + + + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + + + + + NotUsing + Level1 + Disabled + WIN32;_WINDOWS;EIGEN_USE_MKL_ALL;EIGEN_FAST_MATH;NOMINMAX;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + E:\progs2\dynet;E:\progs\Eigen; + + + Console + true + E:\progs2\dynet\buildMKL\dynet\Debug + dynet.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + NotUsing + Level1 + MaxSpeed + WIN32;_WINDOWS;EIGEN_FAST_MATH;EIGEN_USE_MKL_ALL;NOMINMAX;NDEBUG_;CONSOLE;%(PreprocessorDefinitions) + E:\progs2\dynet;E:\progs\Eigen; + AnySuitable + true + Speed + AdvancedVectorExtensions + Default + MultiThreadedDLL + ProgramDatabase + true + false + + + Console + true + E:\progs2\dynet\buildMKL\dynet\RelWithDebInfo + dynet.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + 
NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + true + true + + + + + + \ No newline at end of file diff --git a/118 - slaweks17/c++/windows_VisualStudio/M43/slstm.h b/118 - slaweks17/c++/windows_VisualStudio/M43/slstm.h new file mode 100644 index 0000000..adb63a7 --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/M43/slstm.h @@ -0,0 +1,394 @@ +/** +* file slstm.h +* header for my implementation of dilated LSTMs, based on Dynet LSTM builders + - DilatedLSTMBuilder - standard Dilated LSTM (https://papers.nips.cc/paper/6613-dilated-recurrent-neural-networks.pdf) + - ResidualDilatedLSTMBuilder - Dilated LSTM with special Residual shortcuts, after https://arxiv.org/abs/1701.03360 + - AttentiveDilatedLSTMBuilder - Dilated LSTM with Attention mechanism, as in the second stage of https://arxiv.org/abs/1704.02971 +* +Slawek Smyl, Mar-May 2018 +*/ + +#ifndef DYNET_SLSTMS_H_ +#define DYNET_SLSTMS_H_ + +#include "dynet/dynet.h" +#include "dynet/rnn.h" +#include "dynet/expr.h" + +using namespace std; + +namespace dynet { + + //basd on VanillaLSTMBuilder + struct ResidualDilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + ResidualDilatedLSTMBuilder(); + /** + * \brief Constructor for the ResidualDilatedLSTMBuilder + * + * \param dilations Vector of dilations + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + * \param ln_lstm Whether to use layer normalization + * \param forget_bias value(float) to use as bias for the forget gate(default = 1.0) + */ + explicit ResidualDilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model, + bool ln_lstm = false, + float forget_bias = 1.f); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 
+ * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + * \param d_h Dropout rate \f$d_h\f$ for the output \f$h_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + /** + * \brief Get parameters in ResidualDilatedLSTMBuilder + * \return list of points to ParameterStorage objects + */ + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + // first index is layer, then ... + std::vector> ln_params; + + // first index is layer, then ... + std::vector> param_vars; + // first index is layer, then ... + std::vector> ln_param_vars; + + // first index is layer, then ... + std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + float dropout_rate_h; + bool ln_lstm; + float forget_bias; + bool dropout_masks_valid; + vector dilations; //one int per layer + + private: + ComputationGraph* _cg; // Pointer to current cg + + }; + + + struct DilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + DilatedLSTMBuilder(); + /** + * \brief Constructor for the DilatedLSTMBuilder + * + * \param dilations Vector of dilations + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + */ + explicit DilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model); + + Expression back() const override { return (cur == -1 ? 
h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. + * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + + void set_weightnoise(float std); + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + + // first index is layer, then ... + std::vector> param_vars; + + // first index is layer, then ... 
+ std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + float dropout_rate_h; + float weightnoise_std; + vector dilations; //one int per layer + + bool dropout_masks_valid; + private: + ComputationGraph* _cg; // Pointer to current cg + + }; + + + struct AttentiveDilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + AttentiveDilatedLSTMBuilder(); + /** + * \brief Constructor for the AttentiveDilatedLSTMBuilder + * + * \param max_dilations Vector, maximum dilations (per layer) + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + */ + explicit AttentiveDilatedLSTMBuilder(vector max_dilations, + unsigned input_dim, + unsigned hidden_dim, + unsigned attention_dim, + ParameterCollection& model); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 
+ * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + + void set_weightnoise(float std); + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + + // first index is layer, then ... + std::vector> param_vars; + + // first index is layer, then ... + std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + unsigned attention_dim; + float dropout_rate_h; + float weightnoise_std; + vector max_dilations; //one int per layer + + bool dropout_masks_valid; + private: + ComputationGraph* _cg; // Pointer to current cg + + }; +} // namespace dynet + +#endif diff --git a/118 - slaweks17/c++/windows_VisualStudio/M44/ES_RNN_E_PI.cc b/118 - slaweks17/c++/windows_VisualStudio/M44/ES_RNN_E_PI.cc new file mode 100644 index 0000000..e9729d5 --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/M44/ES_RNN_E_PI.cc @@ -0,0 +1,1744 @@ +/*ES-RNN-E: Exponential Smoothing Recurrent Neural Network hybrid, Ensemble of specialists. Prediction Intervals forecast. +Slawek Smyl, Jan-May 2017. + +Dilated LSTMs, with optional shortcuts, attention. Non-seasonal, single, or double seasonal. +It is meant to be used for all types of series from M4 competition, except Monthly and Quarterly (for performance reasons - Ensamble of Specilists is slower). +The program uses and requires Dynet NN library(https://github.com/clab/dynet); can be compiled and run on Windows, Linux, and Mac. 
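+
+Prediction intervals: ALPHA, TAUL and TAUH defined below set a central 95% interval (ALPHA=0.05, so the lower and
+upper bounds correspond to the 2.5% and 97.5% quantiles). Training uses a pinball-style interval penalty: an actual
+value falling outside the predicted bounds is charged in proportion to the miss times ALPHA_MULTIP = 2/ALPHA, and the
+backtesting error (errorFunc) additionally divides by the series' mean absolute seasonal difference, making it akin to
+the mean scaled interval score (MSIS) used for prediction intervals in the M4 competition. wQuantLoss, used only for
+diagnostics, is the standard weighted quantile loss: tau*(y-f) for y >= f and (tau-1)*(y-f) otherwise.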
+ +In contradistinction to ES-RNN, each executable uses all series, but in a similar manner repeating the whole learning process BIG_LOOP times (by default 3). +Invocation should pass BIG_LOOP offset +so e.g. create a script with following lines on Windows +start 0 +start 10 +start 20 +start 30 +on 4-core computer. +In this setup, learning and fitting would be repeated 4*3 times, probably unnecessarily too many, 6-8 independent runs should be enough for a good ensemble. +Therefore if running on say 8 core machine , one can extend the above script to 8 concurrent executions and reduce BIG_LOOP to 1. +(Creating final forecasts is done in a supplied R script) + +There are four blocks of parameters below, one active (starting with //PARAMS--------------) and three inactive. +These blocks are as they were during the final forecasting run. You need comment/uncomment to have one block of interest active. +*/ + + +//#define USE_ODBC +//define USE_ODBC if you want to +// 1. run the program in backtesting mode (which means you also need to set LBACK>0 below. Read the comment below. +// 2. save forecasts to a datatabase. Mysql and SQL Server were tested. The table creation and some other scripts should be found in \sql directory of the source code. +// Of course setting up ODBC is not that simple, :-), e.g. you need to create DSN=slawek, that points to a database with the output table. +// Saving to the db is convenient, but not necessary - all forecasts are always saved to as csv files in automatically created subdirectory (sorry sometimes two directories, so you have to copy :-)) of OUTPUT_DIR +//If saving to database you need to modify run varaible, for each new run, otherwise you will get the table key error. + +#include "dynet/dynet.h" +#include "dynet/training.h" +#include "dynet/expr.h" +#include "dynet/io.h" +#include "dynet/model.h" +#include "dynet/nodes.h" +#include "dynet/expr.h" +#include "dynet/lstm.h" +#include "slstm.h" //my implementation of dilated LSTMs + + +#if defined USE_ODBC + #if defined _WINDOWS + #include + #endif + #include + #include +#endif + +#include +#include +#include +//#include +#include +#include +#include +#include + +using namespace std; +using namespace dynet; + +string DATA_DIR = "f:\\progs\\data\\M4DataSet\\"; //with the competition data csvs +//string DATA_DIR="/home/uber/progs/data/M4DataSet/"; +string OUTPUT_DIR = "f:\\progs\\data\\M4\\"; +//string OUTPUT_DIR="/home/uber/progs/data/M4/"; + +int LBACK = 0; //LBACK 0 means final mode: learning on all data and forecasting. LBACK=1 would move back by OUTPUT_SIZE, and forecast last known OUTPUT_SIZE points, for backtesting. LBACK could be a larger integer, but then number of series shrinks. 
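+
+//A minimal sketch (illustration only, not executed anywhere; variable names here are just for the example) of what
+//the LBACK holdout described above amounts to; the actual logic lives in the M4TS constructor further down:
+//    std::vector<float> vals = ...;                                  //full series
+//    std::vector<float> test(vals.end() - LBACK*OUTPUT_SIZE,         //held-out window the forecasts
+//                            vals.end() - (LBACK-1)*OUTPUT_SIZE);    //are scored against
+//    vals.resize(vals.size() - LBACK*OUTPUT_SIZE);                   //training sees only the earlier part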
+ + +//PARAMS-------------- + +string VARIABLE = "Hourly"; +const string run0 = "(1,4)(24,168) LR=0.01, {25,3e-3f} EPOCHS=37, LVP=10, CSP=0"; +const string runL = "alpha5L " + run0; +const string runH = "alpha5H " + run0; + +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +const int SEASONALITY_NUM = 2;//0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 24; +const int SEASONALITY2 = 168; +vector> dilations = { { 1,4 },{ 24, 168 } }; + +const float INITIAL_LEARNING_RATE = 0.01f; +const map LEARNING_RATES = { { 20,1e-3f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 37; + +float LEVEL_VARIABILITY_PENALTY = 10; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; + +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 24; +const unsigned int OUTPUT_SIZE = 48; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK +const int MAX_SERIES_LENGTH = 53 * SEASONALITY2 + MIN_SERIES_LENGTH; //==all +const int TOPN = 4; + + +/* +string VARIABLE = "Weekly"; +const string run0 = "Att 4/5 (1,52) LR=1e-3 {15,3e-4f} EPOCHS=31, LVP=100 6y"; +const string runL = "alpha5L " + run0; +const string runH = "alpha5H " + run0; + +//#define USE_RESIDUAL_LSTM +#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +const int SEASONALITY_NUM = 1; //0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 52; +const int SEASONALITY2 = 0; +vector> dilations = { { 1, 52 } }; + +const float INITIAL_LEARNING_RATE = 1e-3; +const map LEARNING_RATES = { { 15,3e-4f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 31; + +float LEVEL_VARIABILITY_PENALTY = 100; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; + +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 10; +const unsigned int OUTPUT_SIZE = 13; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK + //#Min. 1st Qu. Median Mean 3rd Qu. Max. 
+ //#81 380 935 1023 1604 2598 +const int MAX_SERIES_LENGTH = 6 * SEASONALITY + MIN_SERIES_LENGTH; +const int TOPN = 4; +*/ + +/* + +string VARIABLE = "Daily"; +const string run0 = "4/5 (1,3)(7,14) LR=3e-4 {13,1e-4f} EPOCHS=21, LVP=100 13w"; +const string runL = "alpha5L " + run0; +const string runH = "alpha5H " + run0; + +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER=false; + +const int SEASONALITY_NUM = 1; //0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 7; +const int SEASONALITY2 = 0; +vector> dilations = { { 1,3 },{ 7, 14 } }; + +const float INITIAL_LEARNING_RATE = 3e-4; +const map LEARNING_RATES = { { 13,1e-4f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 21; + +float LEVEL_VARIABILITY_PENALTY = 100; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; + +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 7; +const unsigned int OUTPUT_SIZE = 14; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK + //#Min. 1st Qu. Median Mean 3rd Qu. Max. + //##93 323 2940 2357 4197 9919 +const int MAX_SERIES_LENGTH = 13 * SEASONALITY + MIN_SERIES_LENGTH; +const int TOPN = 4; +*/ + +/* +string VARIABLE = "Yearly"; +const string run0 = "Att NL 4/5 (1,6) LR=1e-4 {17,3e-5}{22,1e-5} EPOCHS=29, 60*"; +const string runL = "alpha5L " + run0; +const string runH = "alpha5H " + run0; + +//#define USE_RESIDUAL_LSTM +#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = true; + +const int SEASONALITY_NUM = 0; //0 means no seasonality +const int SEASONALITY = 1; //for no seasonality, set it to 1, important +const int SEASONALITY2 = 0; +vector> dilations = { { 1,6 } }; + +const float INITIAL_LEARNING_RATE = 1e-4; +const map LEARNING_RATES = { { 17,3e-5 },{ 22,1e-5 } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 29; + +float LEVEL_VARIABILITY_PENALTY = 0; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; + +const unsigned int STATE_HSIZE = 30; + +const unsigned int INPUT_SIZE = 4; +const unsigned int OUTPUT_SIZE = 6; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK + //#Min. 1st Qu. Median Mean 3rd Qu. Max. 
+ //#13.00 20.00 29.00 31.32 40.00 835.00 +const int MAX_SERIES_LENGTH = 60 + MIN_SERIES_LENGTH; +const int TOPN = 4; +*/ + +const float ALPHA = 0.05; +const float TAUL = ALPHA / 2; +const float TAUH = 1 - TAUL; +const float ALPHA_MULTIP = 2 / ALPHA; + +const int BIG_LOOP = 3; +const int NUM_OF_NETS = 5; +const unsigned ATTENTION_HSIZE = STATE_HSIZE; + +#if defined _DEBUG + const int MAX_NUM_OF_SERIES = 20; +#else + const int MAX_NUM_OF_SERIES = -1; +#endif // _DEBUG + +const unsigned int NUM_OF_CATEGORIES = 6; +const int AVERAGING_LEVEL = 5; +const float EPS=1e-6; + +const float NOISE_STD=0.001; +const int FREQ_OF_TEST=1; +const float GRADIENT_CLIPPING=50; +const float BIG_FLOAT=1e38;//numeric_limits::max(); +const bool PRINT_DIAGN = false; + +string INPUT_PATH = DATA_DIR + VARIABLE + "-train.csv"; +string INFO_INPUT_PATH = DATA_DIR + "M4-info.csv"; + + +Expression squash(const Expression& x) { + return log(x); +} +float squash(float x) { + return log(x); +} + +Expression expand(const Expression& x) { + return exp(x); +} +float expand(float x) { + return exp(x); +} + + +#if defined USE_ODBC + void HandleDiagnosticRecord(SQLHANDLE hHandle, + SQLSMALLINT hType, + RETCODE RetCode); + + #if defined _WINDOWS + WCHAR* pwszConnStr = L"DSN=slawek"; + #else + SQLCHAR* pwszConnStr = (SQLCHAR*) "DSN=slawek"; + #endif + #define TRYODBC(h, ht, x) { RETCODE rc = x;\ + if (rc != SQL_SUCCESS) \ + { \ + HandleDiagnosticRecord (h, ht, rc); \ + } \ + if (rc == SQL_ERROR) \ + { \ + fprintf(stderr, "Error in " #x "\n"); \ + if (hStmt) { \ + SQLFreeHandle(SQL_HANDLE_STMT, hStmt); \ + } \ + if (hDbc) { \ + SQLDisconnect(hDbc); \ + SQLFreeHandle(SQL_HANDLE_DBC, hDbc); \ + } \ + if (hEnv) { \ + SQLFreeHandle(SQL_HANDLE_ENV, hEnv); \ + } \ + exit(-1); \ + } \ + } + +#endif + +struct M4TS {//storing series data + vector < float> categories_vect; + vector vals; + vector testVals;//empty, unless LBACK>0 + float meanAbsSeasDiff; + int n; + + M4TS(string category, stringstream &line_stream) { + array categories = { 0,0,0,0,0,0 }; + if (category == "Demographic") + categories[0] = 1; + else if (category == "Finance") + categories[1] = 1; + else if (category == "Industry") + categories[2] = 1; + else if (category == "Macro") + categories[3] = 1; + else if (category == "Micro") + categories[4] = 1; + else if (category == "Other") + categories[5] = 1; + else { + cerr << "unknown category?"; + exit(-1); + } + for (int i = 0; i < NUM_OF_CATEGORIES; i++) + categories_vect.push_back(categories[i]); + + string tmp_str; + while(getline(line_stream, tmp_str, ',' )) { + string val_str; + for (const auto c : tmp_str) { + if (c != '\"' && c != '\r') //remove quotes and very occasional double end of line + val_str.push_back(c); + } + if (val_str.size() == 0) + break; + float val=(atof(val_str.c_str())); + vals.push_back(val); + } + + meanAbsSeasDiff = 0; + float sumf = 0; + for (int ip = SEASONALITY; ip0) + meanAbsSeasDiff = sumf / (vals.size() - SEASONALITY); + + if (LBACK > 0) { //extract last OUTPUT_SIZE points as the test values + if (vals.size() > LBACK*OUTPUT_SIZE) { + auto first = vals.begin() + vals.size() - LBACK*OUTPUT_SIZE; + auto pastLast = vals.begin() + vals.size() - (LBACK-1)*OUTPUT_SIZE; + vector input_vect(first, pastLast); //[first,pastLast) + testVals= input_vect; + vals.resize(vals.size() - LBACK*OUTPUT_SIZE); //remove last LBACK*OUTPUT_SIZE elements + n = vals.size(); + } else + n = 0; + } else { + n = vals.size(); + } + if (n > MAX_SERIES_LENGTH) {//chop long series + vals.erase(vals.begin(), vals.begin() + 
(n-MAX_SERIES_LENGTH)); //remove some early data + n = vals.size(); + } + } + M4TS(){}; +}; + +#if defined USE_ODBC +void HandleDiagnosticRecord(SQLHANDLE hHandle, + SQLSMALLINT hType, + RETCODE RetCode); +#endif + +struct AdditionalParams {//Per series, important + Parameter levSm; + Parameter sSm; + array initSeasonality; + Parameter sSm2; + array initSeasonality2; +}; +struct AdditionalParamsF {//Used for storing diagnostics + float levSm; + float sSm; + array initSeasonality; + float sSm2; + array initSeasonality2; + vector levels; + vector seasons; + vector seasons2; +}; + + +array perfToRanking (array perf_arr) { + array index; + + for (int itop=0; itop losses; + for (unsigned int indx = 0; indx as_scalar(forecH.value())) + loss = loss + (actual - forecH)*ALPHA_MULTIP; + losses.push_back(loss); + } + return sum(losses) / OUTPUT_SIZE; +} + +// weighted quantile Loss +float wQuantLoss(vector& out_vect, vector& actuals_vect, float tau, int offset) {//used just for diagnostics, if if LBACK>0 and PERCENTILE!=50 + float sumf = 0; float suma = 0; + for (unsigned int indx = 0; indx forec) + sumf = sumf + (actual - forec)*tau; + else + sumf = sumf + (actual - forec)*(tau - 1); + } + return sumf / suma * 200; +} + +float errorFunc(vector& out_vect, vector& actuals_vect, float meanAbsSeasDiff) { + float sumf=0; + for (unsigned int indx = 0; indx forecH) + loss = loss + (actualf - forecH)*ALPHA_MULTIP; + sumf+=loss; + } + return sumf / (OUTPUT_SIZE*meanAbsSeasDiff); +} + + + +int main(int argc, char** argv) { + dynet::initialize(argc, argv); + + int ibigOffset = 0; + if (argc == 2) + ibigOffset = atoi(argv[1]); + + cout< 0) { + cout<<"Warning. LEVEL_VARIABILITY_PENALTY has to be equal zero if SEASONALITY_NUM==0"<tm_year+1900; + now_ts.month=now->tm_mon+1; + now_ts.day=now->tm_mday; + now_ts.hour=now->tm_hour; + now_ts.minute=now->tm_min; + now_ts.second=now->tm_sec; + now_ts.fraction=0; //reportedly needed + + const int OFFSET_TO_FIRST_ACTUAL=5; + string insertQuery_str = "insert into M72nn(run, LBack, ibig, series, epoch "; + for (int iq = 1; iq <= OUTPUT_SIZE; iq++) { + stringstream ss; + ss << iq; + string iq_str = ss.str(); + insertQuery_str = insertQuery_str +", actual"+iq_str+", forec" + iq_str; + } + insertQuery_str = insertQuery_str +", trainingError, variable, n, dateTimeOfPrediction) \ + values(? , ? , ? , ? , ? 
"; + for (int iq = 1; iq <= OUTPUT_SIZE; iq++) { + insertQuery_str = insertQuery_str + ",?,?"; + } + insertQuery_str = insertQuery_str + ",?,?,?,?)"; + #if defined _WINDOWS + wstring insertQuery(insertQuery_str.begin(), insertQuery_str.end()); + SQLWCHAR* sqlQuery = (SQLWCHAR*)insertQuery.c_str(); + #else + SQLCHAR* sqlQuery =(SQLCHAR*)insertQuery_str.c_str(); + #endif + + SQLHENV hEnv = NULL; + SQLHDBC hDbc = NULL; + SQLHSTMT hStmt = NULL, hInsertStmt = NULL; + + if (SQLAllocHandle(SQL_HANDLE_ENV, SQL_NULL_HANDLE, &hEnv) == SQL_ERROR) { + fprintf(stderr, "Unable to allocate an environment handle\n"); + exit(-1); + } + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLSetEnvAttr(hEnv, + SQL_ATTR_ODBC_VERSION, + (SQLPOINTER)SQL_OV_ODBC3, + 0)); + + // Allocate a connection + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLAllocHandle(SQL_HANDLE_DBC, hEnv, &hDbc)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLDriverConnect(hDbc, + NULL, + pwszConnStr, + SQL_NTS, + NULL, + 0, + NULL, + SQL_DRIVER_COMPLETE)); + fprintf(stderr, "Connected!\n"); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLSetConnectAttr(hDbc, SQL_ATTR_AUTOCOMMIT, (SQLPOINTER)SQL_AUTOCOMMIT_OFF, SQL_IS_INTEGER)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLAllocHandle(SQL_HANDLE_STMT, hDbc, &hInsertStmt)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLPrepare(hInsertStmt, sqlQuery, SQL_NTS)); + + SQLLEN nullTerminatedStringOfRun = SQL_NTS; + SQLLEN nullTerminatedStringOfSeries = SQL_NTS; + SQLLEN nullTerminatedStringOfVariable = SQL_NTS; + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 2, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&LBACK, 0, NULL)); + + // variable, n, dateTimeOfPrediction + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL+2*OUTPUT_SIZE+2, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)VARIABLE.c_str(), 0, &nullTerminatedStringOfVariable)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE + 4, SQL_PARAM_INPUT, SQL_C_TYPE_TIMESTAMP, SQL_TYPE_TIMESTAMP, 0, 0, &now_ts, sizeof(TIMESTAMP_STRUCT), NULL)); +#endif + + random_device rd; // only used once to initialise (seed) engine + mt19937 rng(rd()); // random-number engine used (Mersenne-Twister in this case) + + vector series_vect; + unordered_map allSeries_map(30000);//max series in one chunk would be 24k for yearly series + unordered_map seriesCategories_map(120000);//100k series + + ifstream infoFile(INFO_INPUT_PATH); + string line; + getline(infoFile, line); //header + while (getline(infoFile, line)) { + //cout << string( line)<= MIN_SERIES_LENGTH) { + series_vect.push_back(series); + if (m4Obj.meanAbsSeasDiff==0) { + cout<<"Warning, flat series:"<0 && series_vect.size()>=MAX_NUM_OF_SERIES) + break; + } + cout << "num of series:" << series_vect.size() << endl; + + unsigned int series_len=(unsigned int)series_vect.size(); + uniform_int_distribution uniOnSeries(0,series_len-1); // closed interval [a, b] + uniform_int_distribution uniOnNets(0,NUM_OF_NETS-1); // closed interval [a, b] + + unordered_map, AVERAGING_LEVEL+1>, NUM_OF_NETS>> testResults_map((int)series_len*1.5);//per series, etc... 
+ unordered_map> finalResults_map((int)series_len*1.5);//per series + set diagSeries; + + unordered_map> netRanking_map; + for (int ibig=0; ibig perfValid_vect; + int epochOfLastChangeOfLRate = -1; + +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&ibigDb, 0, NULL)); +#endif + + //create nets + array paramsCollection_arr;//per net + array perSeriesParamsCollection_arr;//per net + array trainers_arr; + array perSeriesTrainers_arr; + + + #if defined USE_RESIDUAL_LSTM + array, NUM_OF_NETS> rnnStack_arr; + #elif defined USE_ATTENTIVE_LSTM + array, NUM_OF_NETS> rnnStack_arr; + #else + array, NUM_OF_NETS> rnnStack_arr; + #endif + + array MLPW_parArr; + array MLPB_parArr; + array adapterW_parArr; + array adapterB_parArr; + + //this is not a history, this is the real stuff + unordered_map* > additionalParams_mapOfArr((int)series_len*1.5); //per series, per net + for (auto iter = series_vect.begin() ; iter != series_vect.end(); ++iter) { + string series=*iter; + additionalParams_mapOfArr[series]=new array(); + } + + for (int inet=0; inetclip_threshold = GRADIENT_CLIPPING; + perSeriesTrainers_arr[inet]=new AdamTrainer (perSeriesPC, INITIAL_LEARNING_RATE*PER_SERIES_LR_MULTIP, 0.9, 0.999, EPS); + perSeriesTrainers_arr[inet]->clip_threshold = GRADIENT_CLIPPING; + + auto& rNNStack=rnnStack_arr[inet]; + #if defined USE_RESIDUAL_LSTM + rNNStack.emplace_back(ResidualDilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, pc)); + for (int il = 1; il* additionalParams_arr=additionalParams_mapOfArr[series]; + additionalParams_arr->at(inet).levSm=perSeriesPC.add_parameters({1}, 0.5);//per series, per net + if (SEASONALITY_NUM > 0) { + additionalParams_arr->at(inet).sSm = perSeriesPC.add_parameters({ 1 }, 0.5); + for (int isea = 0; iseaat(inet).initSeasonality[isea] = perSeriesPC.add_parameters({ 1 }, 0.5); + } + if (SEASONALITY_NUM > 1) { + additionalParams_arr->at(inet).sSm2 = perSeriesPC.add_parameters({ 1 }, 0.5); + for (int isea = 0; iseaat(inet).initSeasonality2[isea] = perSeriesPC.add_parameters({ 1 }, 0.5); + } + } + }//seting up, through nets + + //history of params. Series->[NUM_OF_NETS,NUM_OF_TRAIN_EPOCHS] + unordered_map, NUM_OF_NETS>*> historyOfAdditionalParams_map((int)series_len*1.5); + for (auto iter = series_vect.begin() ; iter != series_vect.end(); ++iter) { + string series=*iter; + historyOfAdditionalParams_map[series]=new array, NUM_OF_NETS>(); + } + + //first assignment. Yes, we are using vector , so the very first time the duplicates are possible. 
But a set can't be sorted + array, NUM_OF_NETS> seriesAssignment;//every net has an array + for (int j=0; j> netPerf_map; + for (int inet=0; inetlearning_rate = LEARNING_RATES.at(iEpoch); + if (inet==0) + cout << "changing LR to:" << trainer->learning_rate << endl; + perSeriesTrainer->learning_rate = LEARNING_RATES.at(iEpoch)*PER_SERIES_LR_MULTIP; + } + + auto& rNNStack=rnnStack_arr[inet]; + Parameter& MLPW_par = MLPW_parArr[inet]; + Parameter& MLPB_par = MLPB_parArr[inet]; + Parameter& adapterW_par=adapterW_parArr[inet]; + Parameter& adapterB_par=adapterB_parArr[inet]; + + vector oneNetAssignments=seriesAssignment[inet]; + random_shuffle (oneNetAssignments.begin(), oneNetAssignments.end()); + + vector epochLosses; + vector forecLosses; vector levVarLosses; vector stateLosses; + for (auto iter = oneNetAssignments.begin() ; iter != oneNetAssignments.end(); ++iter) { + string series=*iter; + auto m4Obj=allSeries_map[series]; + + ComputationGraph cg; + for (int il=0; ilat(inet); + array& historyOfAdditionalParams_arr=historyOfAdditionalParams_map[series]->at(inet); + + Expression MLPW_ex,MLPB_ex; + if (ADD_NL_LAYER) { + MLPW_ex = parameter(cg, MLPW_par); + MLPB_ex = parameter(cg, MLPB_par); + } + Expression adapterW_ex=parameter(cg, adapterW_par); + Expression adapterB_ex=parameter(cg, adapterB_par); + + Expression levSmSerNet0_ex= parameter(cg, additionalParams.levSm); + Expression levSm_ex = logistic(levSmSerNet0_ex); + + vector season_exVect;//vector, because we do not know how long the series is + Expression sSm_ex; + if (SEASONALITY_NUM > 0) { + Expression sSmSerNet0_ex= parameter(cg, additionalParams.sSm); + sSm_ex = logistic(sSmSerNet0_ex); + + for (int isea = 0; isea season2_exVect;//vector, because we do not know how long the series is + Expression sSm2_ex; + if (SEASONALITY_NUM > 1) { + Expression sSm2SerNet0_ex= parameter(cg, additionalParams.sSm2); + sSm2_ex = logistic(sSm2SerNet0_ex); + + for (int isea = 0; isea logDiffOfLevels_vect; + vector levels_exVect; + if (SEASONALITY_NUM == 0) { + levels_exVect.push_back(input(cg, m4Obj.vals[0])); + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + } + else if (SEASONALITY_NUM == 2) { + Expression lev = cdiv(input(cg, m4Obj.vals[0]), season_exVect[0] * season2_exVect[0]); + levels_exVect.push_back(lev); + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + //if prediction horizon is larger than seasonality, so we need to repeat some of the seasonality factors + if (OUTPUT_SIZE>SEASONALITY2) { + unsigned long startSeasonalityIndx = season2_exVect.size() - SEASONALITY2; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY2); i++) + season2_exVect.push_back(season2_exVect[startSeasonalityIndx + i]); + } + } + else { + cerr<<"SEASONALITY_NUM="<< SEASONALITY_NUM; + exit(-1); + } + + Expression levelVarLoss_ex; + if (LEVEL_VARIABILITY_PENALTY > 0) { + vector levelVarLoss_v; + for (int i = 1; i losses;//losses of steps through single time series + for (int i=INPUT_SIZE-1; i<(m4Obj.n- OUTPUT_SIZE); i++) { + vector::const_iterator first = m4Obj.vals.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1; //not including the last one + vector 
input_vect(first, pastLast); //[first,pastLast) + + first = m4Obj.vals.begin() + i + 1; + pastLast = m4Obj.vals.begin() + i + 1 + OUTPUT_SIZE; + vector labels_vect(first, pastLast); //[first,pastLast) + + Expression input1_ex = input(cg, { INPUT_SIZE }, input_vect); + Expression labels1_ex = input(cg, { OUTPUT_SIZE }, labels_vect); + + if (SEASONALITY_NUM > 0 ) { + vector::const_iterator firstE = season_exVect.begin() +i+1-INPUT_SIZE; + vector::const_iterator pastLastE = season_exVect.begin() +i+1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality_ex=concatenate(inputSeasonality_exVect); + + firstE = season_exVect.begin() + i + 1; + pastLastE = season_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + outputSeasonality_ex = concatenate(outputSeasonality_exVect); + + input1_ex = cdiv(input1_ex, inputSeasonality_ex); // input deseasonalization + labels1_ex = cdiv(labels1_ex, outputSeasonality_ex); //output deseasonalization + } + if (SEASONALITY_NUM > 1) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1; //not including the last one + vector inputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality2_ex = concatenate(inputSeasonality2_exVect); + + firstE = season2_exVect.begin() + i + 1; + pastLastE = season2_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality2_ex = concatenate(outputSeasonality2_exVect); + + input1_ex = cdiv(input1_ex, inputSeasonality2_ex); //input deseasonalization + labels1_ex = cdiv(labels1_ex, outputSeasonality2_ex); //output deseasonalization + } + + vector joinedInput_ex; + joinedInput_ex.emplace_back(noise(squash(cdiv(input1_ex, levels_exVect[i])), NOISE_STD)); //input normalization+noise + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression labels_ex = squash(cdiv(labels1_ex, levels_exVect[i]));//output normalization + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il=INPUT_SIZE+MIN_INP_SEQ_LEN) + losses.push_back(loss_ex); + }//through points of a series + + Expression forecLoss_ex= average(losses); + Expression loss_exp = forecLoss_ex; + + float levVarLoss=0; + if (LEVEL_VARIABILITY_PENALTY > 0) { + Expression levelVarLossP_ex = levelVarLoss_ex*LEVEL_VARIABILITY_PENALTY; + levVarLoss = as_scalar(levelVarLossP_ex.value()); + levVarLosses.push_back(levVarLoss); + loss_exp= loss_exp + levelVarLossP_ex; + } + + float cStateLoss=0; + if (C_STATE_PENALTY>0) { + vector cStateLosses_vEx; + for (int irnn = 0; irnn < rNNStack.size(); irnn++) + for (int it = 0; itupdate();//update shared weights + perSeriesTrainer->update();//update params of this series only + } catch (exception& e) {//it may happen occasionally. I believe it is due to not robust enough implementation of squashing functions in Dynet. When abs(x)>35 NAs appear. + //so the code below is trying to produce some diagnostics, hopefully useful when setting LEVEL_VARIABILITY_PENALTY and C_STATE_PENALTY. 
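+ //What the scan below does: it walks every RNN chunk, every layer and every stored time step, and records
+ //the largest absolute value seen in the hidden/cell states together with where it occurred (time step,
+ //layer, chunk). Very large values here are the usual precursor of the NaNs mentioned above, so the
+ //printout helps decide whether LEVEL_VARIABILITY_PENALTY or C_STATE_PENALTY needs to be lowered.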
+ cerr<<"cought exception while doing "< maxAbs) { + maxAbs = abs(state[iv]); + timeOfMax=it; + layerOfMax=il; + chunkOfMax= irnn; + } + } + } //through layers/states + } //through time + } //through chunks + + cout << "levSm:" << as_scalar(levSm_ex.value()) << endl; + if (SEASONALITY_NUM > 0) + cout << "sSm:" << as_scalar(sSm_ex.value()) << endl; + if (SEASONALITY_NUM > 1) + cout << "sSm2:" << as_scalar(sSm2_ex.value()) << endl; + cout << "max abs:" << maxAbs <<" at time:"<< timeOfMax<<" at layer:"<< layerOfMax<<" and chunk:"<< chunkOfMax< 0) { + histAdditionalParams.sSm=as_scalar(sSm_ex.value()); + for (int isea = 0; isea 1) { + histAdditionalParams.sSm2 = as_scalar(sSm2_ex.value()); + for (int isea=0; isea 0 || C_STATE_PENALTY > 0) { + float averageForecLoss = accumulate(forecLosses.begin(), forecLosses.end(), 0.0) / forecLosses.size(); + cout << " forec loss:" << averageForecLoss * 100; + } + if (LEVEL_VARIABILITY_PENALTY > 0) { + float averagelevVarLoss = accumulate(levVarLosses.begin(), levVarLosses.end(), 0.0) / levVarLosses.size(); + cout << " levVar loss:" << averagelevVarLoss * 100; + } + if (C_STATE_PENALTY > 0) { + float averageStateLoss = accumulate(stateLosses.begin(), stateLosses.end(), 0.0) / stateLosses.size(); + cout << " state loss:" << averageStateLoss * 100; + } + cout<at(inet); + Expression MLPW_ex, MLPB_ex; + if (ADD_NL_LAYER) { + MLPW_ex = parameter(cg, MLPW_par); + MLPB_ex = parameter(cg, MLPB_par); + } + Expression adapterW_ex=parameter(cg, adapterW_par); + Expression adapterB_ex=parameter(cg, adapterB_par); + + Expression levSmSerNet0_ex = parameter(cg, additionalParams.levSm); + Expression levSm_ex = logistic(levSmSerNet0_ex); + + vector season_exVect;//vector, because we do not know how long the series is + Expression sSm_ex; + if (SEASONALITY_NUM > 0) { + Expression sSmSerNet0_ex= parameter(cg, additionalParams.sSm); + sSm_ex = logistic(sSmSerNet0_ex); + + for (int isea = 0; isea season2_exVect;//vector, because we do not know how long the series is + Expression sSm2_ex; + if (SEASONALITY_NUM > 1) { + Expression sSm2SerNet0_ex= parameter(cg, additionalParams.sSm2); + sSm2_ex = logistic(sSm2SerNet0_ex); + + for (int isea = 0; isea levels_exVect; + if (SEASONALITY_NUM == 0) { + levels_exVect.push_back(input(cg, m4Obj.vals[0])); + for (int i = 1; i0 then this is shortened, so it always contains data awe have right to access + Expression newLevel_ex = m4Obj.vals[i] * cdiv(levSm_ex, season_exVect[i]) + (1 - levSm_ex)*levels_exVect[i - 1]; + levels_exVect.push_back(newLevel_ex); + + Expression newSeason_ex = m4Obj.vals[i] * cdiv(sSm_ex, newLevel_ex) + (1 - sSm_ex)*season_exVect[i]; + season_exVect.push_back(newSeason_ex); + } + + //if prediction horizon is larger than seasonality, so we need to repeat some of the seasonality factors + if (OUTPUT_SIZE>SEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + } + else if (SEASONALITY_NUM == 2) { + Expression lev = cdiv(input(cg, m4Obj.vals[0]), season_exVect[0] * season2_exVect[0]); + levels_exVect.push_back(lev); + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + //if prediction horizon is larger than seasonality, so we need to repeat some of the seasonality factors + if 
(OUTPUT_SIZE>SEASONALITY2) { + unsigned long startSeasonalityIndx = season2_exVect.size() - SEASONALITY2; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY2); i++) + season2_exVect.push_back(season2_exVect[startSeasonalityIndx + i]); + } + } + else { + cerr<<"SEASONALITY_NUM="<< SEASONALITY_NUM; + exit(-1); + } + + + Expression inputSeasonality_ex; Expression inputSeasonality2_ex; + Expression outputSeasonality_ex; Expression outputSeasonality2_ex; + vector losses;//losses of steps through single time series + Expression out_ex;//we declare it here, bcause the last one will be the forecast + for (int i=INPUT_SIZE-1; i::const_iterator first = m4Obj.vals.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1; //not including the last one + vector input_vect(first, pastLast); //[first,pastLast) + Expression input1_ex = input(cg, { INPUT_SIZE }, input_vect); + + if (SEASONALITY_NUM > 0 ) { + vector::const_iterator firstE = season_exVect.begin() +i+1-INPUT_SIZE; + vector::const_iterator pastLastE = season_exVect.begin() +i+1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality_ex=concatenate(inputSeasonality_exVect); + input1_ex = cdiv(input1_ex, inputSeasonality_ex); // input deseasonalization + } + if (SEASONALITY_NUM > 1) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1; //not including the last one + vector inputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality2_ex = concatenate(inputSeasonality2_exVect); + input1_ex = cdiv(input1_ex, inputSeasonality2_ex); //input deseasonalization + } + + vector joinedInput_ex; + joinedInput_ex.emplace_back(noise(squash(cdiv(input1_ex, levels_exVect[i])), NOISE_STD)); //input normalization+noise + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il::const_iterator first = m4Obj.vals.begin() + i + 1; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1 + OUTPUT_SIZE; + vector labels_vect(first, pastLast); //[first,pastLast) + Expression labels1_ex = input(cg, { OUTPUT_SIZE }, labels_vect); + + if (SEASONALITY_NUM > 0) { + vector::const_iterator firstE = season_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + outputSeasonality_ex = concatenate(outputSeasonality_exVect); + labels1_ex = cdiv(labels1_ex, outputSeasonality_ex); //output deseasonalization + } + if (SEASONALITY_NUM > 1) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1 + OUTPUT_SIZE;//checking if enough elements is in the vecor was done a few pe + vector outputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality2_ex = concatenate(outputSeasonality2_exVect); + labels1_ex = cdiv(labels1_ex, outputSeasonality2_ex); //output deseasonalization + } + Expression labels_ex = squash(cdiv(labels1_ex, levels_exVect[i]));//output normalization + + //Expression loss_ex = pinBallLoss(out_ex, labels_ex); + Expression loss_ex = MSIS(out_ex, labels_ex); + if (i>=INPUT_SIZE+MIN_INP_SEQ_LEN) + losses.push_back(loss_ex); //training area losses + } + + 
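+ //Loss note (an interpretation, not part of the original comments): pinBallLoss/MSIS above are the
+ //quantile-style objectives used for the lower/upper prediction-interval bounds; the M4 MSIS metric this
+ //presumably follows charges the interval width plus 2/alpha times any amount by which the actual falls
+ //outside [low, high]. Both are computed here in the normalized, deseasonalized (squashed) space.
+ //The block below runs only at the last in-sample point, i.e. on the real out-of-sample forecast: the
+ //output is un-squashed with expand(), multiplied by the final level and, when seasonality is modelled,
+ //by the corresponding future seasonality factors, bringing it back to the original scale.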
if (i==(m4Obj.n-1)) {//validation loss + out_ex=expand(out_ex)*levels_exVect[i];//back to original scale + if (SEASONALITY_NUM > 0 ) { + vector::const_iterator firstE = season_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + for (int ios=0; ios 1 ) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + for (int ios = 0; ios, AVERAGING_LEVEL+1>, NUM_OF_NETS>, BIG_LOOP>> testResults_map((int)series_len*1.5);//per series, big loop, etc... + //No epoch here, because this will just reflect the current (latest) situation - the last few epochs + vector out_vect=as_vector(out_ex.value()); + testResults_map[series][inet][iEpoch%AVERAGING_LEVEL]=out_vect; + if (iEpoch>=AVERAGING_LEVEL && iEpoch % FREQ_OF_TEST==0) { + vector firstForec=testResults_map[series][inet][0]; + testResults_map[series][inet][AVERAGING_LEVEL]=firstForec; + for (int ii=1; ii nextForec=testResults_map[series][inet][ii]; + for (int iii=0; iii<2*OUTPUT_SIZE; iii++) + testResults_map[series][inet][AVERAGING_LEVEL][iii]+=nextForec[iii]; + } + for (int iii=0; iii<2*OUTPUT_SIZE; iii++) + testResults_map[series][inet][AVERAGING_LEVEL][iii]/=AVERAGING_LEVEL; + } //time to average + }//through series + } //through nets + + if (iEpoch>0 && iEpoch % FREQ_OF_TEST==0) { + //now that we have saved outputs of all nets on all series, let's calc how best and topn combinations performed during current epoch. + vector bestEpochLosses; + vector bestEpochAvgLosses; + vector topnEpochLosses; + vector topnEpochAvgLosses; + vector bestEpochLossesL; + vector bestEpochAvgLossesL; + vector topnEpochLossesL; + vector topnEpochAvgLossesL; + vector bestEpochLossesH; + vector bestEpochAvgLossesH; + vector topnEpochLossesH; + vector topnEpochAvgLossesH; + + for (auto iter = series_vect.begin() ; iter != series_vect.end(); ++iter) { + string series=*iter; + auto m4Obj=allSeries_map[series]; + +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 4, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)series.c_str(), 0, &nullTerminatedStringOfSeries)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE + 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&m4Obj.n, 0, NULL)); +#endif + + float avgLoss; + vector avgLatest; + vector avgAvg; + + for (int itop=0; itop 0) { + float qLoss = errorFunc(testResults_map[series][inet][iEpoch%AVERAGING_LEVEL], m4Obj.testVals, m4Obj.meanAbsSeasDiff); + bestEpochLosses.push_back(qLoss); + + qLoss=wQuantLoss(testResults_map[series][inet][iEpoch%AVERAGING_LEVEL], m4Obj.testVals, TAUL, 0); + bestEpochLossesL.push_back(qLoss); + + qLoss = wQuantLoss(testResults_map[series][inet][iEpoch%AVERAGING_LEVEL], m4Obj.testVals, TAUH, OUTPUT_SIZE); + bestEpochLossesH.push_back(qLoss); + } + avgLatest=testResults_map[series][inet][iEpoch%AVERAGING_LEVEL]; //used later for calculating topn loss + + if (iEpoch>=AVERAGING_LEVEL) { + if (LBACK > 0) { + float qLoss = errorFunc(testResults_map[series][inet][AVERAGING_LEVEL], m4Obj.testVals, m4Obj.meanAbsSeasDiff); + bestEpochAvgLosses.push_back(qLoss); + + qLoss = wQuantLoss(testResults_map[series][inet][AVERAGING_LEVEL], m4Obj.testVals, TAUL, 0); + 
bestEpochAvgLossesL.push_back(qLoss); + + qLoss = wQuantLoss(testResults_map[series][inet][AVERAGING_LEVEL], m4Obj.testVals, TAUH, OUTPUT_SIZE); + bestEpochAvgLossesH.push_back(qLoss); + } + avgAvg=testResults_map[series][inet][AVERAGING_LEVEL]; + } + } else { + for (int iii=0; iii<2*OUTPUT_SIZE; iii++) { + avgLatest[iii]+=testResults_map[series][inet][iEpoch%AVERAGING_LEVEL][iii];//calculate current topn + if (iEpoch>=AVERAGING_LEVEL) + avgAvg[iii]+=testResults_map[series][inet][AVERAGING_LEVEL][iii]; + } + } + }//through topn + + for (int iii=0; iii<2*OUTPUT_SIZE; iii++) + avgLatest[iii]/=TOPN; + + if (LBACK > 0) { + float qLoss = errorFunc(avgLatest, m4Obj.testVals, m4Obj.meanAbsSeasDiff); + topnEpochLosses.push_back(qLoss); + + qLoss = wQuantLoss(avgLatest, m4Obj.testVals, TAUL, 0); + topnEpochLossesL.push_back(qLoss); + + qLoss = wQuantLoss(avgLatest, m4Obj.testVals, TAUH, OUTPUT_SIZE); + topnEpochLossesH.push_back(qLoss); + } + + if (iEpoch>=AVERAGING_LEVEL) { + for (int iii = 0; iii<2*OUTPUT_SIZE; iii++) + avgAvg[iii] /= TOPN; + + finalResults_map[series] = avgAvg; + + if (LBACK > 0) { +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE + 1, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&avgLoss, 0, NULL)); + + for (int iv=0; iv<2; iv++) { + if (iv==0) + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 1, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)runL.c_str(), 0, &nullTerminatedStringOfRun)) + else + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 1, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)runH.c_str(), 0, &nullTerminatedStringOfRun)); + + for (int iii=0; iii 0) { + float bestEpochLoss=accumulate( bestEpochLosses.begin(), bestEpochLosses.end(), 0.0)/bestEpochLosses.size(); + float topnEpochLoss=accumulate( topnEpochLosses.begin(), topnEpochLosses.end(), 0.0)/topnEpochLosses.size(); + float bestEpochLossL = accumulate(bestEpochLossesL.begin(), bestEpochLossesL.end(), 0.0) / bestEpochLossesL.size(); + float topnEpochLossL = accumulate(topnEpochLossesL.begin(), topnEpochLossesL.end(), 0.0) / topnEpochLossesL.size(); + float bestEpochLossH = accumulate(bestEpochLossesH.begin(), bestEpochLossesH.end(), 0.0) / bestEpochLossesH.size(); + float topnEpochLossH = accumulate(topnEpochLossesH.begin(), topnEpochLossesH.end(), 0.0) / topnEpochLossesH.size(); + cout<=AVERAGING_LEVEL) { + float bestEpochAvgLoss=accumulate( bestEpochAvgLosses.begin(), bestEpochAvgLosses.end(), 0.0)/bestEpochAvgLosses.size(); + float topnEpochAvgLoss=accumulate( topnEpochAvgLosses.begin(), topnEpochAvgLosses.end(), 0.0)/topnEpochAvgLosses.size(); + float bestEpochAvgLossL = accumulate(bestEpochAvgLossesL.begin(), bestEpochAvgLossesL.end(), 0.0) / bestEpochAvgLossesL.size(); + float topnEpochAvgLossL = accumulate(topnEpochAvgLossesL.begin(), topnEpochAvgLossesL.end(), 0.0) / topnEpochAvgLossesL.size(); + float bestEpochAvgLossH = accumulate(bestEpochAvgLossesH.begin(), bestEpochAvgLossesH.end(), 0.0) / bestEpochAvgLossesH.size(); + float topnEpochAvgLossH = accumulate(topnEpochAvgLossesH.begin(), topnEpochAvgLossesH.end(), 0.0) / topnEpochAvgLossesH.size(); + cout<<" bestAvg:"<> netRanking_map + netRanking_map[series]=perfToRanking(netPerf_map[series]); + + for (int itop=0; itop diagSeries; + for (int i=0; i<1; i++) {//add a few normal ones + int irand=uniOnSeries(rng); + diagSeries.insert(series_vect[irand]); + } + for(auto series : 
diagSeries) { + cout<at(inet); + for (int iEpoch=0; iEpoch 0 ) { + cout<<"sSm:"<at(inet); + for (int iEpoch=0; iEpoch 1 ) { + cout<<"sSm2:"<at(inet); + for (int iEpoch=0; iEpochat(inet); + for (int iEpoch = 0; iEpoch0) { + cout << "levels:" << iEpoch<<" "; + for (int iv = 0; iv 0 ) { + cout << "seasons:" << iEpoch<<" "; + for (int iv = 0; iv 1 ) { + cout << "seasons2:" << iEpoch<<" "; + for (int iv = 0; iv + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + + + + Source Files + + + \ No newline at end of file diff --git a/118 - slaweks17/c++/windows_VisualStudio/M44/M44.vcxproj b/118 - slaweks17/c++/windows_VisualStudio/M44/M44.vcxproj new file mode 100644 index 0000000..b56923a --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/M44/M44.vcxproj @@ -0,0 +1,227 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + RelWithDebug + Win32 + + + RelWithDebug + x64 + + + + + + + + + + + {7A192E0C-8F58-4D65-998E-3A7010AB5F87} + Win32Proj + M44 + 8.1 + + + + Application + true + v140 + Unicode + + + Application + true + v140 + Unicode + + + Application + false + v140 + true + Unicode + + + Application + true + v140 + Unicode + Sequential + + + Application + true + v140 + Unicode + Sequential + + + Application + false + v140 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + true + + + true + + + true + + + true + + + false + + + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + + + + + NotUsing + Level1 + Disabled + WIN32;_WINDOWS;EIGEN_USE_MKL_ALL;EIGEN_FAST_MATH;NOMINMAX;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + E:\progs2\dynet;E:\progs\Eigen; + + + Console + true + E:\progs2\dynet\buildMKL\dynet\Debug + dynet.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + NotUsing + Level1 + MaxSpeed + WIN32;_WINDOWS;EIGEN_FAST_MATH;EIGEN_USE_MKL_ALL;NOMINMAX;NDEBUG_;CONSOLE;%(PreprocessorDefinitions) + E:\progs2\dynet;E:\progs\Eigen; + AnySuitable + true + Speed + AdvancedVectorExtensions + Default + MultiThreadedDLL + ProgramDatabase + true + false + + + Console + true + E:\progs2\dynet\buildMKL\dynet\RelWithDebInfo + dynet.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + true + true + + + + + + \ No newline at end of file diff --git a/118 - slaweks17/c++/windows_VisualStudio/M44/slstm.h b/118 - slaweks17/c++/windows_VisualStudio/M44/slstm.h new file mode 100644 index 0000000..adb63a7 --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/M44/slstm.h @@ -0,0 +1,394 @@ +/** +* file slstm.h +* header for my implementation of dilated LSTMs, based on Dynet LSTM builders + - DilatedLSTMBuilder - standard Dilated LSTM 
(https://papers.nips.cc/paper/6613-dilated-recurrent-neural-networks.pdf) + - ResidualDilatedLSTMBuilder - Dilated LSTM with special Residual shortcuts, after https://arxiv.org/abs/1701.03360 + - AttentiveDilatedLSTMBuilder - Dilated LSTM with Attention mechanism, as in the second stage of https://arxiv.org/abs/1704.02971 +* +Slawek Smyl, Mar-May 2018 +*/ + +#ifndef DYNET_SLSTMS_H_ +#define DYNET_SLSTMS_H_ + +#include "dynet/dynet.h" +#include "dynet/rnn.h" +#include "dynet/expr.h" + +using namespace std; + +namespace dynet { + + //basd on VanillaLSTMBuilder + struct ResidualDilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + ResidualDilatedLSTMBuilder(); + /** + * \brief Constructor for the ResidualDilatedLSTMBuilder + * + * \param dilations Vector of dilations + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + * \param ln_lstm Whether to use layer normalization + * \param forget_bias value(float) to use as bias for the forget gate(default = 1.0) + */ + explicit ResidualDilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model, + bool ln_lstm = false, + float forget_bias = 1.f); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 
+ * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + * \param d_h Dropout rate \f$d_h\f$ for the output \f$h_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + /** + * \brief Get parameters in ResidualDilatedLSTMBuilder + * \return list of points to ParameterStorage objects + */ + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + // first index is layer, then ... + std::vector> ln_params; + + // first index is layer, then ... + std::vector> param_vars; + // first index is layer, then ... + std::vector> ln_param_vars; + + // first index is layer, then ... + std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + float dropout_rate_h; + bool ln_lstm; + float forget_bias; + bool dropout_masks_valid; + vector dilations; //one int per layer + + private: + ComputationGraph* _cg; // Pointer to current cg + + }; + + + struct DilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + DilatedLSTMBuilder(); + /** + * \brief Constructor for the DilatedLSTMBuilder + * + * \param dilations Vector of dilations + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + */ + explicit DilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model); + + Expression back() const override { return (cur == -1 ? 
h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. + * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + + void set_weightnoise(float std); + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + + // first index is layer, then ... + std::vector> param_vars; + + // first index is layer, then ... 
+ std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + float dropout_rate_h; + float weightnoise_std; + vector dilations; //one int per layer + + bool dropout_masks_valid; + private: + ComputationGraph* _cg; // Pointer to current cg + + }; + + + struct AttentiveDilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + AttentiveDilatedLSTMBuilder(); + /** + * \brief Constructor for the AttentiveDilatedLSTMBuilder + * + * \param max_dilations Vector, maximum dilations (per layer) + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + */ + explicit AttentiveDilatedLSTMBuilder(vector max_dilations, + unsigned input_dim, + unsigned hidden_dim, + unsigned attention_dim, + ParameterCollection& model); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 
+ * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + + void set_weightnoise(float std); + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + + // first index is layer, then ... + std::vector> param_vars; + + // first index is layer, then ... + std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + unsigned attention_dim; + float dropout_rate_h; + float weightnoise_std; + vector max_dilations; //one int per layer + + bool dropout_masks_valid; + private: + ComputationGraph* _cg; // Pointer to current cg + + }; +} // namespace dynet + +#endif diff --git a/118 - slaweks17/c++/windows_VisualStudio/readme.txt b/118 - slaweks17/c++/windows_VisualStudio/readme.txt new file mode 100644 index 0000000..de51078 --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/readme.txt @@ -0,0 +1,5 @@ +This is Visual Studio 15 solution, with 4 projects, one for each .cc file. +Two targets are defined: Debug and RelWitDebug, which is Release with debug info, that I used normally. +You will need to update include and link paths to point to your installation of Dynet. +In x64\RelWithDebug directory you will find two example scripts to run the executables +in conjunction with one program started interactively inside VS. 
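+As a rough pointer (the paths below are placeholders, not required locations), the settings to edit in each
+project are typically:
+  C/C++ -> General -> Additional Include Directories:  <path-to-dynet>;<path-to-Eigen>
+  Linker -> General -> Additional Library Directories:  <path-to-dynet>\buildMKL\dynet\RelWithDebInfo (or \Debug)
+  Linker -> Input -> Additional Dependencies:           dynet.lib (keep the Windows libraries already listed)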
\ No newline at end of file diff --git a/118 - slaweks17/c++/windows_VisualStudio/x64/RelWithDebug/readme.txt b/118 - slaweks17/c++/windows_VisualStudio/x64/RelWithDebug/readme.txt new file mode 100644 index 0000000..6568116 --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/x64/RelWithDebug/readme.txt @@ -0,0 +1,5 @@ +These example run scripts. They are meant to be run on 6-core computer and assume that the program, +M41.exe has been started interactively in Visual Studio, so they add 5 processes. +run61.cmd should be run for ES_RNN and ES_RNN_PI, so Monthly and Quarterly series, +although for Monthly you probably want to use computer with more cores, unless you are fine waiting a week or so :-) +run61_e.cmd is for ES_RNN_E and ES_RNN_E_PI, so all other cases. \ No newline at end of file diff --git a/118 - slaweks17/c++/windows_VisualStudio/x64/RelWithDebug/run61.cmd b/118 - slaweks17/c++/windows_VisualStudio/x64/RelWithDebug/run61.cmd new file mode 100644 index 0000000..5ff41dd --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/x64/RelWithDebug/run61.cmd @@ -0,0 +1,5 @@ +start M41 10 2 +start M41 11 1 5 +start M41 11 2 5 +start M41 12 1 10 +start M41 12 2 10 diff --git a/118 - slaweks17/c++/windows_VisualStudio/x64/RelWithDebug/run61_e.cmd b/118 - slaweks17/c++/windows_VisualStudio/x64/RelWithDebug/run61_e.cmd new file mode 100644 index 0000000..a862afa --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/x64/RelWithDebug/run61_e.cmd @@ -0,0 +1,5 @@ +start M41 5 +start M41 10 +start M41 15 +start M41 20 +start M41 25 diff --git a/118 - slaweks17/readme.txt b/118 - slaweks17/readme.txt new file mode 100644 index 0000000..0c926cd --- /dev/null +++ b/118 - slaweks17/readme.txt @@ -0,0 +1,9 @@ +ES-RNN programs, related script, and docs. +M4 Forecasting Competition, 2018 +Slawek Smyl, Uber. 
+ +The programs are in C++ and use Dynet - a Dynamic Graph NN system (https://github.com/clab/dynet) + + + + diff --git a/118 - slaweks17/sql/createM72nn_SQLServer.sql b/118 - slaweks17/sql/createM72nn_SQLServer.sql new file mode 100644 index 0000000..e7ad329 --- /dev/null +++ b/118 - slaweks17/sql/createM72nn_SQLServer.sql @@ -0,0 +1,135 @@ +USE [slawek] +GO + +/****** Object: Table [dbo].[M72nn] Script Date: 6/2/2018 9:37:26 AM ******/ +SET ANSI_NULLS ON +GO + +SET QUOTED_IDENTIFIER ON +GO + +SET ANSI_PADDING ON +GO + +CREATE TABLE [dbo].[M72nn]( + [run] [varchar](164) NOT NULL, + [LBack] [smallint] NOT NULL, + [iBig] [smallint] NOT NULL, + [series] [varchar](20) NOT NULL, + [epoch] [smallint] NOT NULL, + [actual1] [real] NULL, + [forec1] [real] NULL, + [actual2] [real] NULL, + [forec2] [real] NULL, + [actual3] [real] NULL, + [forec3] [real] NULL, + [actual4] [real] NULL, + [forec4] [real] NULL, + [actual5] [real] NULL, + [forec5] [real] NULL, + [actual6] [real] NULL, + [forec6] [real] NULL, + [actual7] [real] NULL, + [forec7] [real] NULL, + [actual8] [real] NULL, + [forec8] [real] NULL, + [actual9] [real] NULL, + [forec9] [real] NULL, + [actual10] [real] NULL, + [forec10] [real] NULL, + [actual11] [real] NULL, + [forec11] [real] NULL, + [actual12] [real] NULL, + [forec12] [real] NULL, + [actual13] [real] NULL, + [forec13] [real] NULL, + [actual14] [real] NULL, + [forec14] [real] NULL, + [actual15] [real] NULL, + [forec15] [real] NULL, + [actual16] [real] NULL, + [forec16] [real] NULL, + [actual17] [real] NULL, + [forec17] [real] NULL, + [actual18] [real] NULL, + [forec18] [real] NULL, + [actual19] [real] NULL, + [forec19] [real] NULL, + [actual20] [real] NULL, + [forec20] [real] NULL, + [actual21] [real] NULL, + [forec21] [real] NULL, + [actual22] [real] NULL, + [forec22] [real] NULL, + [actual23] [real] NULL, + [forec23] [real] NULL, + [actual24] [real] NULL, + [forec24] [real] NULL, + [actual25] [real] NULL, + [forec25] [real] NULL, + [actual26] [real] NULL, + [forec26] [real] NULL, + [actual27] [real] NULL, + [forec27] [real] NULL, + [actual28] [real] NULL, + [forec28] [real] NULL, + [actual29] [real] NULL, + [forec29] [real] NULL, + [actual30] [real] NULL, + [forec30] [real] NULL, + [actual31] [real] NULL, + [forec31] [real] NULL, + [actual32] [real] NULL, + [forec32] [real] NULL, + [actual33] [real] NULL, + [forec33] [real] NULL, + [actual34] [real] NULL, + [forec34] [real] NULL, + [actual35] [real] NULL, + [forec35] [real] NULL, + [actual36] [real] NULL, + [forec36] [real] NULL, + [actual37] [real] NULL, + [forec37] [real] NULL, + [actual38] [real] NULL, + [forec38] [real] NULL, + [actual39] [real] NULL, + [forec39] [real] NULL, + [actual40] [real] NULL, + [forec40] [real] NULL, + [actual41] [real] NULL, + [forec41] [real] NULL, + [actual42] [real] NULL, + [forec42] [real] NULL, + [actual43] [real] NULL, + [forec43] [real] NULL, + [actual44] [real] NULL, + [forec44] [real] NULL, + [actual45] [real] NULL, + [forec45] [real] NULL, + [actual46] [real] NULL, + [forec46] [real] NULL, + [actual47] [real] NULL, + [forec47] [real] NULL, + [actual48] [real] NULL, + [forec48] [real] NULL, + [trainingError] [real] NULL, + [variable] [varchar](20) NOT NULL, + [n] [smallint] NOT NULL, + [dateTimeOfPrediction] [datetime] NOT NULL, + CONSTRAINT [M72nn_pk] PRIMARY KEY CLUSTERED +( + [run] ASC, + [LBack] ASC, + [iBig] ASC, + [series] ASC, + [epoch] ASC +)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] +) ON 
[PRIMARY] + +GO + +SET ANSI_PADDING OFF +GO + + diff --git a/118 - slaweks17/sql/createM72nn_mysql.txt b/118 - slaweks17/sql/createM72nn_mysql.txt new file mode 100644 index 0000000..f86d1c0 --- /dev/null +++ b/118 - slaweks17/sql/createM72nn_mysql.txt @@ -0,0 +1,54 @@ +CREATE TABLE M72nn( + run varchar(160) NOT NULL, + LBack smallint NOT NULL, + iBig smallint NOT NULL, + series varchar(20) NOT NULL, + epoch smallint NOT NULL, + actual1 float NULL, + forec1 float NULL, + actual2 float NULL, + forec2 float NULL, + actual3 float NULL, + forec3 float NULL, + actual4 float NULL, + forec4 float NULL, + actual5 float NULL, + forec5 float NULL, + actual6 float NULL, + forec6 float NULL, + actual7 float NULL, + forec7 float NULL, + actual8 float NULL, + forec8 float NULL, + actual9 float NULL, + forec9 float NULL, + actual10 float NULL, + forec10 float NULL, + actual11 float NULL, + forec11 float NULL, + actual12 float NULL, + forec12 float NULL, + actual13 float NULL, + forec13 float NULL, + actual14 float NULL, + forec14 float NULL, + actual15 float NULL, + forec15 float NULL, + actual16 float NULL, + forec16 float NULL, + actual17 float NULL, + forec17 float NULL, + actual18 float NULL, + forec18 float NULL, + trainingError float NULL, + variable varchar(20) NOT NULL, + n smallint NOT NULL, + dateTimeOfPrediction datetime NOT NULL, + CONSTRAINT M72nn_pk PRIMARY KEY CLUSTERED +( + run ASC, + LBack ASC, + iBig ASC, + series ASC, + epoch ASC)); + diff --git a/118 - slaweks17/sql/readme.txt b/118 - slaweks17/sql/readme.txt new file mode 100644 index 0000000..e8c186c --- /dev/null +++ b/118 - slaweks17/sql/readme.txt @@ -0,0 +1,4 @@ +I provide just two example table creation scrits, one for SQL Server and one for mysql. +The mysql table is limited to output vector 18, so would not be good for hourly runs. +Anyway, starting using the database is a large investment of time, apart from installationm, you also need to create auxiliary tables with MASE, and a lot of queries. +I do not have time to do all of it here and suspect there will be little interest in ODBC, so this is all what you get :-) diff --git a/4Theta method.R b/4Theta method.R deleted file mode 100644 index e5a8294..0000000 --- a/4Theta method.R +++ /dev/null @@ -1,209 +0,0 @@ -#This code can be used to reproduce the forecasts submitted to the M4 competition for the 4Theta method - -#Authors: E. Spiliotis and V. 
Assimakopoulos (2017) / Forecasting & Strategy Unit - NTUA - -#Method Description: Generalizing the Theta model for automatic forecasting -#Method Type: Statistical - Decomposition - -library(forecast) #requires version 8.2 - -SeasonalityTest <- function(input, ppy){ - #Used for determining whether the time series is seasonal - tcrit <- 1.645 - if (length(input)<3*ppy){ - test_seasonal <- FALSE - }else{ - xacf <- acf(input, plot = FALSE)$acf[-1, 1, 1] - clim <- tcrit/sqrt(length(input)) * sqrt(cumsum(c(1, 2 * xacf^2))) - test_seasonal <- ( abs(xacf[ppy]) > clim[ppy] ) - - if (is.na(test_seasonal)==TRUE){ test_seasonal <- FALSE } - } - - return(test_seasonal) -} - -Theta.fit <- function(input, fh, theta, curve, model, seasonality , plot=FALSE){ - #Used to fit a Theta model - - #Check if the inputs are valid - if (theta<0){ theta <- 2 } - if (fh<1){ fh <- 1 } - #Estimate theta line weights - outtest <- naive(input, h=fh)$mean - if (theta==0){ - wses <- 0 - }else{ - wses <- (1/theta) - } - wlrl <- (1-wses) - #Estimate seasonaly adjusted time series - ppy <- frequency(input) - if (seasonality=="N"){ - des_input <- input ; SIout <- rep(1, fh) ; SIin <- rep(1, length(input)) - }else if (seasonality=="A"){ - Dec <- decompose(input, type="additive") - des_input <- input-Dec$seasonal - SIin <- Dec$seasonal - SIout <- head(rep(Dec$seasonal[(length(Dec$seasonal)-ppy+1):length(Dec$seasonal)], fh), fh) - }else{ - Dec <- decompose(input, type="multiplicative") - des_input <- input/Dec$seasonal - SIin <- Dec$seasonal - SIout <- head(rep(Dec$seasonal[(length(Dec$seasonal)-ppy+1):length(Dec$seasonal)], fh), fh) - } - - #If negative values, force to linear model - if (min(des_input)<=0){ curve <- "Lrl" ; model <- "A" } - #Estimate theta line zero - observations <- length(des_input) - xs <- c(1:observations) - xf = xff <- c((observations+1):(observations+fh)) - dat=data.frame(des_input=des_input, xs=xs) - newdf <- data.frame(xs = xff) - - if (curve=="Exp"){ - estimate <- lm(log(des_input)~xs) - thetaline0In <- exp(predict(estimate))+input-input - thetaline0Out <- exp(predict(estimate, newdf))+outtest-outtest - }else{ - estimate <- lm(des_input ~ poly(xs, 1, raw=TRUE)) - thetaline0In <- predict(estimate)+des_input-des_input - thetaline0Out <- predict(estimate, newdf)+outtest-outtest - } - - #Estimete Theta line (theta) - if (model=="A"){ - thetalineT <- theta*des_input+(1-theta)*thetaline0In - }else if ((model=="M")&(all(thetaline0In>0)==T)&(all(thetaline0Out>0)==T)){ - thetalineT <- (des_input^theta)*(thetaline0In^(1-theta)) - }else{ - model<-"A" - thetalineT <- theta*des_input+(1-theta)*thetaline0In - } - - #forecasting TL2 - sesmodel <- ses(thetalineT, h=fh) - thetaline2In <- sesmodel$fitted - thetaline2Out <- sesmodel$mean - - #Theta forecasts - if (model=="A"){ - forecastsIn <- as.numeric(thetaline2In*wses)+as.numeric(thetaline0In*wlrl)+des_input-des_input - forecastsOut <- as.numeric(thetaline2Out*wses)+as.numeric(thetaline0Out*wlrl)+outtest-outtest - }else if ((model=="M")& - (all(thetaline2In>0)==T)&(all(thetaline2Out>0)==T)& - (all(thetaline0In>0)==T)&(all(thetaline0Out>0)==T)){ - forecastsIn <- ((as.numeric(thetaline2In)^(1/theta))*(as.numeric(thetaline0In)^(1-(1/theta))))+des_input-des_input - forecastsOut <- ((as.numeric(thetaline2Out)^(1/theta))*(as.numeric(thetaline0Out)^(1-(1/theta))))+outtest-outtest - }else{ - model<-"A" - thetalineT <- theta*des_input+(1-theta)*thetaline0In - sesmodel <- ses(thetalineT,h=fh) - thetaline2In <- sesmodel$fitted - thetaline2Out <- sesmodel$mean - forecastsIn <- 
as.numeric(thetaline2In*wses)+as.numeric(thetaline0In*wlrl)+des_input-des_input - forecastsOut <- as.numeric(thetaline2Out*wses)+as.numeric(thetaline0Out*wlrl)+outtest-outtest - } - - #Seasonal adjustments - if (seasonality=="A"){ - forecastsIn <- forecastsIn+SIin - forecastsOut <- forecastsOut+SIout - }else{ - forecastsIn <- forecastsIn*SIin - forecastsOut <- forecastsOut*SIout - } - - #Zero forecasts become positive - for (i in 1:length(forecastsOut)){ - if (forecastsOut[i]<0){ forecastsOut[i] <- 0 } - } - - if (plot==TRUE){ - united <- cbind(input,forecastsOut) - for (ik in 1:(observations+fh)){ united[ik,1] = sum(united[ik,2],united[ik,1], na.rm = TRUE) } - plot(united[,1],col="black",type="l",main=paste("Model:",model,",Curve:",curve,",Theta:",theta),xlab="Time",ylab="Values", - ylim=c(min(united[,1])*0.85,max(united[,1])*1.15)) - lines(forecastsIn, col="green") ; lines(forecastsOut, col="green") - lines(thetaline2In, col="blue") ; lines(thetaline2Out, col="blue") - lines(thetaline0In, col="red") ; lines(thetaline0Out, col="red") - } - - output=list(fitted=forecastsIn,mean=forecastsOut, - fitted0=thetaline0In,mean0=thetaline0Out, - fitted2=thetaline2In,mean2=thetaline2Out, - model=paste(seasonality,model,curve,c(round(theta,2)))) - - return(output) -} - -FourTheta<- function(input, fh){ - #Used to automatically select the best Theta model - - #Scale - base <- mean(input) ; input <- input/base - - molist <- c("M","A") ; trlist <- c("Lrl","Exp") - - #Check seasonality & Create list of models - ppy <- frequency(input) ; ST <- F - if (ppy>1){ ST <- SeasonalityTest(input, ppy) } - if (ST==T){ - - selist <- c("M","A") - listnames <- c() - for (i in 1:length(selist)){ - for (ii in 1:length(molist)){ - for (iii in 1:length(trlist)){ - listnames <- c(listnames,paste(selist[i], molist[ii], trlist[iii])) - } - } - } - - }else{ - - listnames <- c() - for (ii in 1:length(molist)){ - for (iii in 1:length(trlist)){ - listnames <- c(listnames, paste("N", molist[ii], trlist[iii])) - } - } - - } - - modellist <- NULL - for (i in 1:length(listnames)){ - modellist[length(modellist)+1] <- list(c(substr(listnames,1,1)[i], substr(listnames,3,3)[i], - substr(listnames,5,7)[i])) - } - - #Start validation - errorsin <- c() ; models <- NULL - - #With this function determine opt theta per case - optfun <- function(x, input, fh, curve, model, seasonality){ - mean(abs(Theta.fit(input=input, fh, theta=x, curve, model, seasonality , plot=FALSE)$fitted-input)) - } - - for (j in 1:length(listnames)){ - optTheta <- optimize(optfun, c(1:3), - input=input, fh=fh, curve=modellist[[j]][3], model=modellist[[j]][2], - seasonality=modellist[[j]][1])$minimum - - fortheta <- Theta.fit(input=input, fh=fh, theta=optTheta, curve=modellist[[j]][3], model=modellist[[j]][2], - seasonality=modellist[[j]][1], plot=F) - models[length(models)+1] <- list(fortheta) - errorsin <- c(errorsin, mean(abs(input-fortheta$fitted))) - } - - #Select model and export - selected.model <- models[[which.min(errorsin)]] - description <- selected.model$model - output <- list(fitted=selected.model$fitted*base,mean=selected.model$mean*base, - description=description) - #Returns the fitted and forecasted values, as well as the model used (Type of seasonality, Type of Model, Type of Trend, Theta coef.) 
- - return(output) - -} diff --git a/Benchmarks and Evaluation.R b/Benchmarks and Evaluation.R deleted file mode 100644 index 07469ad..0000000 --- a/Benchmarks and Evaluation.R +++ /dev/null @@ -1,162 +0,0 @@ -#This code can be used to reproduce the forecasts of the M4 Competition STATISTICAL Benchmarks and evaluate their accuracy - -library(forecast) #Requires v8.2 - -################################################################################# -#In this example let us produce forecasts for 100 randomly generated timeseries -fh <- 6 #The forecasting horizon examined -frq <- 1 #The frequency of the data -data_train = data_test <- NULL #Train and test sample -for (i in 1:100){ - data_all <- 2+ 0.15*(1:20) + rnorm(20) - data_train[length(data_train)+1] <- list(ts(head(data_all,length(data_all)-fh),frequency = frq)) - data_test[length(data_test)+1] <- list(tail(data_all,fh)) -} -################################################################################# - -smape_cal <- function(outsample, forecasts){ - #Used to estimate sMAPE - outsample <- as.numeric(outsample) ; forecasts<-as.numeric(forecasts) - smape <- (abs(outsample-forecasts)*200)/(abs(outsample)+abs(forecasts)) - return(smape) -} - -mase_cal <- function(insample, outsample, forecasts){ - #Used to estimate MASE - frq <- frequency(insample) - forecastsNaiveSD <- rep(NA,frq) - for (j in (frq+1):length(insample)){ - forecastsNaiveSD <- c(forecastsNaiveSD, insample[j-frq]) - } - masep<-mean(abs(insample-forecastsNaiveSD),na.rm = TRUE) - - outsample <- as.numeric(outsample) ; forecasts <- as.numeric(forecasts) - mase <- (abs(outsample-forecasts))/masep - return(mase) -} - -naive_seasonal <- function(input, fh){ - #Used to estimate Seasonal Naive - frcy <- frequency(input) - frcst <- naive(input, h=fh)$mean - if (frcy>1){ - frcst <- head(rep(as.numeric(tail(input,frcy)), fh), fh) + frcst - frcst - } - return(frcst) -} - -Theta.classic <- function(input, fh){ - #Used to estimate Theta classic - - #Set parameters - wses <- wlrl<-0.5 ; theta <- 2 - #Estimate theta line (0) - observations <- length(input) - xt <- c(1:observations) - xf <- c((observations+1):(observations+fh)) - train <- data.frame(input=input, xt=xt) - test <- data.frame(xt = xf) - - estimate <- lm(input ~ poly(xt, 1, raw=TRUE)) - thetaline0In <- as.numeric(predict(estimate)) - thetaline0Out <- as.numeric(predict(estimate,test)) - - #Estimate theta line (2) - thetalineT <- theta*input+(1-theta)*thetaline0In - sesmodel <- ses(thetalineT, h=fh) - thetaline2In <- sesmodel$fitted - thetaline2Out <- sesmodel$mean - - #Theta forecasts - forecastsIn <- (thetaline2In*wses)+(thetaline0In*wlrl) - forecastsOut <- (thetaline2Out*wses)+(thetaline0Out*wlrl) - - #Zero forecasts become positive - for (i in 1:length(forecastsOut)){ - if (forecastsOut[i]<0){ forecastsOut[i]<-0 } - } - - output=list(fitted = forecastsIn, mean = forecastsOut, - fitted0 = thetaline0In, mean0 = thetaline0Out, - fitted2 = thetaline2In, mean2 = thetaline2Out) - - return(output) -} - -SeasonalityTest <- function(input, ppy){ - #Used to determine whether a time series is seasonal - tcrit <- 1.645 - if (length(input)<3*ppy){ - test_seasonal <- FALSE - }else{ - xacf <- acf(input, plot = FALSE)$acf[-1, 1, 1] - clim <- tcrit/sqrt(length(input)) * sqrt(cumsum(c(1, 2 * xacf^2))) - test_seasonal <- ( abs(xacf[ppy]) > clim[ppy] ) - - if (is.na(test_seasonal)==TRUE){ test_seasonal <- FALSE } - } - - return(test_seasonal) -} - -Benchmarks <- function(input, fh){ - #Used to estimate the statistical benchmarks of the M4 
-
-  #Estimate seasonally adjusted time series
-  ppy <- frequency(input) ; ST <- F
-  if (ppy>1){ ST <- SeasonalityTest(input,ppy) }
-  if (ST==T){
-    Dec <- decompose(input,type="multiplicative")
-    des_input <- input/Dec$seasonal
-    SIout <- head(rep(Dec$seasonal[(length(Dec$seasonal)-ppy+1):length(Dec$seasonal)], fh), fh)
-  }else{
-    des_input <- input ; SIout <- rep(1, fh)
-  }
-
-  f1 <- naive(input, h=fh)$mean #Naive
-  f2 <- naive_seasonal(input, fh=fh) #Seasonal Naive
-  f3 <- naive(des_input, h=fh)$mean*SIout #Naive2
-  f4 <- ses(des_input, h=fh)$mean*SIout #Ses
-  f5 <- holt(des_input, h=fh, damped=F)$mean*SIout #Holt
-  f6 <- holt(des_input, h=fh, damped=T)$mean*SIout #Damped
-  f7 <- Theta.classic(input=des_input, fh=fh)$mean*SIout #Theta
-  f8 <- (f4+f5+f6)/3 #Comb
-
-  return(list(f1,f2,f3,f4,f5,f6,f7,f8))
-}
-
-Names_benchmarks <- c("Naive", "sNaive", "Naive2", "SES", "Holt", "Damped", "Theta", "Com")
-Total_smape=Total_mase <- array(NA,dim = c(length(Names_benchmarks), fh, length(data_train)))
-#Methods, Horizon, time-series
-for (i in 1:length(data_train)){
-
-  insample <- data_train[[i]]
-  outsample <- data_test[[i]]
-  forecasts <- Benchmarks(input=insample, fh=fh)
-
-  #sMAPE
-  for (j in 1:length(Names_benchmarks)){
-    Total_smape[j,,i] <- smape_cal(outsample, forecasts[[j]]) #j the # of the benchmark
-  }
-  #MASE
-  for (j in 1:length(Names_benchmarks)){
-    Total_mase[j,,i] <- mase_cal(insample, outsample, forecasts[[j]]) #j the # of the benchmark
-  }
-
-}
-
-print("########### sMAPE ###############")
-for (i in 1:length(Names_benchmarks)){
-  print(paste(Names_benchmarks[i], round(mean(Total_smape[i,,]), 3)))
-}
-print("########### MASE ################")
-for (i in 1:length(Names_benchmarks)){
-  print(paste(Names_benchmarks[i], round(mean(Total_mase[i,,]), 3)))
-}
-print("########### OWA ################")
-for (i in 1:length(Names_benchmarks)){
-  print(paste(Names_benchmarks[i],
-              round(((mean(Total_mase[i,,])/mean(Total_mase[3,,]))+(mean(Total_smape[i,,])/mean(Total_smape[3,,])))/2, 3)))
-}
-
-
diff --git a/ML_benchmarks.py b/ML_benchmarks.py
deleted file mode 100644
index 0bc61aa..0000000
--- a/ML_benchmarks.py
+++ /dev/null
@@ -1,341 +0,0 @@
-# This code can be used to reproduce the forecasts of M4 Competition NN benchmarks and evaluate their accuracy
-
-from numpy.random import seed
-seed(42)
-from tensorflow import set_random_seed
-set_random_seed(42)
-from sklearn.neural_network import MLPRegressor
-from keras.models import Sequential
-from keras.layers import Dense, SimpleRNN
-from keras.optimizers import rmsprop
-from keras import backend as ker
-from math import sqrt
-import numpy as np
-import tensorflow as tf
-import pandas as pd
-import gc
-
-
-def detrend(insample_data):
-    """
-    Calculates a & b parameters of LRL
-
-    :param insample_data:
-    :return:
-    """
-    x = np.arange(len(insample_data))
-    a, b = np.polyfit(x, insample_data, 1)
-    return a, b
-
-
-def deseasonalize(original_ts, ppy):
-    """
-    Calculates and returns seasonal indices
-
-    :param original_ts: original data
-    :param ppy: periods per year
-    :return:
-    """
-    """
-    # === get in-sample data
-    original_ts = original_ts[:-out_of_sample]
-    """
-    if seasonality_test(original_ts, ppy):
-        # print("seasonal")
-        # ==== get moving averages
-        ma_ts = moving_averages(original_ts, ppy)
-
-        # ==== get seasonality indices
-        le_ts = original_ts * 100 / ma_ts
-        le_ts = np.hstack((le_ts, np.full((ppy - (len(le_ts) % ppy)), np.nan)))
-        le_ts = np.reshape(le_ts, (-1, ppy))
-        si = np.nanmean(le_ts, 0)
-        norm = np.sum(si) / (ppy * 100)
-        si = si / norm
-    else:
-        # print("NOT seasonal")
-        si = np.full(ppy, 100)
-
-    return si
-
-
-def moving_averages(ts_init, window):
-    """
-    Calculates the moving averages for a given TS
-
-    :param ts_init: the original time series
-    :param window: window length
-    :return: moving averages ts
-    """
-    if len(ts_init) % 2 == 0:
-        ts_ma = pd.rolling_mean(ts_init, window, center=True)
-        ts_ma = pd.rolling_mean(ts_ma, 2, center=True)
-        ts_ma = np.roll(ts_ma, -1)
-    else:
-        ts_ma = pd.rolling_mean(ts_init, window, center=True)
-
-    return ts_ma
-
-
-def seasonality_test(original_ts, ppy):
-    """
-    Seasonality test
-
-    :param original_ts: time series
-    :param ppy: periods per year
-    :return: boolean value: whether the TS is seasonal
-    """
-    s = acf(original_ts, 1)
-    for i in range(2, ppy):
-        s = s + (acf(original_ts, i) ** 2)
-
-    limit = 1.645 * (sqrt((1 + 2 * s) / len(original_ts)))
-
-    return (abs(acf(original_ts, ppy))) > limit
-
-
-def acf(data, k):
-    """
-    Autocorrelation function
-
-    :param data: time series
-    :param k: lag
-    :return:
-    """
-    m = np.mean(data)
-    s1 = 0
-    for i in range(k, len(data)):
-        s1 = s1 + ((data[i] - m) * (data[i - k] - m))
-
-    s2 = 0
-    for i in range(0, len(data)):
-        s2 = s2 + ((data[i] - m) ** 2)
-
-    return float(s1 / s2)
-
-
-def split_into_train_test(data, in_num, fh):
-    """
-    Splits the series into train and test sets. Each step takes multiple points as inputs
-
-    :param data: an individual TS
-    :param fh: number of out of sample points
-    :param in_num: number of input points for the forecast
-    :return:
-    """
-    train, test = data[:-fh], data[-(fh + in_num):]
-    x_train, y_train = train[:-1], np.roll(train, -in_num)[:-in_num]
-    x_test, y_test = train[-in_num:], np.roll(test, -in_num)[:-in_num]
-
-    # reshape input to be [samples, time steps, features] (N-NF samples, 1 time step, 1 feature)
-    x_train = np.reshape(x_train, (-1, 1))
-    x_test = np.reshape(x_test, (-1, 1))
-    temp_test = np.roll(x_test, -1)
-    temp_train = np.roll(x_train, -1)
-    for x in range(1, in_num):
-        x_train = np.concatenate((x_train[:-1], temp_train[:-1]), 1)
-        x_test = np.concatenate((x_test[:-1], temp_test[:-1]), 1)
-        temp_test = np.roll(temp_test, -1)[:-1]
-        temp_train = np.roll(temp_train, -1)[:-1]
-
-    return x_train, y_train, x_test, y_test
-
-
-def rnn_bench(x_train, y_train, x_test, fh, input_size):
-    """
-    Forecasts using 6 SimpleRNN nodes in the hidden layer and a Dense output layer
-
-    :param x_train: train data
-    :param y_train: target values for training
-    :param x_test: test data
-    :param fh: forecasting horizon
-    :param input_size: number of points used as input
-    :return:
-    """
-    # reshape to match expected input
-    x_train = np.reshape(x_train, (-1, input_size, 1))
-    x_test = np.reshape(x_test, (-1, input_size, 1))
-
-    # create the model
-    model = Sequential([
-        SimpleRNN(6, input_shape=(input_size, 1), activation='linear',
-                  use_bias=False, kernel_initializer='glorot_uniform',
-                  recurrent_initializer='orthogonal', bias_initializer='zeros',
-                  dropout=0.0, recurrent_dropout=0.0),
-        Dense(1, use_bias=True, activation='linear')
-    ])
-    opt = rmsprop(lr=0.001)
-    model.compile(loss='mean_squared_error', optimizer=opt)
-
-    # fit the model to the training data
-    model.fit(x_train, y_train, epochs=100, batch_size=1, verbose=0)
-
-    # make predictions
-    y_hat_test = []
-    last_prediction = model.predict(x_test)[0]
-    for i in range(0, fh):
-        y_hat_test.append(last_prediction)
-        x_test[0] = np.roll(x_test[0], -1)
-        x_test[0, (len(x_test[0]) - 1)] = last_prediction
-        last_prediction = model.predict(x_test)[0]
-
-    return np.asarray(y_hat_test)
-
-
-def mlp_bench(x_train, y_train, x_test, fh):
-    """
-    Forecasts using a simple MLP with 6 nodes in the hidden layer
-
-    :param x_train: train input data
-    :param y_train: target values for training
-    :param x_test: test data
-    :param fh: forecasting horizon
-    :return:
-    """
-    y_hat_test = []
-
-    model = MLPRegressor(hidden_layer_sizes=6, activation='identity', solver='adam',
-                         max_iter=100, learning_rate='adaptive', learning_rate_init=0.001,
-                         random_state=42)
-    model.fit(x_train, y_train)
-
-    last_prediction = model.predict(x_test)[0]
-    for i in range(0, fh):
-        y_hat_test.append(last_prediction)
-        x_test[0] = np.roll(x_test[0], -1)
-        x_test[0, (len(x_test[0]) - 1)] = last_prediction
-        last_prediction = model.predict(x_test)[0]
-
-    return np.asarray(y_hat_test)
-
-
-def smape(a, b):
-    """
-    Calculates sMAPE
-
-    :param a: actual values
-    :param b: predicted values
-    :return: sMAPE
-    """
-    a = np.reshape(a, (-1,))
-    b = np.reshape(b, (-1,))
-    return np.mean(2.0 * np.abs(a - b) / (np.abs(a) + np.abs(b))).item()
-
-
-def mase(insample, y_test, y_hat_test, freq):
-    """
-    Calculates MASE
-
-    :param insample: insample data
-    :param y_test: out of sample target values
-    :param y_hat_test: predicted values
-    :param freq: data frequency
-    :return:
-    """
-    y_hat_naive = []
-    for i in range(freq, len(insample)):
-        y_hat_naive.append(insample[(i - freq)])
-
-    masep = np.mean(abs(insample[freq:] - y_hat_naive))
-
-    return np.mean(abs(y_test - y_hat_test)) / masep
-
-
-def main():
-    fh = 6  # forecasting horizon
-    freq = 1  # data frequency
-    in_size = 3  # number of points used as input for each forecast
-
-    err_MLP_sMAPE = []
-    err_MLP_MASE = []
-    err_RNN_sMAPE = []
-    err_RNN_MASE = []
-
-    # ===== In this example we produce forecasts for 100 randomly generated timeseries =====
-    data_all = np.array(np.random.random_integers(0, 100, (100, 20)), dtype=np.float32)
-    for i in range(0, 100):
-        for j in range(0, 20):
-            data_all[i, j] = j * 10 + data_all[i, j]
-
-    counter = 0
-    # ===== Main loop which goes through all timeseries =====
-    for j in range(len(data_all)):
-        ts = data_all[j, :]
-
-        # remove seasonality
-        seasonality_in = deseasonalize(ts, freq)
-
-        for i in range(0, len(ts)):
-            ts[i] = ts[i] * 100 / seasonality_in[i % freq]
-
-        # detrending
-        a, b = detrend(ts)
-
-        for i in range(0, len(ts)):
-            ts[i] = ts[i] - ((a * i) + b)
-
-        x_train, y_train, x_test, y_test = split_into_train_test(ts, in_size, fh)
-
-        # RNN benchmark - Produce forecasts
-        y_hat_test_RNN = np.reshape(rnn_bench(x_train, y_train, x_test, fh, in_size), (-1))
-
-        # MLP benchmark - Produce forecasts
-        y_hat_test_MLP = mlp_bench(x_train, y_train, x_test, fh)
-        for i in range(0, 29):
-            y_hat_test_MLP = np.vstack((y_hat_test_MLP, mlp_bench(x_train, y_train, x_test, fh)))
-        y_hat_test_MLP = np.median(y_hat_test_MLP, axis=0)
-
-        # add trend
-        for i in range(0, len(ts)):
-            ts[i] = ts[i] + ((a * i) + b)
-
-        for i in range(0, fh):
-            y_hat_test_MLP[i] = y_hat_test_MLP[i] + ((a * (len(ts) + i + 1)) + b)
-            y_hat_test_RNN[i] = y_hat_test_RNN[i] + ((a * (len(ts) + i + 1)) + b)
-
-        # add seasonality
-        for i in range(0, len(ts)):
-            ts[i] = ts[i] * seasonality_in[i % freq] / 100
-
-        for i in range(len(ts), len(ts) + fh):
-            y_hat_test_MLP[i - len(ts)] = y_hat_test_MLP[i - len(ts)] * seasonality_in[i % freq] / 100
-            y_hat_test_RNN[i - len(ts)] = y_hat_test_RNN[i - len(ts)] * seasonality_in[i % freq] / 100
-
-        # check if negative or extreme
-        for i in range(len(y_hat_test_MLP)):
-            if y_hat_test_MLP[i] < 0:
-                y_hat_test_MLP[i] = 0
-            if y_hat_test_RNN[i] < 0:
-                y_hat_test_RNN[i] = 0
-
-            if y_hat_test_MLP[i] > (1000 * max(ts)):
-                y_hat_test_MLP[i] = max(ts)
-            if y_hat_test_RNN[i] > (1000 * max(ts)):
-                y_hat_test_RNN[i] = max(ts)
-
-        x_train, y_train, x_test, y_test = split_into_train_test(ts, in_size, fh)
-
-        # Calculate errors
-        err_MLP_sMAPE.append(smape(y_test, y_hat_test_MLP))
-        err_RNN_sMAPE.append(smape(y_test, y_hat_test_RNN))
-        err_MLP_MASE.append(mase(ts[:-fh], y_test, y_hat_test_MLP, freq))
-        err_RNN_MASE.append(mase(ts[:-fh], y_test, y_hat_test_RNN, freq))
-
-        # memory handling
-        ker.clear_session()
-        tf.reset_default_graph()
-        gc.collect()
-
-        counter = counter + 1
-        print("-------------TS ID: ", counter, "-------------")
-
-    print("\n\n---------FINAL RESULTS---------")
-    print("=============sMAPE=============\n")
-    print("#### MLP ####\n", np.mean(err_MLP_sMAPE), "\n")
-    print("#### RNN ####\n", np.mean(err_RNN_sMAPE), "\n")
-    print("==============MASE=============")
-    print("#### MLP ####\n", np.mean(err_MLP_MASE), "\n")
-    print("#### RNN ####\n", np.mean(err_RNN_MASE), "\n")
-
-
-main()
diff --git a/README.md b/README.md
deleted file mode 100644
index 9b7ceb1..0000000
--- a/README.md
+++ /dev/null
@@ -1,2 +0,0 @@
-# M4-methods
-Includes the source code to reproduce the forecasts of the methods which participated in the M4 Competition