diff --git a/118 - slaweks17/ES_RNN_SlawekSmyl.pdf b/118 - slaweks17/ES_RNN_SlawekSmyl.pdf
new file mode 100644
index 0000000..9ab0732
Binary files /dev/null and b/118 - slaweks17/ES_RNN_SlawekSmyl.pdf differ
diff --git a/118 - slaweks17/R/merge.R b/118 - slaweks17/R/merge.R
new file mode 100644
index 0000000..9f6c52b
--- /dev/null
+++ b/118 - slaweks17/R/merge.R
@@ -0,0 +1,143 @@
+# Merging outputs, per category, M4 competition, for point forecasts, so for ES_RNN and ES_RNN_E
+# Author: Slawek Smyl, Mar-May 2018
+
+
+#The c++ executables write their outputs to one directory (occasionally two, sorry :-); in such a case move all files into a single dir before continuing.
+#(One logical run of several instances of the same program will produce a number of files, e.g. outputs with different ibig values)
+#This script merges them, averages the values, and writes the result to the same directory - FOREC_DIR
+###############################################################################
+
+#directory that should include all *-train.csv files, as well as M4-info.csv
+DATA_DIR="F:/progs/data/M4DataSet/"
+m4Info_df=read.csv(paste0(DATA_DIR,"M4-info.csv"))
+options(stringsAsFactors =FALSE)
+
+#directory with all the output files produced by the c++ code we want to merge
+FOREC_DIR='F:\\progs\\data\\M4\\Quarterly2018-05-31_09_30' #do not end with separator
+
+LBACK=1 #should be the same as in the c++ code; LBACK>0 means backtesting
+SP="Quarterly"
+#SP="Yearly"
+#SP="Daily"
+#SP="Hourly"
+
+#//----------------PARAMS ---------- comment/uncomment the following 3 variables
+#for ES_RNN_E, so for all except Monthly and Quarterly runs:
+#NUM_OF_SEEDS=1
+#NUM_OF_CHUNKS=1
+#IBIGS=
+
+#for ES_RNN (do for Monthly and Quarterly):
+NUM_OF_CHUNKS=2 #same as the NUM_OF_CHUNKS constant in the c++ source code; changing it is not recommended.
+NUM_OF_SEEDS=3 #equal to the number of seeds in the startup script (or the number of teams of worker processes),
+# so number_of_concurrent_executables == number_of_lines_in_the_startup_script == NUM_OF_SEEDS*NUM_OF_CHUNKS
+#E.g. if using the following script for ES_RNN:
+# start 10 1 0
+# start 10 2 0
+# start 20 1 5
+# start 20 2 5
+# start 30 1 10
+# start 30 2 10
+# we have here three seeds: 10,20,30, and two chunks: 1,2. (The pairs of workers have IBIG offsets of 0,5,10)
+IBIGS=3 #number of complete runs by each executable; if the programs are not interrupted, this should be equal to the constant BIG_LOOP in the c++ code, by default 3.
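+
+#Editorial note (added comment, not part of the original script): with the example startup
+#script above (seeds 10,20,30; chunks 1,2) and IBIGS=3, the merge step expects
+#NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS = 3*2*3 = 18 forecast files per category in FOREC_DIR;
+#the length(inputFiles) check further below enforces exactly this count.
+#A minimal optional sanity check (assumes FOREC_DIR, SP, LBACK are already set as above):
+#stopifnot(length(list.files(FOREC_DIR, pattern=paste0(SP,".*LB",LBACK))) == NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS)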
+ + +m4_df=read.csv(paste0(DATA_DIR,SP,"-train.csv")) + +sMAPE<-function(forec,actual) { + mean(abs(forec-actual)/(abs(forec)+abs(actual)))*200 +} +errorFunc=sMAPE + + +spInfo_df=m4Info_df[m4Info_df$SP==SP,] +ids=spInfo_df$M4id +horizon=spInfo_df[1,"Horizon"] + +#VARIABLE + "_" + to_string(seedForChunks) + "_" + to_string(chunkNo) + "_" + to_string(ibigDb)+"_LB"+ to_string(LBACK)+ ".csv"; +inputFiles=list.files(path = FOREC_DIR, pattern = paste0(SP,".*LB",LBACK), full.names = T) +if (length(inputFiles)!=NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS) { + stop("length(inputFiles)!=NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS") +} + + +comp_df=NULL +fil=inputFiles[1] +for (fil in inputFiles) { + print(fil) + c_df=read.csv(fil, header=F) + comp_df=rbind(comp_df,c_df) +} +names(comp_df)[1]='id' + +forecSeries=sort(unique(comp_df$id)) +if (length(forecSeries)!=length(ids) && LBACK==0) { + stop(paste0("Expected number of cases:",length(ids)," but got:",length(forecSeries))) +} + +SIZE_OF_CHUNK=1000 +out_df=NULL; ou_df=NULL +fSeries=forecSeries[1] +for (fSeries in forecSeries) { + oneSeriesForecs_df=comp_df[comp_df$id==fSeries,] + o1=colMeans(oneSeriesForecs_df[,2:ncol(oneSeriesForecs_df)]) + o_df=data.frame(id=fSeries, as.list(o1), stringsAsFactors =F) + ou_df=rbind(ou_df, o_df) + if (nrow(ou_df)>=SIZE_OF_CHUNK) { + out_df=rbind(out_df,ou_df) + ou_df=NULL + print(nrow(out_df)) + } +} +out_df=rbind(out_df,ou_df) +print(nrow(out_df)) +out_df=out_df[order(as.integer(substring(out_df$id, 2))),] + +#FOREC_DIR="e:\\temp" +outPath=paste0(FOREC_DIR,'\\',SP,"Forec.csv") +write.csv(out_df,file=outPath,row.names = F) + +################ Main work done, now just diagnostics calculations and plots + +#display a sample of forecasts and, if LBACK>0, actuals +MAX_NUM_OF_POINTS_TO_SHOW=200 +for (i in 1:100) { + irand=sample(1:length(forecSeries),1) + fSeries=forecSeries[irand] + forec=as.numeric(out_df[out_df$id==fSeries,2:ncol(out_df)]) + actual=as.numeric(m4_df[m4_df$V1==fSeries,2:ncol(m4_df)]) + actual=actual[!is.na(actual)] + if (length(actual)>MAX_NUM_OF_POINTS_TO_SHOW) { + actual=actual[(length(actual)-MAX_NUM_OF_POINTS_TO_SHOW):length(actual)] + } + if (LBACK==0) { + plot(c(actual,forec), col=c(rep(1,length(actual)),rep(2,length(forec))), main=fSeries) + } else { + ymin=min(actual,forec) + ymax=max(actual,forec) + plot(1:length(actual),actual, main=fSeries, ylim=c(ymin,ymax)) + lines((length(actual)-length(forec)+1):length(actual), forec, col=2, type='p') + } + + Sys.sleep(5) +} + + +#calc error metrics +if (LBACK>0) { + summErrors=0 + fSeries=forecSeries[1] + i=1 + for (fSeries in forecSeries) { + if (i%%1000==0) + cat(".") + forec=as.numeric(out_df[out_df$id==fSeries,2:ncol(out_df)]) + actual=as.numeric(m4_df[m4_df$V1==fSeries,2:ncol(m4_df)]) + actual=actual[!is.na(actual)] + actual=actual[(length(actual)-LBACK*horizon+1):(length(actual)-(LBACK-1)*horizon)] + summErrors=summErrors+errorFunc(forec,actual) + i=i+1 + } + print(".") + print(paste0("avg error:",round(summErrors/length(forecSeries),2))) +} diff --git a/118 - slaweks17/R/merge_PI.R b/118 - slaweks17/R/merge_PI.R new file mode 100644 index 0000000..6033d7a --- /dev/null +++ b/118 - slaweks17/R/merge_PI.R @@ -0,0 +1,210 @@ +# Merging outputs, per category, M4 competition, for Prediction Intervals , so for ES_RNN_PI and ES_RNN_E_PI +# Author: Slawek Smyl, Mar-May 2018 + + +#The c++ executables write to one (occasinally two, sorry :-), so in such case move files to one dir before continuing) directories. 
+#(One logical run of several instances of the same program will produce a number files, e.g. outputs with different ibig value) +#This script merges, averages values, and writes them down to the same directory - FOREC_DIR +############################################################################### + +#directory that should include all *-train.csv files, as well as M4-info.csv +DATA_DIR="F:/progs/data/M4DataSet/" +m4Info_df=read.csv(paste0(DATA_DIR,"M4-info.csv")) +options(stringsAsFactors =FALSE) +memory.limit(10000) + +#directory with all the output files produced by the c++ code we want to merge +FOREC_DIR='F:\\progs\\data\\M4\\Hourlygood' #do not end with separator + +LBACK=1 #shoud be as in the c++ code, LBACK>0 means backtesting +#SP="Quarterly" +#SP="Yearly" +#SP="Daily" +SP="Hourly" +m4_df=read.csv(paste0(DATA_DIR,SP,"-train.csv")) + + +#//----------------PARAMS ---------- comment/uncomment following 3 variables +#for ES_RNN_E_PI, so for all except Monthly and Quarterly runs: +NUM_OF_SEEDS=1 +NUM_OF_CHUNKS=1 +#IBIGS=/2 +IBIGS=6 + +#for ES_RNN_PI (do for Monthly and Quarterly): +#NUM_OF_CHUNKS=2 #same as NUM_OF_CHUNKS constant the the c++ cource code, changing it is not recommended. +#NUM_OF_SEEDS=3 #It is equal to the number of seeds in the startup script, (or number of teams of worker processes) +# so number_of_concurrent_executables==number_of_lines_in_the_running script/NUM_OF_CHUNKS, and number_of_chunks +#E.g if using following script for ES_RNN: +# start 10 1 0 +# start 10 2 0 +# start 20 1 5 +# start 20 2 5 +# start 30 1 10 +# start 30 2 10 +# we have here three seeds: 10,20,30, and two chunks: 1,2. (The pairs of workes have IBIG offsets of 0,5,10) +#IBIGS=3 #number of complete runs by each executables, so if programs are not interrupted, this should be equal to the constant BIG_LOOP in the c++ code, by default 3. 
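+
+#Editorial note (added comment, not part of the original script): with the ES_RNN_E_PI settings
+#above (NUM_OF_SEEDS=1, NUM_OF_CHUNKS=1, IBIGS=6) the script expects 1*1*6 = 6 lower-bound files
+#(name pattern containing "LLB") and 6 upper-bound files (pattern containing "HLB") in FOREC_DIR;
+#the two length(inputFiles) checks below enforce these counts.
+#A minimal optional sanity check (assumes FOREC_DIR, SP, LBACK are already set as above):
+#stopifnot(length(list.files(FOREC_DIR, pattern=paste0(SP,".*LLB",LBACK))) == NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS)
+#stopifnot(length(list.files(FOREC_DIR, pattern=paste0(SP,".*HLB",LBACK))) == NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS)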
+ +ALPHA = 0.05; +ALPHA_MULTIP = 2 / ALPHA; + +MSIS<-function(forecL,forecH,actual) { + sumDiffs=0 + for (i in 1:(length(actual)-seasonality)) { + sumDiffs=sumDiffs+abs(actual[i+seasonality]-actual[i]) + } + avgAbsDiff=sumDiffs/(length(actual)-seasonality) + + actual=actual[(length(actual)-LBACK*horizon+1):(length(actual)-(LBACK-1)*horizon)] + + msis=sum(forecH-forecL)+sum(pmax(0,forecL-actual))*ALPHA_MULTIP+sum(pmax(0,actual-forecH))*ALPHA_MULTIP + msis/horizon/avgAbsDiff +} +errorFunc=MSIS + +spInfo_df=m4Info_df[m4Info_df$SP==SP,] +ids=spInfo_df$M4id +horizon=spInfo_df[1,"Horizon"] +seasonality=spInfo_df[1,"Frequency"] + + +#lower +#VARIABLE + "_" + to_string(seedForChunks) + "_" + to_string(chunkNo) + "_" + to_string(ibigDb)+"_LB"+ to_string(LBACK)+ ".csv"; +inputFiles=list.files(path = FOREC_DIR, pattern = paste0(SP,".*LLB",LBACK), full.names = T) +if (length(inputFiles)!=NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS) { + stop("length(inputFiles)!=NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS") +} + +comp_df=NULL +fil=inputFiles[1] +for (fil in inputFiles) { + print(fil) + c_df=read.csv(fil, header=F) + comp_df=rbind(comp_df,c_df) +} +names(comp_df)[1]='id' + +forecSeries=sort(unique(comp_df$id)) +if (length(forecSeries)!=length(ids) && LBACK==0) { + stop(paste0("Expected number of cases:",length(ids)," but got:",length(forecSeries))) +} + +SIZE_OF_CHUNK=1000 +out_df=NULL; ou_df=NULL +fSeries=forecSeries[1] +for (fSeries in forecSeries) { + oneSeriesForecs_df=comp_df[comp_df$id==fSeries,] + o1=colMeans(oneSeriesForecs_df[,2:ncol(oneSeriesForecs_df)]) + o_df=data.frame(id=fSeries, as.list(o1), stringsAsFactors =F) + ou_df=rbind(ou_df, o_df) + if (nrow(ou_df)>=SIZE_OF_CHUNK) { + out_df=rbind(out_df,ou_df) + ou_df=NULL + print(nrow(out_df)) + } +} +out_df=rbind(out_df,ou_df) +print(nrow(out_df)) +out_df=out_df[order(as.integer(substring(out_df$id, 2))),] + +outPath=paste0(FOREC_DIR,'\\',SP,"ForecL.csv") +write.csv(out_df,file=outPath,row.names = F) + +lower_df=out_df + +##################################### +#higher +inputFiles=list.files(path = FOREC_DIR, pattern = paste0(SP,".*HLB",LBACK), full.names = T) +if (length(inputFiles)!=NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS) { + stop("length(inputFiles)!=NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS") +} + +comp_df=NULL +fil=inputFiles[1] +for (fil in inputFiles) { + print(fil) + c_df=read.csv(fil, header=F) + comp_df=rbind(comp_df,c_df) +} +names(comp_df)[1]='id' + +forecSeries=sort(unique(comp_df$id)) +if (length(forecSeries)!=length(ids) && LBACK==0) { + print(paste0("Warning. 
Expected number of cases:",length(ids)," but got:",length(forecSeries))) +} + +SIZE_OF_CHUNK=1000 +out_df=NULL; ou_df=NULL +fSeries=forecSeries[1] +for (fSeries in forecSeries) { + oneSeriesForecs_df=comp_df[comp_df$id==fSeries,] + o1=colMeans(oneSeriesForecs_df[,2:ncol(oneSeriesForecs_df)]) + o_df=data.frame(id=fSeries, as.list(o1), stringsAsFactors =F) + ou_df=rbind(ou_df, o_df) + if (nrow(ou_df)>=SIZE_OF_CHUNK) { + out_df=rbind(out_df,ou_df) + ou_df=NULL + print(nrow(out_df)) + } +} +out_df=rbind(out_df,ou_df) +print(nrow(out_df)) +out_df=out_df[order(as.integer(substring(out_df$id, 2))),] + +outPath=paste0(FOREC_DIR,'\\',SP,"ForecH.csv") +write.csv(out_df,file=outPath,row.names = F) + +higher_df=out_df + + +################ Main work done, now just diagnostics calculations and plots + +#display a sample of forecasts and, if LBACK>0, actuals +MAX_NUM_OF_POINTS_TO_SHOW=200 +i=1 +for (i in 1:100) { + irand=sample(1:length(forecSeries),1) + fSeries=forecSeries[irand] + forecL=as.numeric(lower_df[lower_df$id==fSeries,2:ncol(lower_df)]) + forecH=as.numeric(higher_df[higher_df$id==fSeries,2:ncol(higher_df)]) + actual=as.numeric(m4_df[m4_df$V1==fSeries,2:ncol(m4_df)]) + actual=actual[!is.na(actual)] + if (length(actual)>MAX_NUM_OF_POINTS_TO_SHOW) { + actual=actual[(length(actual)-MAX_NUM_OF_POINTS_TO_SHOW):length(actual)] + } + if (LBACK==0) { + plot(c(actual,forecH), col=c(rep(1,length(actual)),rep(2,length(forecH))), main=fSeries) + lines(c(actual,forecL), col=c(rep(1,length(actual)),rep(3,length(forecL))), type='p') + } else { + ymin=min(actual,forecL) + ymax=max(actual,forecH) + plot(1:length(actual),actual, main=fSeries, ylim=c(ymin,ymax)) + lines((length(actual)-length(forecH)+1):length(actual), forecH, col=2, type='p') + lines((length(actual)-length(forecL)+1):length(actual), forecL, col=3, type='p') + } + + Sys.sleep(5) +} + + + +#calc error metric: MSIS +if (LBACK>0) { + summErrors=0 + fSeries=forecSeries[1] + i=1 + for (fSeries in forecSeries) { + if (i%%1000==0) + cat(".") + forecL=as.numeric(lower_df[lower_df$id==fSeries,2:ncol(lower_df)]) + forecH=as.numeric(higher_df[higher_df$id==fSeries,2:ncol(higher_df)]) + actual=as.numeric(m4_df[m4_df$V1==fSeries,2:ncol(m4_df)]) + actual=actual[!is.na(actual)] + summErrors=summErrors+errorFunc(forecL, forecH, actual) + i=i+1 + } + print(".") + print(paste0("avg error:",round(summErrors/length(forecSeries),2))) +} + + diff --git a/118 - slaweks17/R/readme.txt b/118 - slaweks17/R/readme.txt new file mode 100644 index 0000000..013d8f7 --- /dev/null +++ b/118 - slaweks17/R/readme.txt @@ -0,0 +1,8 @@ +When the c++ workers run, they output results (forecasts) to a directory or two. +(Sorry occasionally two directories are filled, in such case first "manually" put all the output files to a single dir) +These scripts merge them into one file and save it, show a sample of graphs, and if this is backtesting run (LBACK>0), calculate some accuracy metrics. + +Both scripts needs to be updated with your input, output dirs, and other params, see inside, there are a lot of comments there. + +merge.R is meant to be used for point forecst runs, so for ES_RNN and ES_RNN_E programs. +mergePI.R - for Prediction Interval runs, so for ES_RNN_PI and ES_RNN_E_PI programs. diff --git a/118 - slaweks17/c++/ES_RNN.cc b/118 - slaweks17/c++/ES_RNN.cc new file mode 100644 index 0000000..43dc358 --- /dev/null +++ b/118 - slaweks17/c++/ES_RNN.cc @@ -0,0 +1,1193 @@ +/*ES-RNN: ES-RNN Exponential Smoothing Recurrent Neural Network hybrid. Point forecast. 
+Slawek Smyl, Jan-May 2017. + +Dilated LSTMs, with optional shortcuts, attention. +It is meant to be used for Monthly and Quarterly series of M4 competition, becasue the DE (Diversified Ensemble) version is too slow. +The program uses and requires Dynet NN library(https://github.com/clab/dynet); can be compiled and run on Windows, Linux, and Mac. + +It has to be invoked in pair of executables, passing at least two integers: seedForChunks, chunkNo +so e.g. create a script with following lines on Windows +start 10 1 +start 10 2 +Modern computers have at more then 2 cores, so e.g. on 6-core machine create and run the following script with 3 pairs of workers: +# start 10 1 0 +# start 10 2 0 +# start 20 1 5 +# start 20 2 5 +# start 30 1 10 +# start 30 2 10 +seedForChunks have to be the same withion one pair, chunk numbers have to be 1 and 2. +We have added here the third parameter: ibigOffset. The straddle should be equal or bigger than BIG_LOOP. +Each pair goes through BIG_LOOP (by default 3, change in code below if you want) of model fitting and prediction, +so 2 pairs, as above, will produce 6 forecasts to be ensembled later, in R. +By increasing number of pairs, e.g. to 6 on 12-core computer, one can reduce BIG_LOOP to 1, so reduce execution time, and still have 6 forecasts - +a decent number to ensemble (in a separate R script). + +There are three blocks of parameters below, one active (starting with //PARAMS--------------) and two inactive. +The active block is setup as in the final run of forecasting quarterly series. Similarly Monthly block. +The Daily block is more of a demo, allowing to run quickly forecast for Daily series, although with slightly worse performance (use another program ES_RNN_E.cc for it). It was not used for the final submission. +So, you need comment/uncomment to have one block of interest active. + + +*/ + +//#define USE_ODBC +//define USE_ODBC if you want to +// 1. run the program in backtesting mode (which means you also need to set LBACK>0 below. Read the comment below. +// 2. save forecasts to a datatabase. Mysql and SQL Server were tested. The table creation and some other scripts should be found in \sql directory of the source code. +// Of course setting up ODBC is not that simple, :-), e.g. you need to create DSN=slawek, that points to a database with the output table. +// Saving to the db is convenient, but not necessary - all forecasts are always saved to as csv files in automatically created subdirectory (sorry sometimes two directories, so you have to copy :-)) of OUTPUT_DIR +//If saving to database you need to modify run varaible, for each new run, otherwise you will get the table key error. + +#include "dynet/dynet.h" +#include "dynet/training.h" +#include "dynet/expr.h" +#include "dynet/io.h" +#include "dynet/model.h" +#include "dynet/nodes.h" +#include "dynet/expr.h" +#include "dynet/lstm.h" +#include "slstm.h" //my implementation of dilated LSTMs + +#if defined USE_ODBC + #if defined _WINDOWS + #include + #endif + #include + #include +#endif + +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace dynet; + + + +string DATA_DIR = "f:\\progs\\data\\M4DataSet\\"; //with the competition data csvs +//string DATA_DIR="/home/uber/progs/data/M4DataSet/"; +string OUTPUT_DIR = "f:\\progs\\data\\M4\\"; +//string OUTPUT_DIR="/home/uber/progs/data/M4/"; + +int LBACK = 0; //LBACK 0 means final mode: learning on all data and forecasting. 
LBACK=1 would move back by OUTPUT_SIZE, and forecast last known OUTPUT_SIZE points, for backtesting. LBACK could be a larger integer, but then number of series shrinks. + + +//PARAMS-------------- +string VARIABLE = "Quarterly"; +const string run = "50/45 (1,2),(4,8), LR=0.001/{10,1e-4f}, EPOCHS=15, LVP=80 40*"; +const float PERCENTILE = 50; //we always use Pinball loss, although on normalized values. When forecasting point value, we actually forecast median, so PERCENTILE=50 +const float TRAINING_PERCENTILE = 45; //the program has a tendency for positive bias. So, we can reduce it by running smaller TRAINING_PERCENTILE. + +vector> dilations={{1,2},{4,8}};//Each vector represents one chunk of Dilateed LSTMS, connected in standard resnNet fashion +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM //so for Quarterly series, we do not use either the more advanced residual connections nor attention. +const bool ADD_NL_LAYER=false; //whether to insert a tanh() layer between the RNN stack and the linear adaptor (output) layer + +const float INITIAL_LEARNING_RATE = 0.001f; +const map LEARNING_RATES = { { 10,1e-4f } }; //at which epoch we set them up to what +const float PER_SERIES_LR_MULTIP = 1; //multiplier for per-series parameters' learning rate. + +const int NUM_OF_TRAIN_EPOCHS = 15; +const unsigned int STATE_HSIZE = 40; + +const int SEASONALITY = 4; +const unsigned int INPUT_SIZE = 4; +const int INPUT_SIZE_I= INPUT_SIZE; +const unsigned int OUTPUT_SIZE = 8; +const int OUTPUT_SIZE_I= OUTPUT_SIZE; +const int MIN_INP_SEQ_LEN = 0; +const float LEVEL_VARIABILITY_PENALTY = 80; //Multiplier for L" penalty against wigglines of level vector. Important. +const int MIN_SERIES_LENGTH = INPUT_SIZE_I + OUTPUT_SIZE_I+ MIN_INP_SEQ_LEN+2; +const int MAX_SERIES_LENGTH = 40 * SEASONALITY + MIN_SERIES_LENGTH; //we are chopping longer series, to last, max e.g. 40 years + + + +/* +string VARIABLE = "Monthly"; +const string run = "50/49 Res (1,3,6,12), LR=5e-4 {12,1e-4f}, EPOCHS=10, 20*"; +const float PERCENTILE = 50; //we always use Pinball loss, although on normalized values. When forecasting point value, we actually forecast median, so PERCENTILE=50 +const float TRAINING_PERCENTILE = 49; //the program has a tendency for positive bias. So, we can reduce it by running smaller TRAINING_PERCENTILE + +#define USE_RESIDUAL_LSTM //so for Monthly we use only one block, so no standard resNet shortcuts, but instead but of the special residual shortcuts, after https://arxiv.org/abs/1701.03360. +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; //whether to insert a tanh() layer between the RNN stack and the linear adaptor (output) layer + +vector> dilations={{1,3,6,12}};//so for Monthly we use only one block, so no standard resNet shortcut +const float INITIAL_LEARNING_RATE = 5e-4; +const map LEARNING_RATES = { { 12,1e-4f } }; //at which epoch we set them up to what +const float PER_SERIES_LR_MULTIP = 1; //multiplier for per-series parameters' learning rate. + +const int NUM_OF_TRAIN_EPOCHS = 10; +const unsigned int STATE_HSIZE = 50; + +const float LEVEL_VARIABILITY_PENALTY = 50; //Multiplier for L" penalty against wigglines of level vector. 
+ +const int SEASONALITY = 12; +const unsigned int OUTPUT_SIZE = 18; +const unsigned int INPUT_SIZE = 12; +const int INPUT_SIZE_I= INPUT_SIZE; +const int OUTPUT_SIZE_I= OUTPUT_SIZE; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = INPUT_SIZE_I + OUTPUT_SIZE_I+ MIN_INP_SEQ_LEN+2; +const int MAX_SERIES_LENGTH = 20 * SEASONALITY + MIN_SERIES_LENGTH; //we are chopping longer series, to last, max e.g. 40 years +*/ + + +/* +string VARIABLE = "Daily"; +const string run = "50/49 NL LRMult=1.5, 3/5 (1,7,28) LR=3e-4 {9,1e-4f} EPOCHS=15, LVP=100 HSIZE=40 20w"; + +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = true; + +const int PERCENTILE = 50; //we always use Pinball loss. When forecasting point value, we actually forecast median, so PERCENTILE=50 +const int TRAINING_PERCENTILE = 49; //the program has a tendency for positive bias. So, we can reduce it by running smaller TRAINING_PERCENTILE + +const int SEASONALITY_NUM = 1; //0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 7; +const int SEASONALITY2 = 0; +vector> dilations = { { 1,7,28 } }; + +const float INITIAL_LEARNING_RATE = 3e-4; +const map LEARNING_RATES = { { 9,1e-4f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1.5; +const int NUM_OF_TRAIN_EPOCHS = 15; + +float LEVEL_VARIABILITY_PENALTY = 100; //Multiplier for L" penalty against wigglines of level vector. +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 7; +const int INPUT_SIZE_I = INPUT_SIZE; +const unsigned int OUTPUT_SIZE = 14; +const int OUTPUT_SIZE_I = OUTPUT_SIZE; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). 
Total length may be truncated by LBACK +const int MAX_SERIES_LENGTH = 20 * SEASONALITY + MIN_SERIES_LENGTH; //we are chopping longer series, to max of last 20 years +*/ + +Expression squash(const Expression& x) { + return log(x); +} + +Expression expand(const Expression& x) { + return exp(x); +} + +string INPUT_PATH = DATA_DIR + VARIABLE + "-train.csv"; +string INFO_INPUT_PATH = DATA_DIR + "M4-info.csv"; + +#if defined _DEBUG + const int MAX_NUM_OF_SERIES = 40; +#else + const int MAX_NUM_OF_SERIES = -1; //use all series +#endif // _DEBUG + +const unsigned int NUM_OF_CATEGORIES = 6;//in data provided +const int BIG_LOOP = 3; +const int NUM_OF_CHUNKS = 2; +const float EPS=1e-6; +const int AVERAGING_LEVEL=5; +const bool USE_MEDIAN = false; +const int MIDDLE_POS_FOR_AVG = 2; //if using medians + +const float NOISE_STD=0.001; +const int FREQ_OF_TEST=1; +const float GRADIENT_CLIPPING=20; +const float C_STATE_PENALTY = 0; + +const float BIG_FLOAT=1e38;//numeric_limits::max(); +const bool PRINT_DIAGN=true; +const float TAU = PERCENTILE / 100.; +const float TRAINING_TAU = TRAINING_PERCENTILE / 100.; +const unsigned ATTENTION_HSIZE=STATE_HSIZE; + +const bool USE_AUTO_LEARNING_RATE=false; +//if USE_AUTO_LEARNING_RATE, and only if LBACK>0 +const float MIN_LEARNING_RATE = 0.0001f; +const float LR_RATIO = sqrt(10); +const float LR_TOLERANCE_MULTIP = 1.005; +const int L3_PERIOD = 2; +const int MIN_EPOCHS_BEFORE_CHANGING_LRATE = 2; + + +#if defined USE_ODBC + void HandleDiagnosticRecord(SQLHANDLE hHandle, + SQLSMALLINT hType, + RETCODE RetCode); + + #if defined _WINDOWS + WCHAR* pwszConnStr = L"DSN=slawek"; + #else + SQLCHAR* pwszConnStr = (SQLCHAR*) "DSN=slawek"; + #endif + #define TRYODBC(h, ht, x) { RETCODE rc = x;\ + if (rc != SQL_SUCCESS) \ + { \ + HandleDiagnosticRecord (h, ht, rc); \ + } \ + if (rc == SQL_ERROR) \ + { \ + fprintf(stderr, "Error in " #x "\n"); \ + if (hStmt) { \ + SQLFreeHandle(SQL_HANDLE_STMT, hStmt); \ + } \ + if (hDbc) { \ + SQLDisconnect(hDbc); \ + SQLFreeHandle(SQL_HANDLE_DBC, hDbc); \ + } \ + if (hEnv) { \ + SQLFreeHandle(SQL_HANDLE_ENV, hEnv); \ + } \ + exit(-1); \ + } \ + } + +#endif + +struct M4TS {//storing series data + vector < float> categories_vect; + vector vals; + vector testVals;//empty, unless LBACK>0 + int n; + + M4TS(string category, stringstream &line_stream) { + array categories = { 0,0,0,0,0,0 }; + if (category == "Demographic") + categories[0] = 1; + else if (category == "Finance") + categories[1] = 1; + else if (category == "Industry") + categories[2] = 1; + else if (category == "Macro") + categories[3] = 1; + else if (category == "Micro") + categories[4] = 1; + else if (category == "Other") + categories[5] = 1; + else { + cerr << "unknown category?"; + exit(-1); + } + for (int i = 0; i < NUM_OF_CATEGORIES; i++) + categories_vect.push_back(categories[i]); + + string tmp_str; + while(getline(line_stream, tmp_str, ',' )) { + string val_str; + for (const auto c : tmp_str) { + if (c != '\"' && c != '\r') //remove quotes and very occasional double end of line + val_str.push_back(c); + } + if (val_str.size() == 0) + break; + float val=(atof(val_str.c_str())); + vals.push_back(val); + } + if (LBACK > 0) { //extract last OUTPUT_SIZE points as the test values + if (vals.size() > LBACK*OUTPUT_SIZE_I) { + auto first = vals.begin() + vals.size() - LBACK*OUTPUT_SIZE_I; + auto pastLast = vals.begin() + vals.size() - (LBACK-1)*OUTPUT_SIZE_I; + vector input_vect(first, pastLast); //[first,pastLast) + testVals= input_vect; + vals.resize(vals.size() - LBACK*OUTPUT_SIZE_I); 
//remove last LBACK*OUTPUT_SIZE elements + n = vals.size(); + } else + n = 0; + } else { + n = vals.size(); + } + if (n > MAX_SERIES_LENGTH) { //chop long series + vals.erase(vals.begin(), vals.begin() + (n-MAX_SERIES_LENGTH)); //remove some early data + n = vals.size(); + } + } + M4TS(){}; +}; + + +struct AdditionalParams {//Per series, important + Parameter levSm; + Parameter sSm; + array initSeasonality; +}; + +struct AdditionalParamsF {//Used for storing diagnostics + float levSm; + float sSm; + array initSeasonality; + vector levels; + vector seasons; +}; + + +Expression pinBallLoss(const Expression& out_ex, const Expression& actuals_ex) {//used by Dynet, learning loss function + vector losses; + for (unsigned int indx = 0; indx as_scalar(forec.value())) + losses.push_back((actual - forec)*TRAINING_TAU); + else + losses.push_back((actual - forec)*(TRAINING_TAU - 1)); + } + return sum(losses) / OUTPUT_SIZE * 2; +} + + +//weighted quantile Loss, used just for diagnostics, if if LBACK>0 and PERCENTILE!=50 +float wQuantLoss(vector& out_vect, vector& actuals_vect) { + float sumf = 0; float suma=0; + for (unsigned int indx = 0; indx forec) + sumf = sumf + (actual - forec)*TAU; + else + sumf = sumf + (actual - forec)*(TAU - 1); + } + return sumf / suma * 200; +} + +//used just for diagnostics, if LBACK>0 and PERCENTILE==50 +float sMAPE(vector& out_vect, vector& actuals_vect) { + float sumf = 0; + for (unsigned int indx = 0; indx& out_vect, vector& actuals_vect) { + if (PERCENTILE==50) + return sMAPE(out_vect, actuals_vect); + else + return wQuantLoss(out_vect, actuals_vect); +} + +int main(int argc, char** argv) { + dynet::initialize(argc, argv); + + int seedForChunks = 10; //Yes it runs, without any params, but it will work only on 1/NUM_OF_CHUNKS of all cases. The system is expected to run in NUM_OF_CHUNKS multiples. + int chunkNo = 1; + int ibigOffset = 0; + if (argc >= 3) { + seedForChunks = atoi(argv[1]); + chunkNo = atoi(argv[2]); + } + if (argc >= 4) + ibigOffset = atoi(argv[3]); + + if (chunkNo > NUM_OF_CHUNKS) { + cerr << "chunkNo > NUM_OF_CHUNKS"; + exit(-1); + } + else if (chunkNo <= 0) { + cerr << "chunkNo <= 0"; + exit(-1); + } + + cout<0) + std::cout<< " ibigOffset:"<< ibigOffset; //if continuing prematurely stopped run + if (LBACK>0) + std::cout<<" lback:"<tm_year+1900; + now_ts.month=now->tm_mon+1; + now_ts.day=now->tm_mday; + now_ts.hour=now->tm_hour; + now_ts.minute=now->tm_min; + now_ts.second=now->tm_sec; + now_ts.fraction=0; //reportedly needed + + const int OFFSET_TO_FIRST_ACTUAL=5; + string insertQuery_str = "insert into M72nn(run, LBack, ibig, series, epoch "; + for (int iq = 1; iq <= OUTPUT_SIZE_I; iq++) { + stringstream ss; + ss << iq; + string iq_str = ss.str(); + insertQuery_str = insertQuery_str +", actual"+iq_str+", forec" + iq_str; + } + insertQuery_str = insertQuery_str +", trainingError, variable, n, dateTimeOfPrediction) \ + values(? , ? , ? , ? , ? 
"; + for (int iq = 1; iq <= OUTPUT_SIZE_I; iq++) { + insertQuery_str = insertQuery_str + ",?,?"; + } + insertQuery_str = insertQuery_str + ",?,?,?,?)"; + #if defined _WINDOWS + wstring insertQuery(insertQuery_str.begin(), insertQuery_str.end()); + SQLWCHAR* sqlQuery = (SQLWCHAR*)insertQuery.c_str(); + #else + SQLCHAR* sqlQuery =(SQLCHAR*)insertQuery_str.c_str(); + #endif + + + SQLHENV hEnv = NULL; + SQLHDBC hDbc = NULL; + SQLHSTMT hStmt = NULL, hInsertStmt = NULL; + + if (SQLAllocHandle(SQL_HANDLE_ENV, SQL_NULL_HANDLE, &hEnv) == SQL_ERROR) { + fprintf(stderr, "Unable to allocate an environment handle\n"); + exit(-1); + } + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLSetEnvAttr(hEnv, + SQL_ATTR_ODBC_VERSION, + (SQLPOINTER)SQL_OV_ODBC3, + 0)); + + // Allocate a connection + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLAllocHandle(SQL_HANDLE_DBC, hEnv, &hDbc)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLDriverConnect(hDbc, + NULL, + pwszConnStr, + SQL_NTS, + NULL, + 0, + NULL, + SQL_DRIVER_COMPLETE)); + fprintf(stderr, "Connected!\n"); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLSetConnectAttr(hDbc, SQL_ATTR_AUTOCOMMIT, (SQLPOINTER)SQL_AUTOCOMMIT_OFF, SQL_IS_INTEGER)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLAllocHandle(SQL_HANDLE_STMT, hDbc, &hInsertStmt)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLPrepare(hInsertStmt, sqlQuery, SQL_NTS)); + + SQLLEN nullTerminatedStringOfRun = SQL_NTS; + SQLLEN nullTerminatedStringOfSeries = SQL_NTS; + SQLLEN nullTerminatedStringOfVariable = SQL_NTS; + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 1, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)run.c_str(), 0, &nullTerminatedStringOfRun)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 2, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&LBACK, 0, NULL)); + + // variable, n, dateTimeOfPrediction + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL+2*OUTPUT_SIZE_I+2, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)VARIABLE.c_str(), 0, &nullTerminatedStringOfVariable)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE_I + 4, SQL_PARAM_INPUT, SQL_C_TYPE_TIMESTAMP, SQL_TYPE_TIMESTAMP, 0, 0, &now_ts, sizeof(TIMESTAMP_STRUCT), NULL)); +#endif + + random_device rd; // only used once to initialise (seed) engine + mt19937 rng(rd()); // random-number engine used (Mersenne-Twister) + mt19937 rngForChunks(seedForChunks); + + vector series_vect; + unordered_map allSeries_map(30000);//max series in one chunk would be 48/2=24k, for monthly series + unordered_map seriesCategories_map(120000);//100k series + + ifstream infoFile(INFO_INPUT_PATH); + string line; + getline(infoFile, line); //header + while (getline(infoFile, line)) { + //cout << string( line)<= MIN_SERIES_LENGTH) { + series_vect.push_back(series); + allSeries_map[series] = m4Obj; + } + if (MAX_NUM_OF_SERIES>0 && series_vect.size()>=MAX_NUM_OF_SERIES) + break; + } + + int series_len=(int)series_vect.size(); + int chunkSize= series_len/NUM_OF_CHUNKS; + std::cout << "num of series:" << series_vect.size() <<" size of chunk:"<< chunkSize< uniOnSeries(0, chunkSize -1); // closed interval [a, b] + + unordered_map, AVERAGING_LEVEL+1>> testResults_map((int)chunkSize*1.5); + set diagSeries; + + for (int ibig=0; ibig perfValid_vect; + int epochOfLastChangeOfLRate = -1; + +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 3, 
SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&ibigDb, 0, NULL)); +#endif + + ParameterCollection pc; + ParameterCollection perSeriesPC; + + float learning_rate= INITIAL_LEARNING_RATE; + AdamTrainer trainer(pc, learning_rate, 0.9, 0.999, EPS); + trainer.clip_threshold = GRADIENT_CLIPPING; + AdamTrainer perSeriesTrainer(perSeriesPC, learning_rate*PER_SERIES_LR_MULTIP, 0.9, 0.999, EPS); + perSeriesTrainer.clip_threshold = GRADIENT_CLIPPING; + + #if defined USE_RESIDUAL_LSTM + vector rNNStack; + rNNStack.emplace_back(ResidualDilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, pc)); + for (int il = 1; il rNNStack; + rNNStack.emplace_back(AttentiveDilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, ATTENTION_HSIZE, pc)); + for (int il = 1; il rNNStack; + rNNStack.emplace_back(DilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, pc)); + for (int il = 1; il oneChunk_vect(start,end); + if (PRINT_DIAGN) { + for (int k = 0; k<10; k++) //diag + cout << oneChunk_vect[k] << " "; + cout << endl; + } + if (chunkNo == NUM_OF_CHUNKS) + cout<<"last chunk size:"<< oneChunk_vect.size()< additionalParams_map((int)oneChunk_vect.size()*1.5); //per series + unordered_map*> historyOfAdditionalParams_map((int)oneChunk_vect.size()*1.5); + for (auto iter = oneChunk_vect.begin(); iter != oneChunk_vect.end(); ++iter) {//setup + string series = *iter; + AdditionalParams addParams; + addParams.levSm = perSeriesPC.add_parameters({ 1 }, 0.5); //level smoothing + addParams.sSm = perSeriesPC.add_parameters({ 1 }, 0.5); //seasonality smoothing + for (int isea = 0; isea(); + } + + for (int iEpoch=0; iEpoch testLosses; //test losses of all series in this epoch + vector testAvgLosses; //test avg (over last few epochs) losses of all series in this epoch + vector trainingLosses; //training losses of all series in one epoch + vector forecLosses; vector levVarLosses; vector stateLosses; + #if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 5, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&iEpoch, 0, NULL)); + #endif + + for (auto iter = oneChunk_vect.begin() ; iter != oneChunk_vect.end(); ++iter) { + string series=*iter; + auto m4Obj = allSeries_map[series]; + + #if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 4, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)series.c_str(), 0, &nullTerminatedStringOfSeries)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE_I + 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&m4Obj.n, 0, NULL)); + #endif + + ComputationGraph cg; + for (int il=0; il season_exVect;//vector, because we do not know how long the series is + for (int iseas=0; iseas seas==1 + season_exVect.push_back(seas);//Expression is a simple struct, without any storage management, so the auto copy constructor works OK. 
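+            //Editorial note (added comment, not original code): the code that follows computes the
+            //per-step levels and extends the seasonality vector, roughly following the multiplicative
+            //exponential-smoothing recursion of ES-RNN:
+            //  level[i]              ~ levSm * y[i]/season[i] + (1-levSm) * level[i-1]
+            //  season[i+SEASONALITY] ~ sSm   * y[i]/level[i]  + (1-sSm)   * season[i]
+            //where levSm and sSm are the per-series smoothing parameters declared above, and
+            //logDiffOfLevels_vect collects the level-to-level (log) changes later used by the
+            //LEVEL_VARIABILITY_PENALTY ("wiggliness") term of the loss.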
+ } + season_exVect.push_back(season_exVect[0]); + + vector logDiffOfLevels_vect; + vector levels_exVect; + Expression lev=cdiv(input(cg, m4Obj.vals[0]), season_exVect[0]); + levels_exVect.push_back(lev); + for (int i=1; i 0) { + vector levelVarLoss_v; + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx=season_exVect.size()-SEASONALITY; + for (int i=0;i<(OUTPUT_SIZE_I-SEASONALITY);i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx+i]); + } + vector losses; + for (int i=INPUT_SIZE_I-1; i<(m4Obj.n- OUTPUT_SIZE_I); i++) { + vector::const_iterator firstE = season_exVect.begin() +i+1-INPUT_SIZE_I; + vector::const_iterator pastLastE = season_exVect.begin() +i+1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression inputSeasonality_ex=concatenate(inputSeasonality_exVect); + + vector::const_iterator first = m4Obj.vals.begin() +i+1-INPUT_SIZE_I; + vector::const_iterator pastLast = m4Obj.vals.begin() +i+1; //not including the last one + vector input_vect(first, pastLast); //[first,pastLast) + Expression input0_ex=input(cg,{INPUT_SIZE},input_vect); + Expression input1_ex=cdiv(input0_ex,inputSeasonality_ex); //deseasonalization + vector joinedInput_ex; + input1_ex= cdiv(input1_ex, levels_exVect[i]); + joinedInput_ex.emplace_back(noise(squash(input1_ex), NOISE_STD)); //normalization+noise + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality_ex=concatenate(outputSeasonality_exVect); + + first = m4Obj.vals.begin() +i+1; + pastLast = m4Obj.vals.begin() +i+1+OUTPUT_SIZE_I; + vector labels_vect(first, pastLast); //[first,pastLast) + Expression labels0_ex=input(cg,{OUTPUT_SIZE},labels_vect); + Expression labels1_ex=cdiv(labels0_ex,outputSeasonality_ex); //deseasonalization + labels1_ex= cdiv(labels1_ex, levels_exVect[i]);//normalization + Expression labels_ex=squash(labels1_ex); + + Expression loss_ex=pinBallLoss(out_ex, labels_ex); + if (i>=INPUT_SIZE_I+MIN_INP_SEQ_LEN) + losses.push_back(loss_ex); + } + + Expression forecLoss_ex= average(losses); + Expression loss_exp = forecLoss_ex; + + float levVarLoss=0; + if (LEVEL_VARIABILITY_PENALTY > 0) { + Expression levelVarLossP_ex = levelVarLoss_ex*LEVEL_VARIABILITY_PENALTY; + levVarLoss = as_scalar(levelVarLossP_ex.value()); + levVarLosses.push_back(levVarLoss); + loss_exp= loss_exp + levelVarLossP_ex; + } + + float cStateLoss=0; + if (C_STATE_PENALTY>0) { + vector cStateLosses_vEx; + for (int irnn = 0; irnn < rNNStack.size(); irnn++) + for (int it = 0; it maxAbs) { + maxAbs = abs(state[iv]); + timeOfMax = it; + layerOfMax = il; + chunkOfMax = irnn; + } + } + } //through layers/states + } //through time + } //through chunks + + cout << "levSm:" << as_scalar(levSm_ex.value()) << endl; + cout << "sSm:" << as_scalar(sSm_ex.value()) << endl; + cout << " min season=" << minSeason << endl; + cout << " min level=" << minLevel << endl; + cout << " max abs:" << maxAbs << " at time:" << timeOfMax << " at layer:" << layerOfMax << " and chunk:" << chunkOfMax << endl; + + //diagSeries.insert(series); + pc.reset_gradient(); + perSeriesPC.reset_gradient(); + } + + //saving per-series values for diagnostics purposes + AdditionalParamsF &histAdditionalParams= historyOfAdditionalParams_map[series]->at(iEpoch); + 
histAdditionalParams.levSm=as_scalar(levSm_ex.value()); + histAdditionalParams.sSm=as_scalar(sSm_ex.value()); + for (int isea=0; isea::const_iterator firstE = season_exVect.begin() + i + 1 - INPUT_SIZE_I; + vector::const_iterator pastLastE = season_exVect.begin() + i + 1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression inputSeasonality_ex = concatenate(inputSeasonality_exVect); + + vector::const_iterator first = m4Obj.vals.begin() + i + 1 - INPUT_SIZE_I; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1; //not including the last one + vector input_vect(first, pastLast); //[first,pastLast) + Expression input0_ex = input(cg, { INPUT_SIZE }, input_vect); + Expression input1_ex = cdiv(input0_ex, inputSeasonality_ex); //deseasonalization + vector joinedInput_ex; + input1_ex= cdiv(input1_ex, levels_exVect[i]);//normalization + joinedInput_ex.emplace_back(squash(input1_ex)); + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality_ex = concatenate(outputSeasonality_exVect); + + Expression out_ex; + if (ADD_NL_LAYER) { + out_ex=MLPW_ex*rnn_ex+MLPB_ex; + out_ex = adapterW_ex*tanh(out_ex)+adapterB_ex; + } else + out_ex=adapterW_ex*rnn_ex+adapterB_ex; + + out_ex = cmult(expand(out_ex), outputSeasonality_ex)*levels_exVect[i];//back to original scale + vector out_vect = as_vector(out_ex.value()); + + if (LBACK > 0) { + float qLoss = errorFunc(out_vect, m4Obj.testVals); + testLosses.push_back(qLoss); + } + + testResults_map[series][iEpoch%AVERAGING_LEVEL] = out_vect; + if (iEpoch >= AVERAGING_LEVEL) { + if (USE_MEDIAN) { + if (testResults_map[series][AVERAGING_LEVEL].size() == 0) + testResults_map[series][AVERAGING_LEVEL] = out_vect; //just to initialized, to make space. 
The values will be overwritten + for (int iii = 0; iii < OUTPUT_SIZE_I; iii++) { + vector temp_vect2; + for (int ii = 0; ii firstForec = testResults_map[series][0]; + testResults_map[series][AVERAGING_LEVEL] = firstForec; + for (int ii = 1; ii nextForec = testResults_map[series][ii]; + for (int iii = 0; iii 0) { + float qLoss = errorFunc(testResults_map[series][AVERAGING_LEVEL], m4Obj.testVals); + testAvgLosses.push_back(qLoss); + + #if defined USE_ODBC //save + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE_I + 1, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&forecastLoss, 0, NULL)); + + for (int io = 0; io < OUTPUT_SIZE_I; io++) { + int ipos=OFFSET_TO_FIRST_ACTUAL + 1 + 2*io; + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, ipos, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&m4Obj.testVals[io], 0, NULL)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, ipos+1, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&testResults_map[series][AVERAGING_LEVEL][io], 0, NULL)); + } + if (MAX_NUM_OF_SERIES<0) + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLExecute(hInsertStmt)); + #endif + } + } //time to average + }//last anchor point of the series + }//through TEST loop + }//through series + + + if (iEpoch % FREQ_OF_TEST == 0) { + float averageTrainingLoss = accumulate(trainingLosses.begin(), trainingLosses.end(), 0.0) / trainingLosses.size(); + + cout << ibig << " " << iEpoch << " loss:" << averageTrainingLoss * 100; + if (LEVEL_VARIABILITY_PENALTY > 0 || C_STATE_PENALTY > 0) { + float averageForecLoss = accumulate(forecLosses.begin(), forecLosses.end(), 0.0) / forecLosses.size(); + cout << " forecast loss:" << averageForecLoss*100; + } + if (LEVEL_VARIABILITY_PENALTY > 0) { + float averagelevVarLoss = accumulate(levVarLosses.begin(), levVarLosses.end(), 0.0) / levVarLosses.size(); + cout << " levVar loss:" << averagelevVarLoss * 100; + } + if (C_STATE_PENALTY > 0) { + float averageStateLoss = accumulate(stateLosses.begin(), stateLosses.end(), 0.0) / stateLosses.size(); + cout << " state loss:" << averageStateLoss * 100; + } + + float averageTestLoss=0; + if (LBACK > 0) { + float averageTestLoss = accumulate(testLosses.begin(), testLosses.end(), 0.0) / testLosses.size(); + cout<<" Test loss:" << averageTestLoss; + if (iEpoch >= AVERAGING_LEVEL) { + float averageTestAvgLoss = accumulate(testAvgLosses.begin(), testAvgLosses.end(), 0.0) / testAvgLosses.size();//of this epoch + cout << " avgLoss:" << averageTestAvgLoss; + } + if (USE_AUTO_LEARNING_RATE) + perfValid_vect.push_back(averageTestLoss); + } + cout << endl; + } + + if (USE_AUTO_LEARNING_RATE) { + bool changeL2Rate = false; + if (iEpoch >= 2) { + if (iEpoch < L3_PERIOD) + changeL2Rate = perfValid_vect[perfValid_vect.size() - 2] MIN_LEARNING_RATE && (iEpoch - epochOfLastChangeOfLRate) >= MIN_EPOCHS_BEFORE_CHANGING_LRATE) { + learning_rate /= LR_RATIO; + cout << "decreasing LR to:" << learning_rate << endl; + epochOfLastChangeOfLRate = iEpoch; + trainer.learning_rate = learning_rate; + } + } + #if defined USE_ODBC + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLEndTran( + SQL_HANDLE_DBC, + hDbc, + SQL_COMMIT)); + #endif + }//through epochs + + if (PRINT_DIAGN) {//some diagnostic info + set diagSeries; + for (int i = 0; i<1; i++) {//add a few normal ones + int irand = uniOnSeries(rng); + diagSeries.insert(oneChunk_vect[irand]); + } + for (auto series : diagSeries) { + cout << endl << series << endl; + 
array* historyOfAdditionalParams_ptrToArr = historyOfAdditionalParams_map[series]; + cout << "lSm:" << endl; + for (int iEpoch = 0; iEpochat(iEpoch).levSm << " "; + cout << endl; + cout << "sSm:" << endl; + for (int iEpoch = 0; iEpochat(iEpoch).sSm << " "; + cout << endl; + cout << "seasons:" << endl; + for (int isea = 0; iseaat(iEpoch).initSeasonality[isea] << " "; + cout << endl; + } + cout << endl; + for (int iEpoch = 0; iEpochat(iEpoch).levels.size()>0) { + cout << "levels:" << iEpoch << " "; + for (int iv = 0; ivat(iEpoch).levels.size(); iv++) + cout << historyOfAdditionalParams_ptrToArr->at(iEpoch).levels[iv] << ", "; + cout << endl; + cout << "seas:" << iEpoch << " "; + for (int iv = 0; ivat(iEpoch).seasons.size(); iv++) + cout << historyOfAdditionalParams_ptrToArr->at(iEpoch).seasons[iv] << ", "; + cout << endl; + } + } + } + } + + //save the forecast to outputFile + ofstream outputFile; + outputFile.open(outputPath); + for (auto iter = oneChunk_vect.begin(); iter != oneChunk_vect.end(); ++iter) { + string series = *iter; + outputFile<< series; + for (int io=0; io 0 +start 10 +start 20 +start 30 +on 4-core computer. +In this setup, learning and fitting would be repeated 4*3 times, probably unnecessarily too many, 6-8 independent runs should be enough for a good ensemble. +Therefore if running on say 8 core machine , one can extend the above script to 8 concurrent executions and reduce BIG_LOOP to 1. +(Creating final forecasts is done in a supplied R script) + +There are four blocks of parameters below, one active (starting with //PARAMS--------------) and three inactive. +These blocks are as they were during the final forecasting run. You need comment/uncomment to have one block of interest active. +*/ + + +//#define USE_ODBC +//define USE_ODBC if you want to +// 1. run the program in backtesting mode (which means you also need to set LBACK>0 below. Read the comment below. +// 2. save forecasts to a datatabase. Mysql and SQL Server were tested. The table creation and some other scripts should be found in \sql directory of the source code. +// Of course setting up ODBC is not that simple, :-), e.g. you need to create DSN=slawek, that points to a database with the output table. +// Saving to the db is convenient, but not necessary - all forecasts are always saved to as csv files in automatically created subdirectory (sorry sometimes two directories, so you have to copy :-)) of OUTPUT_DIR +//If saving to database you need to modify run varaible, for each new run, otherwise you will get the table key error. + +#include "dynet/dynet.h" +#include "dynet/training.h" +#include "dynet/expr.h" +#include "dynet/io.h" +#include "dynet/model.h" +#include "dynet/nodes.h" +#include "dynet/expr.h" +#include "dynet/lstm.h" +#include "slstm.h" //my implementation of dilated LSTMs + + +#if defined USE_ODBC + #if defined _WINDOWS + #include + #endif + #include + #include +#endif + +#include +#include +#include +//#include +#include +#include +#include +#include + +using namespace std; +using namespace dynet; + + +string DATA_DIR = "f:\\progs\\data\\M4DataSet\\"; //with the competition data csvs +//string DATA_DIR="/home/uber/progs/data/M4DataSet/"; +string OUTPUT_DIR = "f:\\progs\\data\\M4\\"; +//string OUTPUT_DIR="/home/uber/progs/data/M4/"; + +int LBACK = 0; //LBACK 0 means final mode: learning on all data and forecasting. LBACK=1 would move back by OUTPUT_SIZE, and forecast last known OUTPUT_SIZE points, for backtesting. LBACK could be a larger integer, but then number of series shrinks. 
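+//Editorial note (added comment, not original code): for example, with the Hourly configuration
+//below (OUTPUT_SIZE = 48), running with LBACK=1 withholds the last 48 points of every series as
+//test values for backtesting, while the default LBACK=0 trains on the full series and produces
+//the 48-step-ahead forecast for submission.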
+ + +//PARAMS-------------- +string VARIABLE = "Hourly"; +const string run = "50/49 Att 4/5 1,4)(24,168) LR=0.01,{7,5e-3f},{18,1e-3f},{22,3e-4f} EPOCHS=27, LVP=10, CSP=1"; + +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +const float PERCENTILE = 50; //we always use Pinball loss. When forecasting point value, we actually forecast median, so PERCENTILE=50 +const float TRAINING_PERCENTILE = 49; //the program has a tendency for positive bias. So, we can reduce it by running smaller TRAINING_PERCENTILE + +const int SEASONALITY_NUM = 2;//0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 24; +const int SEASONALITY2 = 168; +vector> dilations = { { 1,4 },{ 24, 168 } }; + +const float INITIAL_LEARNING_RATE = 0.01f; +const map LEARNING_RATES = { { 7,5e-3f },{ 18,1e-3f },{ 22,3e-4f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 27; + +float LEVEL_VARIABILITY_PENALTY = 10; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 1; + +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 24; +const unsigned int OUTPUT_SIZE = 48; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK +const int MAX_SERIES_LENGTH = 53 * SEASONALITY2 + MIN_SERIES_LENGTH; //==all +const int TOPN = 4; + + +/* +string VARIABLE = "Weekly"; +const string run = "50/47 Att 3/5 (1,52) LR=1e-3 {11,3e-4f}, {17,1e-4f} EPOCHS=23, LVP=100 6y"; + +const int PERCENTILE = 50; //we always use Pinball loss. When forecasting point value, we actually forecast median, so PERCENTILE=50 +const int TRAINING_PERCENTILE = 47; //the program has a tendency for positive bias. So, we can reduce it by running smaller TRAINING_PERCENTILE + +//#define USE_RESIDUAL_LSTM +#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +const int SEASONALITY_NUM = 0; //0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 52; +const int SEASONALITY2 = 0; +vector> dilations = { { 1, 52 } }; + +const float INITIAL_LEARNING_RATE = 1e-3; +const map LEARNING_RATES = { { 11,3e-4f },{ 17,1e-4f } }; //at which epoch we manually set them up to what +const int NUM_OF_TRAIN_EPOCHS = 23; + +float LEVEL_VARIABILITY_PENALTY = 100; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; +const float PER_SERIES_LR_MULTIP = 1; + +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 10; +const unsigned int OUTPUT_SIZE = 13; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK + //#Min. 1st Qu. Median Mean 3rd Qu. Max. + //#81 380 935 1023 1604 2598 +const int MAX_SERIES_LENGTH = 6 * SEASONALITY + MIN_SERIES_LENGTH; //==all +const int TOPN = 3; +*/ + +/* +string VARIABLE = "Daily"; +const string run = "Final 50/49 730 4/5 (1,3)(7,14) LR=3e-4 {9,1e-4f} EPOCHS=13, LVP=100 13w"; +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +const int PERCENTILE = 50; //we always use Pinball loss. 
When forecasting point value, we actually forecast median, so PERCENTILE=50 +const int TRAINING_PERCENTILE = 49; //the program has a tendency for positive bias. So, we can reduce it by running smaller TRAINING_PERCENTILE + +const int SEASONALITY_NUM = 1; //0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 7; +const int SEASONALITY2 = 0; +vector> dilations = { { 1,3 },{ 7, 14 } }; + +const float INITIAL_LEARNING_RATE = 3e-4; +const map LEARNING_RATES = { { 9,1e-4f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 13; + +float LEVEL_VARIABILITY_PENALTY = 100; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; + +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 7; +const unsigned int OUTPUT_SIZE = 14; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK + //#Min. 1st Qu. Median Mean 3rd Qu. Max. + //##93 323 2940 2357 4197 9919 +const int MAX_SERIES_LENGTH = 13 * SEASONALITY + MIN_SERIES_LENGTH; +const int TOPN = 4; +*/ + +/* +string VARIABLE = "Yearly"; +const string run = "50 Att 4/5 (1,6) LR=1e-4 EPOCHS=12, 60*"; + +//#define USE_RESIDUAL_LSTM +#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +const float PERCENTILE = 50; //we always use Pinball loss. When forecasting point value, we actually forecast median, so PERCENTILE=50 +const float TRAINING_PERCENTILE = 50; + +const int SEASONALITY_NUM = 0; //0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 0; +const int SEASONALITY2 = 0; +vector> dilations = { { 1,6 } }; + +const float INITIAL_LEARNING_RATE = 1e-4; +const map LEARNING_RATES = { { 15,1e-5 } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 12; + +float LEVEL_VARIABILITY_PENALTY = 0; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; + +const unsigned int STATE_HSIZE = 30; + +const unsigned int INPUT_SIZE = 4; +const unsigned int OUTPUT_SIZE = 6; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK + //#Min. 1st Qu. Median Mean 3rd Qu. Max. 
+ //#13.00 20.00 29.00 31.32 40.00 835.00 +const int MAX_SERIES_LENGTH = 60 + MIN_SERIES_LENGTH; +const int TOPN = 4; +*/ + +//end of VARIABLE-specific params + +const int BIG_LOOP = 3; +const int NUM_OF_NETS = 5; +const unsigned int ATTENTION_HSIZE = STATE_HSIZE; + + +#if defined _DEBUG + const int MAX_NUM_OF_SERIES = 20; +#else + const int MAX_NUM_OF_SERIES = -1; +#endif // _DEBUG + +const unsigned int NUM_OF_CATEGORIES = 6; +const int AVERAGING_LEVEL = 5; +const float EPS=1e-6; + +const float NOISE_STD=0.001; +const int FREQ_OF_TEST=1; +const float GRADIENT_CLIPPING=50; +const float BIG_FLOAT=1e38;//numeric_limits::max(); +const bool PRINT_DIAGN = false; +const float TAU = PERCENTILE / 100.; +const float TRAINING_TAU = TRAINING_PERCENTILE / 100.; + +string INPUT_PATH = DATA_DIR + VARIABLE + "-train.csv"; +string INFO_INPUT_PATH = DATA_DIR + "M4-info.csv"; + + +Expression squash(const Expression& x) { + return log(x); +} +float squash(float x) { + return log(x); +} + +Expression expand(const Expression& x) { + return exp(x); +} +float expand(float x) { + return exp(x); +} + + +#if defined USE_ODBC + void HandleDiagnosticRecord(SQLHANDLE hHandle, + SQLSMALLINT hType, + RETCODE RetCode); + + #if defined _WINDOWS + WCHAR* pwszConnStr = L"DSN=slawek"; + #else + SQLCHAR* pwszConnStr = (SQLCHAR*) "DSN=slawek"; + #endif + #define TRYODBC(h, ht, x) { RETCODE rc = x;\ + if (rc != SQL_SUCCESS) \ + { \ + HandleDiagnosticRecord (h, ht, rc); \ + } \ + if (rc == SQL_ERROR) \ + { \ + fprintf(stderr, "Error in " #x "\n"); \ + if (hStmt) { \ + SQLFreeHandle(SQL_HANDLE_STMT, hStmt); \ + } \ + if (hDbc) { \ + SQLDisconnect(hDbc); \ + SQLFreeHandle(SQL_HANDLE_DBC, hDbc); \ + } \ + if (hEnv) { \ + SQLFreeHandle(SQL_HANDLE_ENV, hEnv); \ + } \ + exit(-1); \ + } \ + } + +#endif + +struct M4TS {//storing series data + vector < float> categories_vect; + vector vals; + vector testVals;//empty, unless LBACK>0 + int n; + + M4TS(string category, stringstream &line_stream) { + array categories = { 0,0,0,0,0,0 }; + if (category == "Demographic") + categories[0] = 1; + else if (category == "Finance") + categories[1] = 1; + else if (category == "Industry") + categories[2] = 1; + else if (category == "Macro") + categories[3] = 1; + else if (category == "Micro") + categories[4] = 1; + else if (category == "Other") + categories[5] = 1; + else { + cerr << "unknown category?"; + exit(-1); + } + for (int i = 0; i < NUM_OF_CATEGORIES; i++) + categories_vect.push_back(categories[i]); + + string tmp_str; + while(getline(line_stream, tmp_str, ',' )) { + string val_str; + for (const auto c : tmp_str) { + if (c != '\"' && c != '\r') //remove quotes and very occasional double end of line + val_str.push_back(c); + } + if (val_str.size() == 0) + break; + float val=(atof(val_str.c_str())); + vals.push_back(val); + } + if (LBACK > 0) { //extract last OUTPUT_SIZE points as the test values + if (vals.size() > LBACK*OUTPUT_SIZE) { + auto first = vals.begin() + vals.size() - LBACK*OUTPUT_SIZE; + auto pastLast = vals.begin() + vals.size() - (LBACK-1)*OUTPUT_SIZE; + vector input_vect(first, pastLast); //[first,pastLast) + testVals= input_vect; + vals.resize(vals.size() - LBACK*OUTPUT_SIZE); //remove last LBACK*OUTPUT_SIZE elements + n = vals.size(); + } else + n = 0; + } else { + n = vals.size(); + } + if (n > MAX_SERIES_LENGTH) {//chop long series + vals.erase(vals.begin(), vals.begin() + (n-MAX_SERIES_LENGTH)); //remove some early data + n = vals.size(); + } + } + M4TS(){}; +}; + +#if defined USE_ODBC +void HandleDiagnosticRecord(SQLHANDLE 
hHandle, + SQLSMALLINT hType, + RETCODE RetCode); +#endif + + + +struct AdditionalParams {//Per series, important + Parameter levSm; + Parameter sSm; + array initSeasonality; + Parameter sSm2; + array initSeasonality2; +}; +struct AdditionalParamsF {//Used for storing diagnostics + float levSm; + float sSm; + array initSeasonality; + float sSm2; + array initSeasonality2; + vector levels; + vector seasons; + vector seasons2; +}; + + +array perfToRanking (array perf_arr) { + array index; + + for (int itop=0; itop losses; + for (unsigned int indx = 0; indx as_scalar(forec.value())) + losses.push_back((actual - forec)*TRAINING_TAU); + else + losses.push_back((actual - forec)*(TRAINING_TAU - 1)); + } + return sum(losses) / OUTPUT_SIZE * 2; +} + + +// weighted quantile Loss, used just for diagnostics, if if LBACK>0 and PERCENTILE!=50 +float wQuantLoss(vector& out_vect, vector& actuals_vect) { + float sumf = 0; float suma=0; + for (unsigned int indx = 0; indx forec) + sumf = sumf + (actual - forec)*TAU; + else + sumf = sumf + (actual - forec)*(TAU - 1); + } + return sumf / suma * 200; +} + +//used just for diagnostics, if LBACK>0 and PERCENTILE==50 +float sMAPE(vector& out_vect, vector& actuals_vect) { + float sumf = 0; + for (unsigned int indx = 0; indx& out_vect, vector& actuals_vect) { + if (PERCENTILE==50) + return sMAPE(out_vect, actuals_vect); + else + return wQuantLoss(out_vect, actuals_vect); +} + +int main(int argc, char** argv) { + dynet::initialize(argc, argv); + + int ibigOffset = 0; + if (argc == 2) + ibigOffset = atoi(argv[1]); + + cout << VARIABLE<<" "< 0) { + cout<<"Warning. LEVEL_VARIABILITY_PENALTY has to be equal zero if SEASONALITY_NUM==0"<tm_year+1900; + now_ts.month=now->tm_mon+1; + now_ts.day=now->tm_mday; + now_ts.hour=now->tm_hour; + now_ts.minute=now->tm_min; + now_ts.second=now->tm_sec; + now_ts.fraction=0; //reportedly needed + + const int OFFSET_TO_FIRST_ACTUAL=5; + string insertQuery_str = "insert into M72nn(run, LBack, ibig, series, epoch "; + for (int iq = 1; iq <= OUTPUT_SIZE; iq++) { + stringstream ss; + ss << iq; + string iq_str = ss.str(); + insertQuery_str = insertQuery_str +", actual"+iq_str+", forec" + iq_str; + } + insertQuery_str = insertQuery_str +", trainingError, variable, n, dateTimeOfPrediction) \ + values(? , ? , ? , ? , ? 
"; + for (int iq = 1; iq <= OUTPUT_SIZE; iq++) { + insertQuery_str = insertQuery_str + ",?,?"; + } + insertQuery_str = insertQuery_str + ",?,?,?,?)"; + #if defined _WINDOWS + wstring insertQuery(insertQuery_str.begin(), insertQuery_str.end()); + SQLWCHAR* sqlQuery = (SQLWCHAR*)insertQuery.c_str(); + #else + SQLCHAR* sqlQuery =(SQLCHAR*)insertQuery_str.c_str(); + #endif + + SQLHENV hEnv = NULL; + SQLHDBC hDbc = NULL; + SQLHSTMT hStmt = NULL, hInsertStmt = NULL; + + if (SQLAllocHandle(SQL_HANDLE_ENV, SQL_NULL_HANDLE, &hEnv) == SQL_ERROR) { + fprintf(stderr, "Unable to allocate an environment handle\n"); + exit(-1); + } + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLSetEnvAttr(hEnv, + SQL_ATTR_ODBC_VERSION, + (SQLPOINTER)SQL_OV_ODBC3, + 0)); + + // Allocate a connection + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLAllocHandle(SQL_HANDLE_DBC, hEnv, &hDbc)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLDriverConnect(hDbc, + NULL, + pwszConnStr, + SQL_NTS, + NULL, + 0, + NULL, + SQL_DRIVER_COMPLETE)); + fprintf(stderr, "Connected!\n"); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLSetConnectAttr(hDbc, SQL_ATTR_AUTOCOMMIT, (SQLPOINTER)SQL_AUTOCOMMIT_OFF, SQL_IS_INTEGER)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLAllocHandle(SQL_HANDLE_STMT, hDbc, &hInsertStmt)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLPrepare(hInsertStmt, sqlQuery, SQL_NTS)); + + SQLLEN nullTerminatedStringOfRun = SQL_NTS; + SQLLEN nullTerminatedStringOfSeries = SQL_NTS; + SQLLEN nullTerminatedStringOfVariable = SQL_NTS; + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 1, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)run.c_str(), 0, &nullTerminatedStringOfRun)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 2, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&LBACK, 0, NULL)); + + // variable, n, dateTimeOfPrediction + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL+2*OUTPUT_SIZE+2, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)VARIABLE.c_str(), 0, &nullTerminatedStringOfVariable)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE + 4, SQL_PARAM_INPUT, SQL_C_TYPE_TIMESTAMP, SQL_TYPE_TIMESTAMP, 0, 0, &now_ts, sizeof(TIMESTAMP_STRUCT), NULL)); +#endif + + random_device rd; // only used once to initialise (seed) engine + mt19937 rng(rd()); // random-number engine used (Mersenne-Twister in this case) + + vector series_vect; + unordered_map allSeries_map(30000);//max series in one chunk would be 24k for yearly series + unordered_map seriesCategories_map(120000);//100k series + + ifstream infoFile(INFO_INPUT_PATH); + string line; + getline(infoFile, line); //header + while (getline(infoFile, line)) { + //cout << string( line)<= MIN_SERIES_LENGTH) { + series_vect.push_back(series); + allSeries_map[series] = m4Obj; + } + if (MAX_NUM_OF_SERIES>0 && series_vect.size()>=MAX_NUM_OF_SERIES) + break; + } + cout << "num of series:" << series_vect.size() << endl; + + unsigned int series_len=(unsigned int)series_vect.size(); + uniform_int_distribution uniOnSeries(0,series_len-1); // closed interval [a, b] + uniform_int_distribution uniOnNets(0,NUM_OF_NETS-1); // closed interval [a, b] + + unordered_map, AVERAGING_LEVEL+1>, NUM_OF_NETS>> testResults_map((int)series_len*1.5);//per series, etc... 
+ unordered_map> finalResults_map((int)series_len*1.5);//per series + set diagSeries; + + unordered_map> netRanking_map; + for (int ibig=0; ibig perfValid_vect; + int epochOfLastChangeOfLRate = -1; + +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&ibigDb, 0, NULL)); +#endif + + //create nets + array paramsCollection_arr;//per net + array perSeriesParamsCollection_arr;//per net + array trainers_arr; + array perSeriesTrainers_arr; + + + #if defined USE_RESIDUAL_LSTM + array, NUM_OF_NETS> rnnStack_arr; + #elif defined USE_ATTENTIVE_LSTM + array, NUM_OF_NETS> rnnStack_arr; + #else + array, NUM_OF_NETS> rnnStack_arr; + #endif + + array MLPW_parArr; + array MLPB_parArr; + array adapterW_parArr; + array adapterB_parArr; + + //this is not a history, this is the real stuff + unordered_map* > additionalParams_mapOfArr((int)series_len*1.5); //per series, per net + for (auto iter = series_vect.begin() ; iter != series_vect.end(); ++iter) { + string series=*iter; + additionalParams_mapOfArr[series]=new array(); + } + + for (int inet=0; inetclip_threshold = GRADIENT_CLIPPING; + perSeriesTrainers_arr[inet]=new AdamTrainer (perSeriesPC, INITIAL_LEARNING_RATE*PER_SERIES_LR_MULTIP, 0.9, 0.999, EPS); + perSeriesTrainers_arr[inet]->clip_threshold = GRADIENT_CLIPPING; + + auto& rNNStack=rnnStack_arr[inet]; + #if defined USE_RESIDUAL_LSTM + rNNStack.emplace_back(ResidualDilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, pc)); + for (int il = 1; il* additionalParams_arr=additionalParams_mapOfArr[series]; + additionalParams_arr->at(inet).levSm=perSeriesPC.add_parameters({1}, 0.5);//per series, per net + if (SEASONALITY_NUM > 0) { + additionalParams_arr->at(inet).sSm = perSeriesPC.add_parameters({ 1 }, 0.5); + for (int isea = 0; iseaat(inet).initSeasonality[isea] = perSeriesPC.add_parameters({ 1 }, 0.5); + } + if (SEASONALITY_NUM > 1) { + additionalParams_arr->at(inet).sSm2 = perSeriesPC.add_parameters({ 1 }, 0.5); + for (int isea = 0; iseaat(inet).initSeasonality2[isea] = perSeriesPC.add_parameters({ 1 }, 0.5); + } + } + }//seting up, through nets + + //history of params. Series->[NUM_OF_NETS,NUM_OF_TRAIN_EPOCHS] + unordered_map, NUM_OF_NETS>*> historyOfAdditionalParams_map((int)series_len*1.5); + for (auto iter = series_vect.begin() ; iter != series_vect.end(); ++iter) { + string series=*iter; + historyOfAdditionalParams_map[series]=new array, NUM_OF_NETS>(); + } + + //first assignment. Yes, we are using vector , so the very first time the duplicates are possible. 
But a set can't be sorted + array, NUM_OF_NETS> seriesAssignment;//every net has an array + for (int j=0; j> netPerf_map; + for (int inet=0; inetlearning_rate = LEARNING_RATES.at(iEpoch); + if (inet==0) + cout << "changing LR to:" << trainer->learning_rate << endl; + perSeriesTrainer->learning_rate = LEARNING_RATES.at(iEpoch)*PER_SERIES_LR_MULTIP; + } + + auto& rNNStack=rnnStack_arr[inet]; + Parameter& MLPW_par = MLPW_parArr[inet]; + Parameter& MLPB_par = MLPB_parArr[inet]; + Parameter& adapterW_par=adapterW_parArr[inet]; + Parameter& adapterB_par=adapterB_parArr[inet]; + + vector oneNetAssignments=seriesAssignment[inet]; + random_shuffle (oneNetAssignments.begin(), oneNetAssignments.end()); + + vector epochLosses; + vector forecLosses; vector levVarLosses; vector stateLosses; + for (auto iter = oneNetAssignments.begin() ; iter != oneNetAssignments.end(); ++iter) { + string series=*iter; + auto m4Obj=allSeries_map[series]; + + ComputationGraph cg; + for (int il=0; ilat(inet); + array& historyOfAdditionalParams_arr=historyOfAdditionalParams_map[series]->at(inet); + + Expression MLPW_ex,MLPB_ex; + if (ADD_NL_LAYER) { + MLPW_ex = parameter(cg, MLPW_par); + MLPB_ex = parameter(cg, MLPB_par); + } + Expression adapterW_ex=parameter(cg, adapterW_par); + Expression adapterB_ex=parameter(cg, adapterB_par); + + Expression levSmSerNet0_ex= parameter(cg, additionalParams.levSm); + Expression levSm_ex = logistic(levSmSerNet0_ex); + + vector season_exVect;//vector, because we do not know how long the series is + Expression sSm_ex; + if (SEASONALITY_NUM > 0) { + Expression sSmSerNet0_ex= parameter(cg, additionalParams.sSm); + sSm_ex = logistic(sSmSerNet0_ex); + + for (int isea = 0; isea season2_exVect;//vector, because we do not know how long the series is + Expression sSm2_ex; + if (SEASONALITY_NUM > 1) { + Expression sSm2SerNet0_ex= parameter(cg, additionalParams.sSm2); + sSm2_ex = logistic(sSm2SerNet0_ex); + + for (int isea = 0; isea logDiffOfLevels_vect; + vector levels_exVect; + if (SEASONALITY_NUM == 0) { + levels_exVect.push_back(input(cg, m4Obj.vals[0])); + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + } + else if (SEASONALITY_NUM == 2) { + Expression lev = cdiv(input(cg, m4Obj.vals[0]), season_exVect[0] * season2_exVect[0]); + levels_exVect.push_back(lev); + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + //if prediction horizon is larger than seasonality, so we need to repeat some of the seasonality factors + if (OUTPUT_SIZE>SEASONALITY2) { + unsigned long startSeasonalityIndx = season2_exVect.size() - SEASONALITY2; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY2); i++) + season2_exVect.push_back(season2_exVect[startSeasonalityIndx + i]); + } + } + else { + cerr<<"SEASONALITY_NUM="<< SEASONALITY_NUM; + exit(-1); + } + + Expression levelVarLoss_ex; + if (LEVEL_VARIABILITY_PENALTY > 0) { + vector levelVarLoss_v; + for (int i = 1; i losses;//losses of steps through single time series + for (int i=INPUT_SIZE-1; i<(m4Obj.n- OUTPUT_SIZE); i++) { + vector::const_iterator first = m4Obj.vals.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1; //not including the last one + vector 
input_vect(first, pastLast); //[first,pastLast) + + first = m4Obj.vals.begin() + i + 1; + pastLast = m4Obj.vals.begin() + i + 1 + OUTPUT_SIZE; + vector labels_vect(first, pastLast); //[first,pastLast) + + Expression input1_ex = input(cg, { INPUT_SIZE }, input_vect); + Expression labels1_ex = input(cg, { OUTPUT_SIZE }, labels_vect); + + if (SEASONALITY_NUM > 0 ) { + vector::const_iterator firstE = season_exVect.begin() +i+1-INPUT_SIZE; + vector::const_iterator pastLastE = season_exVect.begin() +i+1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality_ex=concatenate(inputSeasonality_exVect); + + firstE = season_exVect.begin() + i + 1; + pastLastE = season_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + outputSeasonality_ex = concatenate(outputSeasonality_exVect); + + input1_ex = cdiv(input1_ex, inputSeasonality_ex); // input deseasonalization + labels1_ex = cdiv(labels1_ex, outputSeasonality_ex); //output deseasonalization + } + if (SEASONALITY_NUM > 1) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1; //not including the last one + vector inputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality2_ex = concatenate(inputSeasonality2_exVect); + + firstE = season2_exVect.begin() + i + 1; + pastLastE = season2_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality2_ex = concatenate(outputSeasonality2_exVect); + + input1_ex = cdiv(input1_ex, inputSeasonality2_ex); //input deseasonalization + labels1_ex = cdiv(labels1_ex, outputSeasonality2_ex); //output deseasonalization + } + + vector joinedInput_ex; + joinedInput_ex.emplace_back(noise(squash(cdiv(input1_ex, levels_exVect[i])), NOISE_STD)); //input normalization+noise + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression labels_ex = squash(cdiv(labels1_ex, levels_exVect[i]));//output normalization + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il=INPUT_SIZE+MIN_INP_SEQ_LEN) + losses.push_back(loss_ex); + }//through points of a series + + Expression forecLoss_ex= average(losses); + Expression loss_exp = forecLoss_ex; + + float levVarLoss=0; + if (LEVEL_VARIABILITY_PENALTY > 0) { + Expression levelVarLossP_ex = levelVarLoss_ex*LEVEL_VARIABILITY_PENALTY; + levVarLoss = as_scalar(levelVarLossP_ex.value()); + levVarLosses.push_back(levVarLoss); + loss_exp= loss_exp + levelVarLossP_ex; + } + + float cStateLoss=0; + if (C_STATE_PENALTY>0) { + vector cStateLosses_vEx; + for (int irnn = 0; irnn < rNNStack.size(); irnn++) + for (int it = 0; itupdate();//update shared weights + perSeriesTrainer->update(); //update params of this series only + } catch (exception& e) { //long diagnostics for this unlikely event :-) + cerr<<"cought exception while doing "< maxAbs) { + maxAbs = abs(state[iv]); + timeOfMax=it; + layerOfMax=il; + chunkOfMax= irnn; + } + } + } //through layers/states + } //through time + } //through chunks + + cout << "levSm:" << as_scalar(levSm_ex.value()) << endl; + if (SEASONALITY_NUM > 0) + cout << "sSm:" << as_scalar(sSm_ex.value()) << endl; + if (SEASONALITY_NUM > 1) + cout << "sSm2:" << as_scalar(sSm2_ex.value()) << endl; + cout << "max abs:" << maxAbs <<" 
at time:"<< timeOfMax<<" at layer:"<< layerOfMax<<" and chunk:"<< chunkOfMax< 0) { + histAdditionalParams.sSm=as_scalar(sSm_ex.value()); + for (int isea = 0; isea 1) { + histAdditionalParams.sSm2 = as_scalar(sSm2_ex.value()); + for (int isea=0; isea 0 || C_STATE_PENALTY > 0) { + float averageForecLoss = accumulate(forecLosses.begin(), forecLosses.end(), 0.0) / forecLosses.size(); + cout << " forec loss:" << averageForecLoss * 100; + } + if (LEVEL_VARIABILITY_PENALTY > 0) { + float averagelevVarLoss = accumulate(levVarLosses.begin(), levVarLosses.end(), 0.0) / levVarLosses.size(); + cout << " levVar loss:" << averagelevVarLoss * 100; + } + if (C_STATE_PENALTY > 0) { + float averageStateLoss = accumulate(stateLosses.begin(), stateLosses.end(), 0.0) / stateLosses.size(); + cout << " state loss:" << averageStateLoss * 100; + } + cout<at(inet); + Expression MLPW_ex, MLPB_ex; + if (ADD_NL_LAYER) { + MLPW_ex = parameter(cg, MLPW_par); + MLPB_ex = parameter(cg, MLPB_par); + } + Expression adapterW_ex=parameter(cg, adapterW_par); + Expression adapterB_ex=parameter(cg, adapterB_par); + + Expression levSmSerNet0_ex = parameter(cg, additionalParams.levSm); + Expression levSm_ex = logistic(levSmSerNet0_ex); + + vector season_exVect;//vector, because we do not know how long the series is + Expression sSm_ex; + if (SEASONALITY_NUM > 0) { + Expression sSmSerNet0_ex= parameter(cg, additionalParams.sSm); + sSm_ex = logistic(sSmSerNet0_ex); + + for (int isea = 0; isea season2_exVect;//vector, because we do not know how long the series is + Expression sSm2_ex; + if (SEASONALITY_NUM > 1) { + Expression sSm2SerNet0_ex= parameter(cg, additionalParams.sSm2); + sSm2_ex = logistic(sSm2SerNet0_ex); + + for (int isea = 0; isea levels_exVect; + if (SEASONALITY_NUM == 0) { + levels_exVect.push_back(input(cg, m4Obj.vals[0])); + for (int i = 1; i0 then this is shortened, so it always contains data awe have right to access + Expression newLevel_ex = m4Obj.vals[i] * cdiv(levSm_ex, season_exVect[i]) + (1 - levSm_ex)*levels_exVect[i - 1]; + levels_exVect.push_back(newLevel_ex); + + Expression newSeason_ex = m4Obj.vals[i] * cdiv(sSm_ex, newLevel_ex) + (1 - sSm_ex)*season_exVect[i]; + season_exVect.push_back(newSeason_ex); + } + + //if prediction horizon is larger than seasonality, so we need to repeat some of the seasonality factors + if (OUTPUT_SIZE>SEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + } + else if (SEASONALITY_NUM == 2) { + Expression lev = cdiv(input(cg, m4Obj.vals[0]), season_exVect[0] * season2_exVect[0]); + levels_exVect.push_back(lev); + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + //if prediction horizon is larger than seasonality, so we need to repeat some of the seasonality factors + if (OUTPUT_SIZE>SEASONALITY2) { + unsigned long startSeasonalityIndx = season2_exVect.size() - SEASONALITY2; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY2); i++) + season2_exVect.push_back(season2_exVect[startSeasonalityIndx + i]); + } + } + else { + cerr<<"SEASONALITY_NUM="<< SEASONALITY_NUM; + exit(-1); + } + + + Expression inputSeasonality_ex; Expression inputSeasonality2_ex; + Expression outputSeasonality_ex; Expression outputSeasonality2_ex; + vector losses;//losses 
of steps through single time series + Expression out_ex;//we declare it here, bcause the last one will be the forecast + for (int i=INPUT_SIZE-1; i::const_iterator first = m4Obj.vals.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1; //not including the last one + vector input_vect(first, pastLast); //[first,pastLast) + Expression input1_ex = input(cg, { INPUT_SIZE }, input_vect); + + if (SEASONALITY_NUM > 0 ) { + vector::const_iterator firstE = season_exVect.begin() +i+1-INPUT_SIZE; + vector::const_iterator pastLastE = season_exVect.begin() +i+1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality_ex=concatenate(inputSeasonality_exVect); + input1_ex = cdiv(input1_ex, inputSeasonality_ex); // input deseasonalization + } + if (SEASONALITY_NUM > 1) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1; //not including the last one + vector inputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality2_ex = concatenate(inputSeasonality2_exVect); + input1_ex = cdiv(input1_ex, inputSeasonality2_ex); //input deseasonalization + } + + vector joinedInput_ex; + joinedInput_ex.emplace_back(noise(squash(cdiv(input1_ex, levels_exVect[i])), NOISE_STD)); //input normalization+noise + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il::const_iterator first = m4Obj.vals.begin() + i + 1; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1 + OUTPUT_SIZE; + vector labels_vect(first, pastLast); //[first,pastLast) + Expression labels1_ex = input(cg, { OUTPUT_SIZE }, labels_vect); + + if (SEASONALITY_NUM > 0) { + vector::const_iterator firstE = season_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + outputSeasonality_ex = concatenate(outputSeasonality_exVect); + labels1_ex = cdiv(labels1_ex, outputSeasonality_ex); //output deseasonalization + } + if (SEASONALITY_NUM > 1) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality2_ex = concatenate(outputSeasonality2_exVect); + labels1_ex = cdiv(labels1_ex, outputSeasonality2_ex); //output deseasonalization + } + Expression labels_ex = squash(cdiv(labels1_ex, levels_exVect[i]));//output normalization + + Expression loss_ex = pinBallLoss(out_ex, labels_ex); + if (i>=INPUT_SIZE+MIN_INP_SEQ_LEN) + losses.push_back(loss_ex); //training area losses + } + + if (i==(m4Obj.n-1)) {//validation loss + out_ex=expand(out_ex)*levels_exVect[i];//back to original scale + if (SEASONALITY_NUM > 0 ) { + vector::const_iterator firstE = season_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + outputSeasonality_ex = concatenate(outputSeasonality_exVect); + out_ex = cmult(out_ex, outputSeasonality_ex);//reseasonalize + } + if (SEASONALITY_NUM > 1 ) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1; + 
vector::const_iterator pastLastE = season2_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality2_ex = concatenate(outputSeasonality2_exVect); + out_ex = cmult(out_ex, outputSeasonality2_ex);//reseasonalize + } + //we do not need the matching label here, because we do not bother calculate valid losses of each net across all series. + //We care about best and topn performance + } + }//end of going through all point of a series + + Expression loss_exp = average(losses); + float loss = as_scalar(cg.forward(loss_exp));//training loss of a single series + netPerf_map[series][inet]=loss; + + //unordered_map, AVERAGING_LEVEL+1>, NUM_OF_NETS>, BIG_LOOP>> testResults_map((int)series_len*1.5);//per series, big loop, etc... + //No epoch here, because this will just reflect the current (latest) situation - the last few epochs + vector out_vect=as_vector(out_ex.value()); + testResults_map[series][inet][iEpoch%AVERAGING_LEVEL]=out_vect; + if (iEpoch>=AVERAGING_LEVEL && iEpoch % FREQ_OF_TEST==0) { + vector firstForec=testResults_map[series][inet][0]; + testResults_map[series][inet][AVERAGING_LEVEL]=firstForec; + for (int ii=1; ii nextForec=testResults_map[series][inet][ii]; + for (int iii=0; iii0 && iEpoch % FREQ_OF_TEST==0) { + //now that we have saved outputs of all nets on all series, let's calc how best and topn combinations performed during current epoch. + vector bestEpochLosses; + vector bestEpochAvgLosses; + vector topnEpochLosses; + vector topnEpochAvgLosses; + + for (auto iter = series_vect.begin() ; iter != series_vect.end(); ++iter) { + string series=*iter; + auto m4Obj=allSeries_map[series]; + +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 4, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)series.c_str(), 0, &nullTerminatedStringOfSeries)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE + 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&m4Obj.n, 0, NULL)); +#endif + + float avgLoss; + vector avgLatest; + vector avgAvg; + + for (int itop=0; itop 0) { + float qLoss = errorFunc(testResults_map[series][inet][iEpoch%AVERAGING_LEVEL], m4Obj.testVals); + bestEpochLosses.push_back(qLoss); + } + avgLatest=testResults_map[series][inet][iEpoch%AVERAGING_LEVEL]; //used later for calculating topn loss + + if (iEpoch>=AVERAGING_LEVEL) { + if (LBACK > 0) { + float qLoss = errorFunc(testResults_map[series][inet][AVERAGING_LEVEL], m4Obj.testVals); + bestEpochAvgLosses.push_back(qLoss); + } + avgAvg=testResults_map[series][inet][AVERAGING_LEVEL]; + } + } else { + for (int iii=0; iii=AVERAGING_LEVEL) + avgAvg[iii]+=testResults_map[series][inet][AVERAGING_LEVEL][iii]; + } + } + }//through topn + + for (int iii=0; iii 0) { + float qLoss = errorFunc(avgLatest, m4Obj.testVals); + topnEpochLosses.push_back(qLoss); + } + + if (iEpoch>=AVERAGING_LEVEL) { + for (int iii = 0; iii 0) { +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE + 1, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&avgLoss, 0, NULL)); + + for (int iii=0; iii 0) { + float bestEpochLoss=accumulate( bestEpochLosses.begin(), bestEpochLosses.end(), 0.0)/bestEpochLosses.size(); + float topnEpochLoss=accumulate( topnEpochLosses.begin(), topnEpochLosses.end(), 0.0)/topnEpochLosses.size(); + cout<=AVERAGING_LEVEL) { + float 
bestEpochAvgLoss=accumulate( bestEpochAvgLosses.begin(), bestEpochAvgLosses.end(), 0.0)/bestEpochAvgLosses.size(); + float topnEpochAvgLoss=accumulate( topnEpochAvgLosses.begin(), topnEpochAvgLosses.end(), 0.0)/topnEpochAvgLosses.size(); + cout<<" bestAvg:"<> netRanking_map + netRanking_map[series]=perfToRanking(netPerf_map[series]); + + for (int itop=0; itop diagSeries; + for (int i=0; i<1; i++) {//add a few normal ones + int irand=uniOnSeries(rng); + diagSeries.insert(series_vect[irand]); + } + for(auto series : diagSeries) { + cout<at(inet); + for (int iEpoch=0; iEpoch 0 ) { + cout<<"sSm:"<at(inet); + for (int iEpoch=0; iEpoch 1 ) { + cout<<"sSm2:"<at(inet); + for (int iEpoch=0; iEpochat(inet); + for (int iEpoch = 0; iEpoch0) { + cout << "levels:" << iEpoch<<" "; + for (int iv = 0; iv 0 ) { + cout << "seasons:" << iEpoch<<" "; + for (int iv = 0; iv 1 ) { + cout << "seasons2:" << iEpoch<<" "; + for (int iv = 0; iv 0 +start 10 +start 20 +start 30 +on 4-core computer. +In this setup, learning and fitting would be repeated 4*3 times, probably unnecessarily too many, 6-8 independent runs should be enough for a good ensemble. +Therefore if running on say 8 core machine , one can extend the above script to 8 concurrent executions and reduce BIG_LOOP to 1. +(Creating final forecasts is done in a supplied R script) + +There are four blocks of parameters below, one active (starting with //PARAMS--------------) and three inactive. +These blocks are as they were during the final forecasting run. You need comment/uncomment to have one block of interest active. +*/ + + +//#define USE_ODBC +//define USE_ODBC if you want to +// 1. run the program in backtesting mode (which means you also need to set LBACK>0 below. Read the comment below. +// 2. save forecasts to a datatabase. Mysql and SQL Server were tested. The table creation and some other scripts should be found in \sql directory of the source code. +// Of course setting up ODBC is not that simple, :-), e.g. you need to create DSN=slawek, that points to a database with the output table. +// Saving to the db is convenient, but not necessary - all forecasts are always saved to as csv files in automatically created subdirectory (sorry sometimes two directories, so you have to copy :-)) of OUTPUT_DIR +//If saving to database you need to modify run varaible, for each new run, otherwise you will get the table key error. + +#include "dynet/dynet.h" +#include "dynet/training.h" +#include "dynet/expr.h" +#include "dynet/io.h" +#include "dynet/model.h" +#include "dynet/nodes.h" +#include "dynet/expr.h" +#include "dynet/lstm.h" +#include "slstm.h" //my implementation of dilated LSTMs + + +#if defined USE_ODBC + #if defined _WINDOWS + #include + #endif + #include + #include +#endif + +#include +#include +#include +//#include +#include +#include +#include +#include + +using namespace std; +using namespace dynet; + +string DATA_DIR = "f:\\progs\\data\\M4DataSet\\"; //with the competition data csvs +//string DATA_DIR="/home/uber/progs/data/M4DataSet/"; +string OUTPUT_DIR = "f:\\progs\\data\\M4\\"; +//string OUTPUT_DIR="/home/uber/progs/data/M4/"; + +int LBACK = 0; //LBACK 0 means final mode: learning on all data and forecasting. LBACK=1 would move back by OUTPUT_SIZE, and forecast last known OUTPUT_SIZE points, for backtesting. LBACK could be a larger integer, but then number of series shrinks. 
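+//How the LBACK split works (this mirrors the M4TS constructor further below): with LBACK>0 the last
+//LBACK*OUTPUT_SIZE points are dropped from the training data and the OUTPUT_SIZE points immediately
+//following the shortened series become the held-out actuals used by the error metrics.
+//Minimal sketch with a hypothetical helper name, not used by this program:
+#if 0
+static void splitForBacktest_sketch(vector<float>& vals, vector<float>& testVals,
+                                    int lback, int outputSize) {
+  if (lback > 0 && vals.size() > (size_t)(lback * outputSize)) {
+    auto first = vals.begin() + vals.size() - lback * outputSize;
+    auto pastLast = vals.begin() + vals.size() - (lback - 1) * outputSize; //[first,pastLast)
+    testVals.assign(first, pastLast);              //held-out actuals for the error metrics
+    vals.resize(vals.size() - lback * outputSize); //training now ends before the held-out block
+  }
+}
+#endif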
+ + +//PARAMS-------------- + +string VARIABLE = "Hourly"; +const string run0 = "(1,4)(24,168) LR=0.01, {25,3e-3f} EPOCHS=37, LVP=10, CSP=0"; +const string runL = "alpha5L " + run0; +const string runH = "alpha5H " + run0; + +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +const int SEASONALITY_NUM = 2;//0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 24; +const int SEASONALITY2 = 168; +vector> dilations = { { 1,4 },{ 24, 168 } }; + +const float INITIAL_LEARNING_RATE = 0.01f; +const map LEARNING_RATES = { { 20,1e-3f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 37; + +float LEVEL_VARIABILITY_PENALTY = 10; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; + +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 24; +const unsigned int OUTPUT_SIZE = 48; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK +const int MAX_SERIES_LENGTH = 53 * SEASONALITY2 + MIN_SERIES_LENGTH; //==all +const int TOPN = 4; + + +/* +string VARIABLE = "Weekly"; +const string run0 = "Att 4/5 (1,52) LR=1e-3 {15,3e-4f} EPOCHS=31, LVP=100 6y"; +const string runL = "alpha5L " + run0; +const string runH = "alpha5H " + run0; + +//#define USE_RESIDUAL_LSTM +#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +const int SEASONALITY_NUM = 1; //0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 52; +const int SEASONALITY2 = 0; +vector> dilations = { { 1, 52 } }; + +const float INITIAL_LEARNING_RATE = 1e-3; +const map LEARNING_RATES = { { 15,3e-4f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 31; + +float LEVEL_VARIABILITY_PENALTY = 100; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; + +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 10; +const unsigned int OUTPUT_SIZE = 13; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK + //#Min. 1st Qu. Median Mean 3rd Qu. Max. 
+ //#81 380 935 1023 1604 2598 +const int MAX_SERIES_LENGTH = 6 * SEASONALITY + MIN_SERIES_LENGTH; +const int TOPN = 4; +*/ + +/* + +string VARIABLE = "Daily"; +const string run0 = "4/5 (1,3)(7,14) LR=3e-4 {13,1e-4f} EPOCHS=21, LVP=100 13w"; +const string runL = "alpha5L " + run0; +const string runH = "alpha5H " + run0; + +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER=false; + +const int SEASONALITY_NUM = 1; //0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 7; +const int SEASONALITY2 = 0; +vector> dilations = { { 1,3 },{ 7, 14 } }; + +const float INITIAL_LEARNING_RATE = 3e-4; +const map LEARNING_RATES = { { 13,1e-4f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 21; + +float LEVEL_VARIABILITY_PENALTY = 100; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; + +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 7; +const unsigned int OUTPUT_SIZE = 14; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK + //#Min. 1st Qu. Median Mean 3rd Qu. Max. + //##93 323 2940 2357 4197 9919 +const int MAX_SERIES_LENGTH = 13 * SEASONALITY + MIN_SERIES_LENGTH; +const int TOPN = 4; +*/ + +/* +string VARIABLE = "Yearly"; +const string run0 = "Att NL 4/5 (1,6) LR=1e-4 {17,3e-5}{22,1e-5} EPOCHS=29, 60*"; +const string runL = "alpha5L " + run0; +const string runH = "alpha5H " + run0; + +//#define USE_RESIDUAL_LSTM +#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = true; + +const int SEASONALITY_NUM = 0; //0 means no seasonality +const int SEASONALITY = 1; //for no seasonality, set it to 1, important +const int SEASONALITY2 = 0; +vector> dilations = { { 1,6 } }; + +const float INITIAL_LEARNING_RATE = 1e-4; +const map LEARNING_RATES = { { 17,3e-5 },{ 22,1e-5 } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 29; + +float LEVEL_VARIABILITY_PENALTY = 0; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; + +const unsigned int STATE_HSIZE = 30; + +const unsigned int INPUT_SIZE = 4; +const unsigned int OUTPUT_SIZE = 6; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK + //#Min. 1st Qu. Median Mean 3rd Qu. Max. 
+ //#13.00 20.00 29.00 31.32 40.00 835.00 +const int MAX_SERIES_LENGTH = 60 + MIN_SERIES_LENGTH; +const int TOPN = 4; +*/ + +const float ALPHA = 0.05; +const float TAUL = ALPHA / 2; +const float TAUH = 1 - TAUL; +const float ALPHA_MULTIP = 2 / ALPHA; + +const int BIG_LOOP = 3; +const int NUM_OF_NETS = 5; +const unsigned ATTENTION_HSIZE = STATE_HSIZE; + +#if defined _DEBUG + const int MAX_NUM_OF_SERIES = 20; +#else + const int MAX_NUM_OF_SERIES = -1; +#endif // _DEBUG + +const unsigned int NUM_OF_CATEGORIES = 6; +const int AVERAGING_LEVEL = 5; +const float EPS=1e-6; + +const float NOISE_STD=0.001; +const int FREQ_OF_TEST=1; +const float GRADIENT_CLIPPING=50; +const float BIG_FLOAT=1e38;//numeric_limits::max(); +const bool PRINT_DIAGN = false; + +string INPUT_PATH = DATA_DIR + VARIABLE + "-train.csv"; +string INFO_INPUT_PATH = DATA_DIR + "M4-info.csv"; + + +Expression squash(const Expression& x) { + return log(x); +} +float squash(float x) { + return log(x); +} + +Expression expand(const Expression& x) { + return exp(x); +} +float expand(float x) { + return exp(x); +} + + +#if defined USE_ODBC + void HandleDiagnosticRecord(SQLHANDLE hHandle, + SQLSMALLINT hType, + RETCODE RetCode); + + #if defined _WINDOWS + WCHAR* pwszConnStr = L"DSN=slawek"; + #else + SQLCHAR* pwszConnStr = (SQLCHAR*) "DSN=slawek"; + #endif + #define TRYODBC(h, ht, x) { RETCODE rc = x;\ + if (rc != SQL_SUCCESS) \ + { \ + HandleDiagnosticRecord (h, ht, rc); \ + } \ + if (rc == SQL_ERROR) \ + { \ + fprintf(stderr, "Error in " #x "\n"); \ + if (hStmt) { \ + SQLFreeHandle(SQL_HANDLE_STMT, hStmt); \ + } \ + if (hDbc) { \ + SQLDisconnect(hDbc); \ + SQLFreeHandle(SQL_HANDLE_DBC, hDbc); \ + } \ + if (hEnv) { \ + SQLFreeHandle(SQL_HANDLE_ENV, hEnv); \ + } \ + exit(-1); \ + } \ + } + +#endif + +struct M4TS {//storing series data + vector < float> categories_vect; + vector vals; + vector testVals;//empty, unless LBACK>0 + float meanAbsSeasDiff; + int n; + + M4TS(string category, stringstream &line_stream) { + array categories = { 0,0,0,0,0,0 }; + if (category == "Demographic") + categories[0] = 1; + else if (category == "Finance") + categories[1] = 1; + else if (category == "Industry") + categories[2] = 1; + else if (category == "Macro") + categories[3] = 1; + else if (category == "Micro") + categories[4] = 1; + else if (category == "Other") + categories[5] = 1; + else { + cerr << "unknown category?"; + exit(-1); + } + for (int i = 0; i < NUM_OF_CATEGORIES; i++) + categories_vect.push_back(categories[i]); + + string tmp_str; + while(getline(line_stream, tmp_str, ',' )) { + string val_str; + for (const auto c : tmp_str) { + if (c != '\"' && c != '\r') //remove quotes and very occasional double end of line + val_str.push_back(c); + } + if (val_str.size() == 0) + break; + float val=(atof(val_str.c_str())); + vals.push_back(val); + } + + meanAbsSeasDiff = 0; + float sumf = 0; + for (int ip = SEASONALITY; ip0) + meanAbsSeasDiff = sumf / (vals.size() - SEASONALITY); + + if (LBACK > 0) { //extract last OUTPUT_SIZE points as the test values + if (vals.size() > LBACK*OUTPUT_SIZE) { + auto first = vals.begin() + vals.size() - LBACK*OUTPUT_SIZE; + auto pastLast = vals.begin() + vals.size() - (LBACK-1)*OUTPUT_SIZE; + vector input_vect(first, pastLast); //[first,pastLast) + testVals= input_vect; + vals.resize(vals.size() - LBACK*OUTPUT_SIZE); //remove last LBACK*OUTPUT_SIZE elements + n = vals.size(); + } else + n = 0; + } else { + n = vals.size(); + } + if (n > MAX_SERIES_LENGTH) {//chop long series + vals.erase(vals.begin(), vals.begin() + 
(n-MAX_SERIES_LENGTH)); //remove some early data + n = vals.size(); + } + } + M4TS(){}; +}; + +#if defined USE_ODBC +void HandleDiagnosticRecord(SQLHANDLE hHandle, + SQLSMALLINT hType, + RETCODE RetCode); +#endif + +struct AdditionalParams {//Per series, important + Parameter levSm; + Parameter sSm; + array initSeasonality; + Parameter sSm2; + array initSeasonality2; +}; +struct AdditionalParamsF {//Used for storing diagnostics + float levSm; + float sSm; + array initSeasonality; + float sSm2; + array initSeasonality2; + vector levels; + vector seasons; + vector seasons2; +}; + + +array perfToRanking (array perf_arr) { + array index; + + for (int itop=0; itop losses; + for (unsigned int indx = 0; indx as_scalar(forecH.value())) + loss = loss + (actual - forecH)*ALPHA_MULTIP; + losses.push_back(loss); + } + return sum(losses) / OUTPUT_SIZE; +} + +// weighted quantile Loss +float wQuantLoss(vector& out_vect, vector& actuals_vect, float tau, int offset) {//used just for diagnostics, if if LBACK>0 and PERCENTILE!=50 + float sumf = 0; float suma = 0; + for (unsigned int indx = 0; indx forec) + sumf = sumf + (actual - forec)*tau; + else + sumf = sumf + (actual - forec)*(tau - 1); + } + return sumf / suma * 200; +} + +float errorFunc(vector& out_vect, vector& actuals_vect, float meanAbsSeasDiff) { + float sumf=0; + for (unsigned int indx = 0; indx forecH) + loss = loss + (actualf - forecH)*ALPHA_MULTIP; + sumf+=loss; + } + return sumf / (OUTPUT_SIZE*meanAbsSeasDiff); +} + + + +int main(int argc, char** argv) { + dynet::initialize(argc, argv); + + int ibigOffset = 0; + if (argc == 2) + ibigOffset = atoi(argv[1]); + + cout< 0) { + cout<<"Warning. LEVEL_VARIABILITY_PENALTY has to be equal zero if SEASONALITY_NUM==0"<tm_year+1900; + now_ts.month=now->tm_mon+1; + now_ts.day=now->tm_mday; + now_ts.hour=now->tm_hour; + now_ts.minute=now->tm_min; + now_ts.second=now->tm_sec; + now_ts.fraction=0; //reportedly needed + + const int OFFSET_TO_FIRST_ACTUAL=5; + string insertQuery_str = "insert into M72nn(run, LBack, ibig, series, epoch "; + for (int iq = 1; iq <= OUTPUT_SIZE; iq++) { + stringstream ss; + ss << iq; + string iq_str = ss.str(); + insertQuery_str = insertQuery_str +", actual"+iq_str+", forec" + iq_str; + } + insertQuery_str = insertQuery_str +", trainingError, variable, n, dateTimeOfPrediction) \ + values(? , ? , ? , ? , ? 
"; + for (int iq = 1; iq <= OUTPUT_SIZE; iq++) { + insertQuery_str = insertQuery_str + ",?,?"; + } + insertQuery_str = insertQuery_str + ",?,?,?,?)"; + #if defined _WINDOWS + wstring insertQuery(insertQuery_str.begin(), insertQuery_str.end()); + SQLWCHAR* sqlQuery = (SQLWCHAR*)insertQuery.c_str(); + #else + SQLCHAR* sqlQuery =(SQLCHAR*)insertQuery_str.c_str(); + #endif + + SQLHENV hEnv = NULL; + SQLHDBC hDbc = NULL; + SQLHSTMT hStmt = NULL, hInsertStmt = NULL; + + if (SQLAllocHandle(SQL_HANDLE_ENV, SQL_NULL_HANDLE, &hEnv) == SQL_ERROR) { + fprintf(stderr, "Unable to allocate an environment handle\n"); + exit(-1); + } + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLSetEnvAttr(hEnv, + SQL_ATTR_ODBC_VERSION, + (SQLPOINTER)SQL_OV_ODBC3, + 0)); + + // Allocate a connection + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLAllocHandle(SQL_HANDLE_DBC, hEnv, &hDbc)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLDriverConnect(hDbc, + NULL, + pwszConnStr, + SQL_NTS, + NULL, + 0, + NULL, + SQL_DRIVER_COMPLETE)); + fprintf(stderr, "Connected!\n"); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLSetConnectAttr(hDbc, SQL_ATTR_AUTOCOMMIT, (SQLPOINTER)SQL_AUTOCOMMIT_OFF, SQL_IS_INTEGER)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLAllocHandle(SQL_HANDLE_STMT, hDbc, &hInsertStmt)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLPrepare(hInsertStmt, sqlQuery, SQL_NTS)); + + SQLLEN nullTerminatedStringOfRun = SQL_NTS; + SQLLEN nullTerminatedStringOfSeries = SQL_NTS; + SQLLEN nullTerminatedStringOfVariable = SQL_NTS; + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 2, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&LBACK, 0, NULL)); + + // variable, n, dateTimeOfPrediction + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL+2*OUTPUT_SIZE+2, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)VARIABLE.c_str(), 0, &nullTerminatedStringOfVariable)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE + 4, SQL_PARAM_INPUT, SQL_C_TYPE_TIMESTAMP, SQL_TYPE_TIMESTAMP, 0, 0, &now_ts, sizeof(TIMESTAMP_STRUCT), NULL)); +#endif + + random_device rd; // only used once to initialise (seed) engine + mt19937 rng(rd()); // random-number engine used (Mersenne-Twister in this case) + + vector series_vect; + unordered_map allSeries_map(30000);//max series in one chunk would be 24k for yearly series + unordered_map seriesCategories_map(120000);//100k series + + ifstream infoFile(INFO_INPUT_PATH); + string line; + getline(infoFile, line); //header + while (getline(infoFile, line)) { + //cout << string( line)<= MIN_SERIES_LENGTH) { + series_vect.push_back(series); + if (m4Obj.meanAbsSeasDiff==0) { + cout<<"Warning, flat series:"<0 && series_vect.size()>=MAX_NUM_OF_SERIES) + break; + } + cout << "num of series:" << series_vect.size() << endl; + + unsigned int series_len=(unsigned int)series_vect.size(); + uniform_int_distribution uniOnSeries(0,series_len-1); // closed interval [a, b] + uniform_int_distribution uniOnNets(0,NUM_OF_NETS-1); // closed interval [a, b] + + unordered_map, AVERAGING_LEVEL+1>, NUM_OF_NETS>> testResults_map((int)series_len*1.5);//per series, etc... 
+ unordered_map> finalResults_map((int)series_len*1.5);//per series + set diagSeries; + + unordered_map> netRanking_map; + for (int ibig=0; ibig perfValid_vect; + int epochOfLastChangeOfLRate = -1; + +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&ibigDb, 0, NULL)); +#endif + + //create nets + array paramsCollection_arr;//per net + array perSeriesParamsCollection_arr;//per net + array trainers_arr; + array perSeriesTrainers_arr; + + + #if defined USE_RESIDUAL_LSTM + array, NUM_OF_NETS> rnnStack_arr; + #elif defined USE_ATTENTIVE_LSTM + array, NUM_OF_NETS> rnnStack_arr; + #else + array, NUM_OF_NETS> rnnStack_arr; + #endif + + array MLPW_parArr; + array MLPB_parArr; + array adapterW_parArr; + array adapterB_parArr; + + //this is not a history, this is the real stuff + unordered_map* > additionalParams_mapOfArr((int)series_len*1.5); //per series, per net + for (auto iter = series_vect.begin() ; iter != series_vect.end(); ++iter) { + string series=*iter; + additionalParams_mapOfArr[series]=new array(); + } + + for (int inet=0; inetclip_threshold = GRADIENT_CLIPPING; + perSeriesTrainers_arr[inet]=new AdamTrainer (perSeriesPC, INITIAL_LEARNING_RATE*PER_SERIES_LR_MULTIP, 0.9, 0.999, EPS); + perSeriesTrainers_arr[inet]->clip_threshold = GRADIENT_CLIPPING; + + auto& rNNStack=rnnStack_arr[inet]; + #if defined USE_RESIDUAL_LSTM + rNNStack.emplace_back(ResidualDilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, pc)); + for (int il = 1; il* additionalParams_arr=additionalParams_mapOfArr[series]; + additionalParams_arr->at(inet).levSm=perSeriesPC.add_parameters({1}, 0.5);//per series, per net + if (SEASONALITY_NUM > 0) { + additionalParams_arr->at(inet).sSm = perSeriesPC.add_parameters({ 1 }, 0.5); + for (int isea = 0; iseaat(inet).initSeasonality[isea] = perSeriesPC.add_parameters({ 1 }, 0.5); + } + if (SEASONALITY_NUM > 1) { + additionalParams_arr->at(inet).sSm2 = perSeriesPC.add_parameters({ 1 }, 0.5); + for (int isea = 0; iseaat(inet).initSeasonality2[isea] = perSeriesPC.add_parameters({ 1 }, 0.5); + } + } + }//seting up, through nets + + //history of params. Series->[NUM_OF_NETS,NUM_OF_TRAIN_EPOCHS] + unordered_map, NUM_OF_NETS>*> historyOfAdditionalParams_map((int)series_len*1.5); + for (auto iter = series_vect.begin() ; iter != series_vect.end(); ++iter) { + string series=*iter; + historyOfAdditionalParams_map[series]=new array, NUM_OF_NETS>(); + } + + //first assignment. Yes, we are using vector , so the very first time the duplicates are possible. 
But a set can't be sorted + array, NUM_OF_NETS> seriesAssignment;//every net has an array + for (int j=0; j> netPerf_map; + for (int inet=0; inetlearning_rate = LEARNING_RATES.at(iEpoch); + if (inet==0) + cout << "changing LR to:" << trainer->learning_rate << endl; + perSeriesTrainer->learning_rate = LEARNING_RATES.at(iEpoch)*PER_SERIES_LR_MULTIP; + } + + auto& rNNStack=rnnStack_arr[inet]; + Parameter& MLPW_par = MLPW_parArr[inet]; + Parameter& MLPB_par = MLPB_parArr[inet]; + Parameter& adapterW_par=adapterW_parArr[inet]; + Parameter& adapterB_par=adapterB_parArr[inet]; + + vector oneNetAssignments=seriesAssignment[inet]; + random_shuffle (oneNetAssignments.begin(), oneNetAssignments.end()); + + vector epochLosses; + vector forecLosses; vector levVarLosses; vector stateLosses; + for (auto iter = oneNetAssignments.begin() ; iter != oneNetAssignments.end(); ++iter) { + string series=*iter; + auto m4Obj=allSeries_map[series]; + + ComputationGraph cg; + for (int il=0; ilat(inet); + array& historyOfAdditionalParams_arr=historyOfAdditionalParams_map[series]->at(inet); + + Expression MLPW_ex,MLPB_ex; + if (ADD_NL_LAYER) { + MLPW_ex = parameter(cg, MLPW_par); + MLPB_ex = parameter(cg, MLPB_par); + } + Expression adapterW_ex=parameter(cg, adapterW_par); + Expression adapterB_ex=parameter(cg, adapterB_par); + + Expression levSmSerNet0_ex= parameter(cg, additionalParams.levSm); + Expression levSm_ex = logistic(levSmSerNet0_ex); + + vector season_exVect;//vector, because we do not know how long the series is + Expression sSm_ex; + if (SEASONALITY_NUM > 0) { + Expression sSmSerNet0_ex= parameter(cg, additionalParams.sSm); + sSm_ex = logistic(sSmSerNet0_ex); + + for (int isea = 0; isea season2_exVect;//vector, because we do not know how long the series is + Expression sSm2_ex; + if (SEASONALITY_NUM > 1) { + Expression sSm2SerNet0_ex= parameter(cg, additionalParams.sSm2); + sSm2_ex = logistic(sSm2SerNet0_ex); + + for (int isea = 0; isea logDiffOfLevels_vect; + vector levels_exVect; + if (SEASONALITY_NUM == 0) { + levels_exVect.push_back(input(cg, m4Obj.vals[0])); + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + } + else if (SEASONALITY_NUM == 2) { + Expression lev = cdiv(input(cg, m4Obj.vals[0]), season_exVect[0] * season2_exVect[0]); + levels_exVect.push_back(lev); + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + //if prediction horizon is larger than seasonality, so we need to repeat some of the seasonality factors + if (OUTPUT_SIZE>SEASONALITY2) { + unsigned long startSeasonalityIndx = season2_exVect.size() - SEASONALITY2; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY2); i++) + season2_exVect.push_back(season2_exVect[startSeasonalityIndx + i]); + } + } + else { + cerr<<"SEASONALITY_NUM="<< SEASONALITY_NUM; + exit(-1); + } + + Expression levelVarLoss_ex; + if (LEVEL_VARIABILITY_PENALTY > 0) { + vector levelVarLoss_v; + for (int i = 1; i losses;//losses of steps through single time series + for (int i=INPUT_SIZE-1; i<(m4Obj.n- OUTPUT_SIZE); i++) { + vector::const_iterator first = m4Obj.vals.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1; //not including the last one + vector 
input_vect(first, pastLast); //[first,pastLast) + + first = m4Obj.vals.begin() + i + 1; + pastLast = m4Obj.vals.begin() + i + 1 + OUTPUT_SIZE; + vector labels_vect(first, pastLast); //[first,pastLast) + + Expression input1_ex = input(cg, { INPUT_SIZE }, input_vect); + Expression labels1_ex = input(cg, { OUTPUT_SIZE }, labels_vect); + + if (SEASONALITY_NUM > 0 ) { + vector::const_iterator firstE = season_exVect.begin() +i+1-INPUT_SIZE; + vector::const_iterator pastLastE = season_exVect.begin() +i+1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality_ex=concatenate(inputSeasonality_exVect); + + firstE = season_exVect.begin() + i + 1; + pastLastE = season_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + outputSeasonality_ex = concatenate(outputSeasonality_exVect); + + input1_ex = cdiv(input1_ex, inputSeasonality_ex); // input deseasonalization + labels1_ex = cdiv(labels1_ex, outputSeasonality_ex); //output deseasonalization + } + if (SEASONALITY_NUM > 1) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1; //not including the last one + vector inputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality2_ex = concatenate(inputSeasonality2_exVect); + + firstE = season2_exVect.begin() + i + 1; + pastLastE = season2_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality2_ex = concatenate(outputSeasonality2_exVect); + + input1_ex = cdiv(input1_ex, inputSeasonality2_ex); //input deseasonalization + labels1_ex = cdiv(labels1_ex, outputSeasonality2_ex); //output deseasonalization + } + + vector joinedInput_ex; + joinedInput_ex.emplace_back(noise(squash(cdiv(input1_ex, levels_exVect[i])), NOISE_STD)); //input normalization+noise + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression labels_ex = squash(cdiv(labels1_ex, levels_exVect[i]));//output normalization + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il=INPUT_SIZE+MIN_INP_SEQ_LEN) + losses.push_back(loss_ex); + }//through points of a series + + Expression forecLoss_ex= average(losses); + Expression loss_exp = forecLoss_ex; + + float levVarLoss=0; + if (LEVEL_VARIABILITY_PENALTY > 0) { + Expression levelVarLossP_ex = levelVarLoss_ex*LEVEL_VARIABILITY_PENALTY; + levVarLoss = as_scalar(levelVarLossP_ex.value()); + levVarLosses.push_back(levVarLoss); + loss_exp= loss_exp + levelVarLossP_ex; + } + + float cStateLoss=0; + if (C_STATE_PENALTY>0) { + vector cStateLosses_vEx; + for (int irnn = 0; irnn < rNNStack.size(); irnn++) + for (int it = 0; itupdate();//update shared weights + perSeriesTrainer->update();//update params of this series only + } catch (exception& e) {//it may happen occasionally. I believe it is due to not robust enough implementation of squashing functions in Dynet. When abs(x)>35 NAs appear. + //so the code below is trying to produce some diagnostics, hopefully useful when setting LEVEL_VARIABILITY_PENALTY and C_STATE_PENALTY. 
+ cerr<<"cought exception while doing "< maxAbs) { + maxAbs = abs(state[iv]); + timeOfMax=it; + layerOfMax=il; + chunkOfMax= irnn; + } + } + } //through layers/states + } //through time + } //through chunks + + cout << "levSm:" << as_scalar(levSm_ex.value()) << endl; + if (SEASONALITY_NUM > 0) + cout << "sSm:" << as_scalar(sSm_ex.value()) << endl; + if (SEASONALITY_NUM > 1) + cout << "sSm2:" << as_scalar(sSm2_ex.value()) << endl; + cout << "max abs:" << maxAbs <<" at time:"<< timeOfMax<<" at layer:"<< layerOfMax<<" and chunk:"<< chunkOfMax< 0) { + histAdditionalParams.sSm=as_scalar(sSm_ex.value()); + for (int isea = 0; isea 1) { + histAdditionalParams.sSm2 = as_scalar(sSm2_ex.value()); + for (int isea=0; isea 0 || C_STATE_PENALTY > 0) { + float averageForecLoss = accumulate(forecLosses.begin(), forecLosses.end(), 0.0) / forecLosses.size(); + cout << " forec loss:" << averageForecLoss * 100; + } + if (LEVEL_VARIABILITY_PENALTY > 0) { + float averagelevVarLoss = accumulate(levVarLosses.begin(), levVarLosses.end(), 0.0) / levVarLosses.size(); + cout << " levVar loss:" << averagelevVarLoss * 100; + } + if (C_STATE_PENALTY > 0) { + float averageStateLoss = accumulate(stateLosses.begin(), stateLosses.end(), 0.0) / stateLosses.size(); + cout << " state loss:" << averageStateLoss * 100; + } + cout<at(inet); + Expression MLPW_ex, MLPB_ex; + if (ADD_NL_LAYER) { + MLPW_ex = parameter(cg, MLPW_par); + MLPB_ex = parameter(cg, MLPB_par); + } + Expression adapterW_ex=parameter(cg, adapterW_par); + Expression adapterB_ex=parameter(cg, adapterB_par); + + Expression levSmSerNet0_ex = parameter(cg, additionalParams.levSm); + Expression levSm_ex = logistic(levSmSerNet0_ex); + + vector season_exVect;//vector, because we do not know how long the series is + Expression sSm_ex; + if (SEASONALITY_NUM > 0) { + Expression sSmSerNet0_ex= parameter(cg, additionalParams.sSm); + sSm_ex = logistic(sSmSerNet0_ex); + + for (int isea = 0; isea season2_exVect;//vector, because we do not know how long the series is + Expression sSm2_ex; + if (SEASONALITY_NUM > 1) { + Expression sSm2SerNet0_ex= parameter(cg, additionalParams.sSm2); + sSm2_ex = logistic(sSm2SerNet0_ex); + + for (int isea = 0; isea levels_exVect; + if (SEASONALITY_NUM == 0) { + levels_exVect.push_back(input(cg, m4Obj.vals[0])); + for (int i = 1; i0 then this is shortened, so it always contains data awe have right to access + Expression newLevel_ex = m4Obj.vals[i] * cdiv(levSm_ex, season_exVect[i]) + (1 - levSm_ex)*levels_exVect[i - 1]; + levels_exVect.push_back(newLevel_ex); + + Expression newSeason_ex = m4Obj.vals[i] * cdiv(sSm_ex, newLevel_ex) + (1 - sSm_ex)*season_exVect[i]; + season_exVect.push_back(newSeason_ex); + } + + //if prediction horizon is larger than seasonality, so we need to repeat some of the seasonality factors + if (OUTPUT_SIZE>SEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + } + else if (SEASONALITY_NUM == 2) { + Expression lev = cdiv(input(cg, m4Obj.vals[0]), season_exVect[0] * season2_exVect[0]); + levels_exVect.push_back(lev); + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + //if prediction horizon is larger than seasonality, so we need to repeat some of the seasonality factors + if 
(OUTPUT_SIZE>SEASONALITY2) { + unsigned long startSeasonalityIndx = season2_exVect.size() - SEASONALITY2; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY2); i++) + season2_exVect.push_back(season2_exVect[startSeasonalityIndx + i]); + } + } + else { + cerr<<"SEASONALITY_NUM="<< SEASONALITY_NUM; + exit(-1); + } + + + Expression inputSeasonality_ex; Expression inputSeasonality2_ex; + Expression outputSeasonality_ex; Expression outputSeasonality2_ex; + vector losses;//losses of steps through single time series + Expression out_ex;//we declare it here, bcause the last one will be the forecast + for (int i=INPUT_SIZE-1; i::const_iterator first = m4Obj.vals.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1; //not including the last one + vector input_vect(first, pastLast); //[first,pastLast) + Expression input1_ex = input(cg, { INPUT_SIZE }, input_vect); + + if (SEASONALITY_NUM > 0 ) { + vector::const_iterator firstE = season_exVect.begin() +i+1-INPUT_SIZE; + vector::const_iterator pastLastE = season_exVect.begin() +i+1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality_ex=concatenate(inputSeasonality_exVect); + input1_ex = cdiv(input1_ex, inputSeasonality_ex); // input deseasonalization + } + if (SEASONALITY_NUM > 1) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1; //not including the last one + vector inputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality2_ex = concatenate(inputSeasonality2_exVect); + input1_ex = cdiv(input1_ex, inputSeasonality2_ex); //input deseasonalization + } + + vector joinedInput_ex; + joinedInput_ex.emplace_back(noise(squash(cdiv(input1_ex, levels_exVect[i])), NOISE_STD)); //input normalization+noise + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il::const_iterator first = m4Obj.vals.begin() + i + 1; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1 + OUTPUT_SIZE; + vector labels_vect(first, pastLast); //[first,pastLast) + Expression labels1_ex = input(cg, { OUTPUT_SIZE }, labels_vect); + + if (SEASONALITY_NUM > 0) { + vector::const_iterator firstE = season_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + outputSeasonality_ex = concatenate(outputSeasonality_exVect); + labels1_ex = cdiv(labels1_ex, outputSeasonality_ex); //output deseasonalization + } + if (SEASONALITY_NUM > 1) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1 + OUTPUT_SIZE;//checking if enough elements is in the vecor was done a few pe + vector outputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality2_ex = concatenate(outputSeasonality2_exVect); + labels1_ex = cdiv(labels1_ex, outputSeasonality2_ex); //output deseasonalization + } + Expression labels_ex = squash(cdiv(labels1_ex, levels_exVect[i]));//output normalization + + //Expression loss_ex = pinBallLoss(out_ex, labels_ex); + Expression loss_ex = MSIS(out_ex, labels_ex); + if (i>=INPUT_SIZE+MIN_INP_SEQ_LEN) + losses.push_back(loss_ex); //training area losses + } + + 
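+	//(At the last anchor point, i==n-1, the block below rescales the forecast back to the original scale: expand() inverts
+	// squash(), the result is multiplied by the level and, when SEASONALITY_NUM>0, by the output seasonality factors,
+	// and the rescaled vector is stored in testResults_map, where the last AVERAGING_LEVEL epochs get averaged.)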
if (i==(m4Obj.n-1)) {//validation loss + out_ex=expand(out_ex)*levels_exVect[i];//back to original scale + if (SEASONALITY_NUM > 0 ) { + vector::const_iterator firstE = season_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + for (int ios=0; ios 1 ) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + for (int ios = 0; ios, AVERAGING_LEVEL+1>, NUM_OF_NETS>, BIG_LOOP>> testResults_map((int)series_len*1.5);//per series, big loop, etc... + //No epoch here, because this will just reflect the current (latest) situation - the last few epochs + vector out_vect=as_vector(out_ex.value()); + testResults_map[series][inet][iEpoch%AVERAGING_LEVEL]=out_vect; + if (iEpoch>=AVERAGING_LEVEL && iEpoch % FREQ_OF_TEST==0) { + vector firstForec=testResults_map[series][inet][0]; + testResults_map[series][inet][AVERAGING_LEVEL]=firstForec; + for (int ii=1; ii nextForec=testResults_map[series][inet][ii]; + for (int iii=0; iii<2*OUTPUT_SIZE; iii++) + testResults_map[series][inet][AVERAGING_LEVEL][iii]+=nextForec[iii]; + } + for (int iii=0; iii<2*OUTPUT_SIZE; iii++) + testResults_map[series][inet][AVERAGING_LEVEL][iii]/=AVERAGING_LEVEL; + } //time to average + }//through series + } //through nets + + if (iEpoch>0 && iEpoch % FREQ_OF_TEST==0) { + //now that we have saved outputs of all nets on all series, let's calc how best and topn combinations performed during current epoch. + vector bestEpochLosses; + vector bestEpochAvgLosses; + vector topnEpochLosses; + vector topnEpochAvgLosses; + vector bestEpochLossesL; + vector bestEpochAvgLossesL; + vector topnEpochLossesL; + vector topnEpochAvgLossesL; + vector bestEpochLossesH; + vector bestEpochAvgLossesH; + vector topnEpochLossesH; + vector topnEpochAvgLossesH; + + for (auto iter = series_vect.begin() ; iter != series_vect.end(); ++iter) { + string series=*iter; + auto m4Obj=allSeries_map[series]; + +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 4, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)series.c_str(), 0, &nullTerminatedStringOfSeries)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE + 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&m4Obj.n, 0, NULL)); +#endif + + float avgLoss; + vector avgLatest; + vector avgAvg; + + for (int itop=0; itop 0) { + float qLoss = errorFunc(testResults_map[series][inet][iEpoch%AVERAGING_LEVEL], m4Obj.testVals, m4Obj.meanAbsSeasDiff); + bestEpochLosses.push_back(qLoss); + + qLoss=wQuantLoss(testResults_map[series][inet][iEpoch%AVERAGING_LEVEL], m4Obj.testVals, TAUL, 0); + bestEpochLossesL.push_back(qLoss); + + qLoss = wQuantLoss(testResults_map[series][inet][iEpoch%AVERAGING_LEVEL], m4Obj.testVals, TAUH, OUTPUT_SIZE); + bestEpochLossesH.push_back(qLoss); + } + avgLatest=testResults_map[series][inet][iEpoch%AVERAGING_LEVEL]; //used later for calculating topn loss + + if (iEpoch>=AVERAGING_LEVEL) { + if (LBACK > 0) { + float qLoss = errorFunc(testResults_map[series][inet][AVERAGING_LEVEL], m4Obj.testVals, m4Obj.meanAbsSeasDiff); + bestEpochAvgLosses.push_back(qLoss); + + qLoss = wQuantLoss(testResults_map[series][inet][AVERAGING_LEVEL], m4Obj.testVals, TAUL, 0); + 
bestEpochAvgLossesL.push_back(qLoss); + + qLoss = wQuantLoss(testResults_map[series][inet][AVERAGING_LEVEL], m4Obj.testVals, TAUH, OUTPUT_SIZE); + bestEpochAvgLossesH.push_back(qLoss); + } + avgAvg=testResults_map[series][inet][AVERAGING_LEVEL]; + } + } else { + for (int iii=0; iii<2*OUTPUT_SIZE; iii++) { + avgLatest[iii]+=testResults_map[series][inet][iEpoch%AVERAGING_LEVEL][iii];//calculate current topn + if (iEpoch>=AVERAGING_LEVEL) + avgAvg[iii]+=testResults_map[series][inet][AVERAGING_LEVEL][iii]; + } + } + }//through topn + + for (int iii=0; iii<2*OUTPUT_SIZE; iii++) + avgLatest[iii]/=TOPN; + + if (LBACK > 0) { + float qLoss = errorFunc(avgLatest, m4Obj.testVals, m4Obj.meanAbsSeasDiff); + topnEpochLosses.push_back(qLoss); + + qLoss = wQuantLoss(avgLatest, m4Obj.testVals, TAUL, 0); + topnEpochLossesL.push_back(qLoss); + + qLoss = wQuantLoss(avgLatest, m4Obj.testVals, TAUH, OUTPUT_SIZE); + topnEpochLossesH.push_back(qLoss); + } + + if (iEpoch>=AVERAGING_LEVEL) { + for (int iii = 0; iii<2*OUTPUT_SIZE; iii++) + avgAvg[iii] /= TOPN; + + finalResults_map[series] = avgAvg; + + if (LBACK > 0) { +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE + 1, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&avgLoss, 0, NULL)); + + for (int iv=0; iv<2; iv++) { + if (iv==0) + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 1, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)runL.c_str(), 0, &nullTerminatedStringOfRun)) + else + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 1, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)runH.c_str(), 0, &nullTerminatedStringOfRun)); + + for (int iii=0; iii 0) { + float bestEpochLoss=accumulate( bestEpochLosses.begin(), bestEpochLosses.end(), 0.0)/bestEpochLosses.size(); + float topnEpochLoss=accumulate( topnEpochLosses.begin(), topnEpochLosses.end(), 0.0)/topnEpochLosses.size(); + float bestEpochLossL = accumulate(bestEpochLossesL.begin(), bestEpochLossesL.end(), 0.0) / bestEpochLossesL.size(); + float topnEpochLossL = accumulate(topnEpochLossesL.begin(), topnEpochLossesL.end(), 0.0) / topnEpochLossesL.size(); + float bestEpochLossH = accumulate(bestEpochLossesH.begin(), bestEpochLossesH.end(), 0.0) / bestEpochLossesH.size(); + float topnEpochLossH = accumulate(topnEpochLossesH.begin(), topnEpochLossesH.end(), 0.0) / topnEpochLossesH.size(); + cout<=AVERAGING_LEVEL) { + float bestEpochAvgLoss=accumulate( bestEpochAvgLosses.begin(), bestEpochAvgLosses.end(), 0.0)/bestEpochAvgLosses.size(); + float topnEpochAvgLoss=accumulate( topnEpochAvgLosses.begin(), topnEpochAvgLosses.end(), 0.0)/topnEpochAvgLosses.size(); + float bestEpochAvgLossL = accumulate(bestEpochAvgLossesL.begin(), bestEpochAvgLossesL.end(), 0.0) / bestEpochAvgLossesL.size(); + float topnEpochAvgLossL = accumulate(topnEpochAvgLossesL.begin(), topnEpochAvgLossesL.end(), 0.0) / topnEpochAvgLossesL.size(); + float bestEpochAvgLossH = accumulate(bestEpochAvgLossesH.begin(), bestEpochAvgLossesH.end(), 0.0) / bestEpochAvgLossesH.size(); + float topnEpochAvgLossH = accumulate(topnEpochAvgLossesH.begin(), topnEpochAvgLossesH.end(), 0.0) / topnEpochAvgLossesH.size(); + cout<<" bestAvg:"<> netRanking_map + netRanking_map[series]=perfToRanking(netPerf_map[series]); + + for (int itop=0; itop diagSeries; + for (int i=0; i<1; i++) {//add a few normal ones + int irand=uniOnSeries(rng); + diagSeries.insert(series_vect[irand]); + } + for(auto series : 
diagSeries) { + cout<at(inet); + for (int iEpoch=0; iEpoch 0 ) { + cout<<"sSm:"<at(inet); + for (int iEpoch=0; iEpoch 1 ) { + cout<<"sSm2:"<at(inet); + for (int iEpoch=0; iEpochat(inet); + for (int iEpoch = 0; iEpoch0) { + cout << "levels:" << iEpoch<<" "; + for (int iv = 0; iv 0 ) { + cout << "seasons:" << iEpoch<<" "; + for (int iv = 0; iv 1 ) { + cout << "seasons2:" << iEpoch<<" "; + for (int iv = 0; iv 10 1 +start 10 2 +Modern computers have at more then 2 cores, so e.g. on 6-core machine create and run the following script with 3 pairs of workers: +# start 10 1 0 +# start 10 2 0 +# start 20 1 5 +# start 20 2 5 +# start 30 1 10 +# start 30 2 10 +seedForChunks have to be the same withion one pair, chunk numbers have to be 1 and 2. +We have added here the third parameter: ibigOffset. The straddle should be equal or bigger than BIG_LOOP. +Each pair goes through BIG_LOOP (by default 3, change in code below if you want) of model fitting and prediction, +so 2 pairs, as above, will produce 6 forecasts to be ensembled later, in R. +By increasing number of pairs, e.g. to 6 on 12-core computer, one can reduce BIG_LOOP to 1, so reduce execution time, and still have 6 forecasts - +a decent number to ensemble (in a separate, supplied R script). + +There are three blocks of parameters below, one active (starting with //PARAMS--------------) and two inactive. +The active block is setup as in the final run of forecasting quarterly series. Similarly Monthly block. +The Daily block is more of a demo, allowing to run quickly forecast for Daily series, although with slightly worse performance (use another program ES_RNN_E.cc for it). It was not used for the final submission. +So, you need comment/uncomment to have one block of interest active. + + +*/ + +//#define USE_ODBC +//define USE_ODBC if you want to +// 1. run the program in backtesting mode (which means you also need to set LBACK>0 below. Read the comment below. +// 2. save forecasts to a datatabase. Mysql and SQL Server were tested. The table creation and some other scripts should be found in \sql directory of the source code. +// Of course setting up ODBC is not that simple, :-), e.g. you need to create DSN=slawek, that points to a database with the output table. +// Saving to the db is convenient, but not necessary - all forecasts are always saved to as csv files in automatically created subdirectory (sorry sometimes two directories, so you have to copy :-)) of OUTPUT_DIR +//If saving to database you need to modify run varaible, for each new run, otherwise you will get the table key error. + +#include "dynet/dynet.h" +#include "dynet/training.h" +#include "dynet/expr.h" +#include "dynet/io.h" +#include "dynet/model.h" +#include "dynet/nodes.h" +#include "dynet/expr.h" +#include "dynet/lstm.h" +#include "slstm.h" //my implementation of dilated LSTMs + + +#if defined USE_ODBC + #if defined _WINDOWS + #include + #endif + #include + #include +#endif + +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace dynet; + + + +string DATA_DIR = "f:\\progs\\data\\M4DataSet\\"; //with the competition data csvs +//string DATA_DIR="/home/uber/progs/data/M4DataSet/"; +string OUTPUT_DIR = "f:\\progs\\data\\M4\\"; +//string OUTPUT_DIR="/home/uber/progs/data/M4/"; + +int LBACK = 0; //LBACK 0 means final mode: learning on all data and forecasting. LBACK=1 would move back by OUTPUT_SIZE, and forecast last known OUTPUT_SIZE points, for backtesting. 
LBACK could be a larger integer, but then number of series shrinks. + + +//PARAMS-------------- +string VARIABLE = "Quarterly"; +const string run0 = "(1,2),(4,8), LR=1e-3/{7,3e-4f},{11,1e-4f}, EPOCHS=16, LVP=200 40*"; +const string runL = "alpha5L " + run0; +const string runH = "alpha5H " + run0; + +vector> dilations = { { 1,2 },{ 4,8 } };//Each vector represents one chunk of Dilateed LSTMS, connected in resnNet fashion +const float INITIAL_LEARNING_RATE = 1e-3f; +//else +const map LEARNING_RATES = { { 7,3e-4f },{ 11,1e-4f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; //multiplier for per-series parameters' learning rate. + +const float ALPHA = 0.05; +const float TAUL = ALPHA / 2; +const float TAUH = 1 - TAUL; +const float ALPHA_MULTIP = 2 / ALPHA; + +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; //whether to insert a tanh() layer between the RNN stack and the linear adaptor (output) layer + +const int NUM_OF_TRAIN_EPOCHS = 16; +const unsigned int STATE_HSIZE = 40; + +const int SEASONALITY = 4; +const unsigned int INPUT_SIZE = 4; +const int INPUT_SIZE_I = INPUT_SIZE; +const unsigned int OUTPUT_SIZE = 8; +const int OUTPUT_SIZE_I = OUTPUT_SIZE; +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = INPUT_SIZE_I + OUTPUT_SIZE_I + MIN_INP_SEQ_LEN + 2; +const int MAX_SERIES_LENGTH = 40 * SEASONALITY + MIN_SERIES_LENGTH; //we are chopping longer series, to last, max e.g. 40 years + +const float LEVEL_VARIABILITY_PENALTY = 200; //Multiplier for L" penalty against wigglines of level vector. + + +/* +string VARIABLE = "Monthly"; +const string run0 = "Res(1,3,6,12), LR=1e-3 {8,3e-4f},{13,1e-4f}, EPOCHS=14, LVP=50, 20*"; +const string runL = "alpha5L " + run0; +const string runH = "alpha5H " + run0; + +#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +vector> dilations = { { 1,3,6,12 } };//Each vector represents one chunk of Dilateed LSTMS, connected in resnNet fashion^M +const float INITIAL_LEARNING_RATE = 1e-3f; +const map LEARNING_RATES = { { 8,3e-4f },{ 13,1e-4f } }; //at which epoch we set them up to what^M +const float PER_SERIES_LR_MULTIP = 1; + +const int NUM_OF_TRAIN_EPOCHS = 14; +const unsigned int STATE_HSIZE = 50; + +const float LEVEL_VARIABILITY_PENALTY = 50; //Multiplier for L" penalty against wigglines of level vector. + +const int SEASONALITY = 12; +const unsigned int OUTPUT_SIZE = 18; +const unsigned int INPUT_SIZE = 12; +const int INPUT_SIZE_I = INPUT_SIZE; +const int OUTPUT_SIZE_I = OUTPUT_SIZE; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = INPUT_SIZE_I + OUTPUT_SIZE_I + MIN_INP_SEQ_LEN + 2; +const int MAX_SERIES_LENGTH = 40 * SEASONALITY + MIN_SERIES_LENGTH; //we are chopping longer series, to last, max e.g. 
40 years + +const float ALPHA = 0.05; +const float TAUL = ALPHA / 2; +const float TAUH = 1 - TAUL; +const float ALPHA_MULTIP = 2 / ALPHA; +*/ + +Expression squash(const Expression& x) { + return log(x); +} + +Expression expand(const Expression& x) { + return exp(x); +} + +string INPUT_PATH = DATA_DIR + VARIABLE + "-train.csv"; +string INFO_INPUT_PATH = DATA_DIR + "M4-info.csv"; + +#if defined _DEBUG + const int MAX_NUM_OF_SERIES = 40; +#else + const int MAX_NUM_OF_SERIES = -1; //use all series +#endif // _DEBUG + +const unsigned int NUM_OF_CATEGORIES = 6;//in data provided +const int BIG_LOOP = 3; +const int NUM_OF_CHUNKS = 2; +const float EPS=1e-6; +const int AVERAGING_LEVEL=5; +const bool USE_MEDIAN = false; +const int MIDDLE_POS_FOR_AVG = 2; //if using medians + +const float NOISE_STD=0.001; +const int FREQ_OF_TEST=1; +const float GRADIENT_CLIPPING=20; +const float C_STATE_PENALTY = 0; + +const float BIG_FLOAT=1e38;//numeric_limits::max(); +const bool PRINT_DIAGN=true; +const unsigned ATTENTION_HSIZE=STATE_HSIZE; + +const bool USE_AUTO_LEARNING_RATE=false; +//if USE_AUTO_LEARNING_RATE, and only if LBACK>0 +const float MIN_LEARNING_RATE = 0.0001f; +const float LR_RATIO = sqrt(10); +const float LR_TOLERANCE_MULTIP = 1.005; +const int L3_PERIOD = 2; +const int MIN_EPOCHS_BEFORE_CHANGING_LRATE = 2; + + +#if defined USE_ODBC + void HandleDiagnosticRecord(SQLHANDLE hHandle, + SQLSMALLINT hType, + RETCODE RetCode); + + #if defined _WINDOWS + WCHAR* pwszConnStr = L"DSN=slawek"; + #else + SQLCHAR* pwszConnStr = (SQLCHAR*) "DSN=slawek"; + #endif + #define TRYODBC(h, ht, x) { RETCODE rc = x;\ + if (rc != SQL_SUCCESS) \ + { \ + HandleDiagnosticRecord (h, ht, rc); \ + } \ + if (rc == SQL_ERROR) \ + { \ + fprintf(stderr, "Error in " #x "\n"); \ + if (hStmt) { \ + SQLFreeHandle(SQL_HANDLE_STMT, hStmt); \ + } \ + if (hDbc) { \ + SQLDisconnect(hDbc); \ + SQLFreeHandle(SQL_HANDLE_DBC, hDbc); \ + } \ + if (hEnv) { \ + SQLFreeHandle(SQL_HANDLE_ENV, hEnv); \ + } \ + exit(-1); \ + } \ + } + +#endif + +struct M4TS {//storing series data + vector < float> categories_vect; + vector vals; + vector testVals;//empty, unless LBACK>0 + float meanAbsSeasDiff; + int n; + + M4TS(string category, stringstream &line_stream) { + array categories = { 0,0,0,0,0,0 }; + if (category == "Demographic") + categories[0] = 1; + else if (category == "Finance") + categories[1] = 1; + else if (category == "Industry") + categories[2] = 1; + else if (category == "Macro") + categories[3] = 1; + else if (category == "Micro") + categories[4] = 1; + else if (category == "Other") + categories[5] = 1; + else { + cerr << "unknown category?"; + exit(-1); + } + for (int i = 0; i < NUM_OF_CATEGORIES; i++) + categories_vect.push_back(categories[i]); + + string tmp_str; + while(getline(line_stream, tmp_str, ',' )) { + string val_str; + for (const auto c : tmp_str) { + if (c != '\"' && c != '\r') //remove quotes and very occasional double end of line + val_str.push_back(c); + } + if (val_str.size() == 0) + break; + float val=(atof(val_str.c_str())); + vals.push_back(val); + } + + meanAbsSeasDiff = 0; + float sumf = 0; + for (int ip = SEASONALITY; ip0) + meanAbsSeasDiff = sumf / (vals.size() - SEASONALITY); + + if (LBACK > 0) { //extract last OUTPUT_SIZE points as the test values + if (vals.size() > LBACK*OUTPUT_SIZE_I) { + auto first = vals.begin() + vals.size() - LBACK*OUTPUT_SIZE_I; + auto pastLast = vals.begin() + vals.size() - (LBACK-1)*OUTPUT_SIZE_I; + vector input_vect(first, pastLast); //[first,pastLast) + testVals= input_vect; + 
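+			//(backtesting only: the held-out window was just copied to testVals; the resize below drops it from vals, so it is never seen in training)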
vals.resize(vals.size() - LBACK*OUTPUT_SIZE_I); //remove last LBACK*OUTPUT_SIZE elements + n = vals.size(); + } else + n = 0; + } else { + n = vals.size(); + } + if (n > MAX_SERIES_LENGTH) {//chop long series + vals.erase(vals.begin(), vals.begin() + (n-MAX_SERIES_LENGTH)); //remove some early data + n = vals.size(); + } + } + M4TS(){}; +}; + + +struct AdditionalParams {//Per series, important + Parameter levSm; + Parameter sSm; + array initSeasonality; +}; + +struct AdditionalParamsF {//Used for storing diagnostics + float levSm; + float sSm; + array initSeasonality; + vector levels; + vector seasons; +}; + +//loss function +Expression MSIS(const Expression& out_ex, const Expression& actuals_ex) { + vector losses; + for (unsigned int indx = 0; indx as_scalar(forecH.value())) + loss = loss + (actual - forecH)*ALPHA_MULTIP; + losses.push_back(loss); + } + Expression ret = sum(losses) / OUTPUT_SIZE; + #if defined _DEBUG + float retf = as_scalar(ret.value()); + if (retf>100) { + vector out_vect = as_vector(out_ex.value()); + vector actuals_vect = as_vector(actuals_ex.value()); + for (int i = 0; i0 and PERCENTILE!=50 +float wQuantLoss(vector& out_vect, vector& actuals_vect, float tau, int offset) {//used just for diagnostics, if if LBACK>0 and PERCENTILE!=50 + float sumf = 0; float suma = 0; + for (unsigned int indx = 0; indx forec) + sumf = sumf + (actual - forec)*tau; + else + sumf = sumf + (actual - forec)*(tau - 1); + } + return sumf / suma * 200; +} + +//MSIS operating on floats, used for validation +float errorFunc(vector& out_vect, vector& actuals_vect, float meanAbsSeasDiff) { + float sumf=0; + for (unsigned int indx = 0; indx forecH) + loss = loss + (actualf - forecH)*ALPHA_MULTIP; + sumf+=loss; + } + return sumf / (OUTPUT_SIZE*meanAbsSeasDiff); +} + + + + +int main(int argc, char** argv) { + dynet::initialize(argc, argv); + + int seedForChunks = 10; //Yes it runs, without any params, but it will work only on 1/NUM_OF_CHUNKS of all cases. The system is expected to run in NUM_OF_CHUNKS multiples. + int chunkNo = 1; + int ibigOffset = 0; + if (argc >= 3) { + seedForChunks = atoi(argv[1]); + chunkNo = atoi(argv[2]); + } + if (argc >= 4) + ibigOffset = atoi(argv[3]); + + if (chunkNo > NUM_OF_CHUNKS) { + cerr << "chunkNo > NUM_OF_CHUNKS"; + exit(-1); + } + else if (chunkNo <= 0) { + cerr << "chunkNo <= 0"; + exit(-1); + } + + cout<0) + std::cout<< " ibigOffset:"<< ibigOffset; //if continuing prematurely stopped run + if (LBACK>0) + std::cout<<" lback:"<tm_year+1900; + now_ts.month=now->tm_mon+1; + now_ts.day=now->tm_mday; + now_ts.hour=now->tm_hour; + now_ts.minute=now->tm_min; + now_ts.second=now->tm_sec; + now_ts.fraction=0; //reportedly needed + + const int OFFSET_TO_FIRST_ACTUAL=5; + string insertQuery_str = "insert into M72nn(run, LBack, ibig, series, epoch "; + for (int iq = 1; iq <= OUTPUT_SIZE_I; iq++) { + stringstream ss; + ss << iq; + string iq_str = ss.str(); + insertQuery_str = insertQuery_str +", actual"+iq_str+", forec" + iq_str; + } + insertQuery_str = insertQuery_str +", trainingError, variable, n, dateTimeOfPrediction) \ + values(? , ? , ? , ? , ? 
"; + for (int iq = 1; iq <= OUTPUT_SIZE_I; iq++) { + insertQuery_str = insertQuery_str + ",?,?"; + } + insertQuery_str = insertQuery_str + ",?,?,?,?)"; + #if defined _WINDOWS + wstring insertQuery(insertQuery_str.begin(), insertQuery_str.end()); + SQLWCHAR* sqlQuery = (SQLWCHAR*)insertQuery.c_str(); + #else + SQLCHAR* sqlQuery =(SQLCHAR*)insertQuery_str.c_str(); + #endif + + + SQLHENV hEnv = NULL; + SQLHDBC hDbc = NULL; + SQLHSTMT hStmt = NULL, hInsertStmt = NULL; + + if (SQLAllocHandle(SQL_HANDLE_ENV, SQL_NULL_HANDLE, &hEnv) == SQL_ERROR) { + fprintf(stderr, "Unable to allocate an environment handle\n"); + exit(-1); + } + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLSetEnvAttr(hEnv, + SQL_ATTR_ODBC_VERSION, + (SQLPOINTER)SQL_OV_ODBC3, + 0)); + + // Allocate a connection + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLAllocHandle(SQL_HANDLE_DBC, hEnv, &hDbc)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLDriverConnect(hDbc, + NULL, + pwszConnStr, + SQL_NTS, + NULL, + 0, + NULL, + SQL_DRIVER_COMPLETE)); + fprintf(stderr, "Connected!\n"); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLSetConnectAttr(hDbc, SQL_ATTR_AUTOCOMMIT, (SQLPOINTER)SQL_AUTOCOMMIT_OFF, SQL_IS_INTEGER)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLAllocHandle(SQL_HANDLE_STMT, hDbc, &hInsertStmt)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLPrepare(hInsertStmt, sqlQuery, SQL_NTS)); + + SQLLEN nullTerminatedStringOfRun = SQL_NTS; + SQLLEN nullTerminatedStringOfSeries = SQL_NTS; + SQLLEN nullTerminatedStringOfVariable = SQL_NTS; + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 2, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&LBACK, 0, NULL)); + + // variable, n, dateTimeOfPrediction + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL+2*OUTPUT_SIZE_I+2, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)VARIABLE.c_str(), 0, &nullTerminatedStringOfVariable)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE_I + 4, SQL_PARAM_INPUT, SQL_C_TYPE_TIMESTAMP, SQL_TYPE_TIMESTAMP, 0, 0, &now_ts, sizeof(TIMESTAMP_STRUCT), NULL)); +#endif + + random_device rd; // only used once to initialise (seed) engine + mt19937 rng(rd()); // random-number engine used (Mersenne-Twister) + mt19937 rngForChunks(seedForChunks); + + vector series_vect; + unordered_map allSeries_map(30000);//max series in one chunk would be 48/2=24k, for monthly series + unordered_map seriesCategories_map(120000);//100k series + + ifstream infoFile(INFO_INPUT_PATH); + string line; + getline(infoFile, line); //header + while (getline(infoFile, line)) { + //cout << string( line)<= MIN_SERIES_LENGTH) { + series_vect.push_back(series); + if (m4Obj.meanAbsSeasDiff==0) { + cout<<"Warning, flat series:"<0 && series_vect.size()>=MAX_NUM_OF_SERIES) + break; + } + + int series_len=(int)series_vect.size(); + int chunkSize= series_len/NUM_OF_CHUNKS; + std::cout << "num of series:" << series_vect.size() <<" size of chunk:"<< chunkSize< uniOnSeries(0, chunkSize -1); // closed interval [a, b] + + unordered_map, AVERAGING_LEVEL+1>> testResults_map((int)chunkSize*1.5); + set diagSeries; + + for (int ibig=0; ibig perfValid_vect; + int epochOfLastChangeOfLRate = -1; + +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&ibigDb, 0, NULL)); +#endif + + ParameterCollection pc; + ParameterCollection perSeriesPC; + + float learning_rate= 
INITIAL_LEARNING_RATE; + AdamTrainer trainer(pc, learning_rate, 0.9, 0.999, EPS); + trainer.clip_threshold = GRADIENT_CLIPPING; + AdamTrainer perSeriesTrainer(perSeriesPC, learning_rate*PER_SERIES_LR_MULTIP, 0.9, 0.999, EPS); + perSeriesTrainer.clip_threshold = GRADIENT_CLIPPING; + + #if defined USE_RESIDUAL_LSTM + vector rNNStack; + rNNStack.emplace_back(ResidualDilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, pc)); + for (int il = 1; il rNNStack; + rNNStack.emplace_back(AttentiveDilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, ATTENTION_HSIZE, pc)); + for (int il = 1; il rNNStack; + rNNStack.emplace_back(DilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, pc)); + for (int il = 1; il oneChunk_vect(start,end); + if (PRINT_DIAGN) { + for (int k = 0; k<10; k++) //diag + cout << oneChunk_vect[k] << " "; + cout << endl; + } + if (chunkNo == NUM_OF_CHUNKS) + cout<<"last chunk size:"<< oneChunk_vect.size()< additionalParams_map((int)oneChunk_vect.size()*1.5); //per series + unordered_map*> historyOfAdditionalParams_map((int)oneChunk_vect.size()*1.5); + for (auto iter = oneChunk_vect.begin(); iter != oneChunk_vect.end(); ++iter) {//setup + string series = *iter; + AdditionalParams addParams; + addParams.levSm = perSeriesPC.add_parameters({ 1 }, 0.5); //level smoothing + addParams.sSm = perSeriesPC.add_parameters({ 1 }, 0.5); //seasonality smoothing + for (int isea = 0; isea(); + } + + for (int iEpoch=0; iEpoch testLosses; //test losses of all series in this epoch + vector testAvgLosses; //test avg (over last few epochs) losses of all series in this epoch + vector testLossesL; //lower quantile loss + vector testAvgLossesL; //lower quantile loss + vector testLossesH; //higher quantile loss + vector testAvgLossesH; //higher quantile loss + vector trainingLosses; //training losses of all series in one epoch + vector forecLosses; vector levVarLosses; vector stateLosses; + #if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 5, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&iEpoch, 0, NULL)); + #endif + + for (auto iter = oneChunk_vect.begin() ; iter != oneChunk_vect.end(); ++iter) { + string series=*iter; + auto m4Obj = allSeries_map[series]; + + #if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 4, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)series.c_str(), 0, &nullTerminatedStringOfSeries)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE_I + 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&m4Obj.n, 0, NULL)); + #endif + + ComputationGraph cg; + for (int il=0; il season_exVect;//vector, because we do not know how long the series is + for (int iseas=0; iseas seas==1 + season_exVect.push_back(seas);//Expression is a simple struct, without any storage management, so the auto copy constructor works OK. 
+ } + season_exVect.push_back(season_exVect[0]); + + vector logDiffOfLevels_vect; + vector levels_exVect; + Expression lev=cdiv(input(cg, m4Obj.vals[0]), season_exVect[0]); + levels_exVect.push_back(lev); + for (int i=1; i 0) { + vector levelVarLoss_v; + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx=season_exVect.size()-SEASONALITY; + for (int i=0;i<(OUTPUT_SIZE_I-SEASONALITY);i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx+i]); + } + vector losses; + for (int i=INPUT_SIZE_I-1; i<(m4Obj.n- OUTPUT_SIZE_I); i++) { + vector::const_iterator firstE = season_exVect.begin() +i+1-INPUT_SIZE_I; + vector::const_iterator pastLastE = season_exVect.begin() +i+1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression inputSeasonality_ex=concatenate(inputSeasonality_exVect); + + vector::const_iterator first = m4Obj.vals.begin() +i+1-INPUT_SIZE_I; + vector::const_iterator pastLast = m4Obj.vals.begin() +i+1; //not including the last one + vector input_vect(first, pastLast); //[first,pastLast) + Expression input0_ex=input(cg,{INPUT_SIZE},input_vect); + Expression input1_ex=cdiv(input0_ex,inputSeasonality_ex); //deseasonalization + vector joinedInput_ex; + input1_ex= cdiv(input1_ex, levels_exVect[i]); + joinedInput_ex.emplace_back(noise(squash(input1_ex), NOISE_STD)); //normalization+noise + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality_ex=concatenate(outputSeasonality_exVect); + + first = m4Obj.vals.begin() +i+1; + pastLast = m4Obj.vals.begin() +i+1+OUTPUT_SIZE_I; + vector labels_vect(first, pastLast); //[first,pastLast) + Expression labels0_ex=input(cg,{OUTPUT_SIZE},labels_vect); + Expression labels1_ex=cdiv(labels0_ex,outputSeasonality_ex); //deseasonalization + labels1_ex= cdiv(labels1_ex, levels_exVect[i]);//normalization + Expression labels_ex=squash(labels1_ex); + + Expression loss_ex=MSIS(out_ex, labels_ex);//although out_ex has doubled size, labels_ex have normal size. NB, we do not have duplicated labels during training. 
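+			//MSIS (defined above) treats the first OUTPUT_SIZE outputs as the lower bound L and the next OUTPUT_SIZE as the
+			//upper bound U; per step the loss is roughly (U-L) + ALPHA_MULTIP*max(0,L-y) + ALPHA_MULTIP*max(0,y-U), averaged
+			//over the horizon, with ALPHA_MULTIP=2/ALPHA (40 for ALPHA=0.05), so narrow intervals that still cover the actuals are rewarded.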
+ //Expression loss_ex=pinBallLoss(out_ex, labels_ex); + if (i>=INPUT_SIZE_I+MIN_INP_SEQ_LEN) + losses.push_back(loss_ex); + } + + Expression forecLoss_ex= average(losses); + Expression loss_exp = forecLoss_ex; + + float levVarLoss=0; + if (LEVEL_VARIABILITY_PENALTY > 0) { + Expression levelVarLossP_ex = levelVarLoss_ex*LEVEL_VARIABILITY_PENALTY; + levVarLoss = as_scalar(levelVarLossP_ex.value()); + levVarLosses.push_back(levVarLoss); + loss_exp= loss_exp + levelVarLossP_ex; + } + + float cStateLoss=0; + if (C_STATE_PENALTY>0) { + vector cStateLosses_vEx; + for (int irnn = 0; irnn < rNNStack.size(); irnn++) + for (int it = 0; it maxAbs) { + maxAbs = abs(state[iv]); + timeOfMax = it; + layerOfMax = il; + chunkOfMax = irnn; + } + } + } //through layers/states + } //through time + } //through chunks + + cout << "levSm:" << as_scalar(levSm_ex.value()) << endl; + cout << "sSm:" << as_scalar(sSm_ex.value()) << endl; + cout << " min season=" << minSeason << endl; + cout << " min level=" << minLevel << endl; + cout << " max abs:" << maxAbs << " at time:" << timeOfMax << " at layer:" << layerOfMax << " and chunk:" << chunkOfMax << endl; + + //diagSeries.insert(series); + pc.reset_gradient(); + perSeriesPC.reset_gradient(); + } + + //saving per-series values for diagnostics purposes + AdditionalParamsF &histAdditionalParams= historyOfAdditionalParams_map[series]->at(iEpoch); + histAdditionalParams.levSm=as_scalar(levSm_ex.value()); + histAdditionalParams.sSm=as_scalar(sSm_ex.value()); + for (int isea=0; isea::const_iterator firstE = season_exVect.begin() + i + 1 - INPUT_SIZE_I; + vector::const_iterator pastLastE = season_exVect.begin() + i + 1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression inputSeasonality_ex = concatenate(inputSeasonality_exVect); + + vector::const_iterator first = m4Obj.vals.begin() + i + 1 - INPUT_SIZE_I; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1; //not including the last one + vector input_vect(first, pastLast); //[first,pastLast) + Expression input0_ex = input(cg, { INPUT_SIZE }, input_vect); + Expression input1_ex = cdiv(input0_ex, inputSeasonality_ex); //deseasonalization + vector joinedInput_ex; + input1_ex= cdiv(input1_ex, levels_exVect[i]);//normalization + joinedInput_ex.emplace_back(squash(input1_ex)); + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + for (int ios=0; ios out_vect = as_vector(out_ex.value()); + + if (LBACK > 0) { + float qLoss = errorFunc(out_vect, m4Obj.testVals, m4Obj.meanAbsSeasDiff); + testLosses.push_back(qLoss); + + qLoss = wQuantLoss(out_vect, m4Obj.testVals, TAUL, 0); + testLossesL.push_back(qLoss); + + qLoss = wQuantLoss(out_vect, m4Obj.testVals, TAUH, OUTPUT_SIZE); + testLossesH.push_back(qLoss); + } + + testResults_map[series][iEpoch%AVERAGING_LEVEL] = out_vect; + if (iEpoch >= AVERAGING_LEVEL) { + if (USE_MEDIAN) { + if (testResults_map[series][AVERAGING_LEVEL].size() == 0) + testResults_map[series][AVERAGING_LEVEL] = out_vect; //just to initialized, to make space. 
The values will be overwritten + for (int iii = 0; iii < OUTPUT_SIZE_I*2; iii++) { + vector temp_vect2; + for (int ii = 0; ii firstForec = testResults_map[series][0]; + testResults_map[series][AVERAGING_LEVEL] = firstForec; + for (int ii = 1; ii nextForec = testResults_map[series][ii]; + for (int iii = 0; iii 0) { + float qLoss = errorFunc(testResults_map[series][AVERAGING_LEVEL], m4Obj.testVals, m4Obj.meanAbsSeasDiff); + testAvgLosses.push_back(qLoss); + + qLoss = wQuantLoss(testResults_map[series][AVERAGING_LEVEL], m4Obj.testVals, TAUL, 0); + testAvgLossesL.push_back(qLoss); + + qLoss = wQuantLoss(testResults_map[series][AVERAGING_LEVEL], m4Obj.testVals, TAUH, OUTPUT_SIZE); + testAvgLossesH.push_back(qLoss); + + #if defined USE_ODBC //save + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE_I + 1, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&forecastLoss, 0, NULL)); + + for (int iv = 0; iv<2; iv++) { + if (iv == 0) + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 1, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)runL.c_str(), 0, &nullTerminatedStringOfRun)) + else + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 1, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)runH.c_str(), 0, &nullTerminatedStringOfRun)); + + for (int io = 0; io < OUTPUT_SIZE_I; io++) { + int ipos=OFFSET_TO_FIRST_ACTUAL + 1 + 2*io; + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, ipos, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&m4Obj.testVals[io], 0, NULL)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, ipos+1, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&testResults_map[series][AVERAGING_LEVEL][io + iv*OUTPUT_SIZE_I], 0, NULL)); + } + if (MAX_NUM_OF_SERIES<0) + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLExecute(hInsertStmt)); + } + #endif + } //lback>0 + } //time to average + }//last anchor point of the series + }//through TEST loop + }//through series + + + if (iEpoch % FREQ_OF_TEST == 0) { + float averageTrainingLoss = accumulate(trainingLosses.begin(), trainingLosses.end(), 0.0) / trainingLosses.size(); + + cout << ibig << " " << iEpoch << " loss:" << averageTrainingLoss * 100; + if (LEVEL_VARIABILITY_PENALTY > 0 || C_STATE_PENALTY > 0) { + float averageForecLoss = accumulate(forecLosses.begin(), forecLosses.end(), 0.0) / forecLosses.size(); + cout << " forecast loss:" << averageForecLoss*100; + } + if (LEVEL_VARIABILITY_PENALTY > 0) { + float averagelevVarLoss = accumulate(levVarLosses.begin(), levVarLosses.end(), 0.0) / levVarLosses.size(); + cout << " levVar loss:" << averagelevVarLoss * 100; + } + if (C_STATE_PENALTY > 0) { + float averageStateLoss = accumulate(stateLosses.begin(), stateLosses.end(), 0.0) / stateLosses.size(); + cout << " state loss:" << averageStateLoss * 100; + } + + float averageTestLoss=0; + if (LBACK > 0) { + float averageTestLoss = accumulate(testLosses.begin(), testLosses.end(), 0.0) / testLosses.size(); + float averageTestLossL = accumulate(testLossesL.begin(), testLossesL.end(), 0.0) / testLossesL.size(); + float averageTestLossH = accumulate(testLossesH.begin(), testLossesH.end(), 0.0) / testLossesH.size(); + cout<<" Test loss:" << averageTestLoss<<" L:"<< averageTestLossL<<" H:"<< averageTestLossH; + if (iEpoch >= AVERAGING_LEVEL) { + float averageTestAvgLoss = accumulate(testAvgLosses.begin(), testAvgLosses.end(), 0.0) / testAvgLosses.size();//of 
this epoch + float averageTestAvgLossL = accumulate(testAvgLossesL.begin(), testAvgLossesL.end(), 0.0) / testAvgLossesL.size();//of this epoch + float averageTestAvgLossH = accumulate(testAvgLossesH.begin(), testAvgLossesH.end(), 0.0) / testAvgLossesH.size();//of this epoch + cout << " avgLoss:" << averageTestAvgLoss<<" L:"<< averageTestAvgLossL<<" H:"<< averageTestAvgLossH<= 2) { + if (iEpoch < L3_PERIOD) + changeL2Rate = perfValid_vect[perfValid_vect.size() - 2] MIN_LEARNING_RATE && (iEpoch - epochOfLastChangeOfLRate) >= MIN_EPOCHS_BEFORE_CHANGING_LRATE) { + learning_rate /= LR_RATIO; + cout << "decreasing LR to:" << learning_rate << endl; + epochOfLastChangeOfLRate = iEpoch; + trainer.learning_rate = learning_rate; + } + } + #if defined USE_ODBC + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLEndTran( + SQL_HANDLE_DBC, + hDbc, + SQL_COMMIT)); + #endif + }//through epochs + + if (PRINT_DIAGN) {//some diagnostic info + set diagSeries; + for (int i = 0; i<1; i++) {//add a few normal ones + int irand = uniOnSeries(rng); + diagSeries.insert(oneChunk_vect[irand]); + } + for (auto series : diagSeries) { + cout << endl << series << endl; + array* historyOfAdditionalParams_ptrToArr = historyOfAdditionalParams_map[series]; + cout << "lSm:" << endl; + for (int iEpoch = 0; iEpochat(iEpoch).levSm << " "; + cout << endl; + cout << "sSm:" << endl; + for (int iEpoch = 0; iEpochat(iEpoch).sSm << " "; + cout << endl; + cout << "seasons:" << endl; + for (int isea = 0; iseaat(iEpoch).initSeasonality[isea] << " "; + cout << endl; + } + cout << endl; + for (int iEpoch = 0; iEpochat(iEpoch).levels.size()>0) { + cout << "levels:" << iEpoch << " "; + for (int iv = 0; ivat(iEpoch).levels.size(); iv++) + cout << historyOfAdditionalParams_ptrToArr->at(iEpoch).levels[iv] << ", "; + cout << endl; + cout << "seas:" << iEpoch << " "; + for (int iv = 0; ivat(iEpoch).seasons.size(); iv++) + cout << historyOfAdditionalParams_ptrToArr->at(iEpoch).seasons[iv] << ", "; + cout << endl; + } + } + } + } + + //save the forecast to outputFile + ofstream outputFile; + outputFile.open(outputPathL); + for (auto iter = oneChunk_vect.begin(); iter != oneChunk_vect.end(); ++iter) { + string series = *iter; + outputFile<< series; + for (int io=0; io +#include +#include +#include + +#if defined DEBUG + #define _DEBUG +#endif + +using namespace std; + +namespace dynet { + + // ResidualDilatedLSTMBuilder based on Vanilla LSTM + enum { _X2I, _H2I, _BI, _X2F, _H2F, _BF, _X2O, _H2O, _BO, _X2G, _H2G, _BG }; + enum { LN_GH, LN_BH, LN_GX, LN_BX, LN_GC, LN_BC }; + + ResidualDilatedLSTMBuilder::ResidualDilatedLSTMBuilder() : has_initial_state(false), layers(0), input_dim(0), hid(0), dropout_rate_h(0), ln_lstm(false), forget_bias(1.f), dropout_masks_valid(false) { } + + ResidualDilatedLSTMBuilder::ResidualDilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model, + bool ln_lstm, float forget_bias) : dilations(dilations), layers(unsigned(dilations.size())), + input_dim(input_dim), hid(hidden_dim), ln_lstm(ln_lstm), forget_bias(forget_bias), dropout_masks_valid(false) { + unsigned layer_input_dim = input_dim; + local_model = model.add_subcollection("ResidualDilated-lstm-builder"); + for (unsigned i = 0; i < layers; ++i) { + // i + Parameter p_x2i = local_model.add_parameters({ hidden_dim * 4, layer_input_dim }); + Parameter p_h2i = local_model.add_parameters({ hidden_dim * 4, hidden_dim }); + //Parameter p_c2i = model.add_parameters({hidden_dim, hidden_dim}); + Parameter p_bi = local_model.add_parameters({ 
hidden_dim * 4 }, ParameterInitConst(0.f)); + + layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next + + vector ps = { p_x2i, p_h2i, /*p_c2i,*/ p_bi }; + params.push_back(ps); + + if (ln_lstm) { + Parameter p_gh = model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(1.f)); + Parameter p_bh = model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(0.f)); + Parameter p_gx = model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(1.f)); + Parameter p_bx = model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(0.f)); + Parameter p_gc = model.add_parameters({ hidden_dim }, ParameterInitConst(1.f)); + Parameter p_bc = model.add_parameters({ hidden_dim }, ParameterInitConst(0.f)); + vector ln_ps = { p_gh, p_bh, p_gx, p_bx, p_gc, p_bc }; + ln_params.push_back(ln_ps); + } + } // layers + dropout_rate = 0.f; + dropout_rate_h = 0.f; + } + + void ResidualDilatedLSTMBuilder::new_graph_impl(ComputationGraph& cg, bool update) { + param_vars.clear(); + if (ln_lstm)ln_param_vars.clear(); + for (unsigned i = 0; i < layers; ++i) { + auto& p = params[i]; + vector vars; + for (unsigned j = 0; j < p.size(); ++j) { vars.push_back(update ? parameter(cg, p[j]) : const_parameter(cg, p[j])); } + param_vars.push_back(vars); + if (ln_lstm) { + auto& ln_p = ln_params[i]; + vector ln_vars; + for (unsigned j = 0; j < ln_p.size(); ++j) { ln_vars.push_back(update ? parameter(cg, ln_p[j]) : const_parameter(cg, ln_p[j])); } + ln_param_vars.push_back(ln_vars); + } + } + + _cg = &cg; + } + // layout: 0..layers = c + // layers+1..2*layers = h + void ResidualDilatedLSTMBuilder::start_new_sequence_impl(const vector& hinit) { + h.clear(); + c.clear(); + + if (hinit.size() > 0) { + DYNET_ARG_CHECK(layers * 2 == hinit.size(), + "ResidualDilatedLSTMBuilder must be initialized with 2 times as many expressions as layers " + "(hidden state, and cell for each layer). However, for " << layers << " layers, " << + hinit.size() << " expressions were passed in"); + h0.resize(layers); + c0.resize(layers); + for (unsigned i = 0; i < layers; ++i) { + c0[i] = hinit[i]; + h0[i] = hinit[i + layers]; + } + has_initial_state = true; + } + else { + has_initial_state = false; + } + + dropout_masks_valid = false; + } + + void ResidualDilatedLSTMBuilder::set_dropout_masks(unsigned batch_size) { + masks.clear(); + for (unsigned i = 0; i < layers; ++i) { + std::vector masks_i; + unsigned idim = (i == 0) ? input_dim : hid; + if (dropout_rate > 0.f || dropout_rate_h > 0.f) { + float retention_rate = 1.f - dropout_rate; + float retention_rate_h = 1.f - dropout_rate_h; + float scale = 1.f / retention_rate; + float scale_h = 1.f / retention_rate_h; + // in + masks_i.push_back(random_bernoulli(*_cg, Dim({ idim }, batch_size), retention_rate, scale)); + // h + masks_i.push_back(random_bernoulli(*_cg, Dim({ hid }, batch_size), retention_rate_h, scale_h)); + masks.push_back(masks_i); + } + } + dropout_masks_valid = true; + } + + ParameterCollection & ResidualDilatedLSTMBuilder::get_parameter_collection() { + return local_model; + } + + // TODO - Make this correct + // Copied c from the previous step (otherwise c.size()< h.size()) + // Also is creating a new step something we want? + // wouldn't overwriting the current one be better? 
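+	//Note on the builder itself: in add_input_impl (further below) layer i with dilation d=dilations[i] takes its recurrent
+	//state from d steps back (h[t-d][i], c[t-d][i]), falling back to h0/c0 or zeros while t<d; and in this Residual variant
+	//every layer above the first adds its input inside the output gate, ht = i_ot*(in + tanh(ct)), i.e. a skip connection between stacked layers.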
+ Expression ResidualDilatedLSTMBuilder::set_h_impl(int prev, const vector& h_new) { + DYNET_ARG_CHECK(h_new.empty() || h_new.size() == layers, + "ResidualDilatedLSTMBuilder::set_h expects as many inputs as layers, but got " << + h_new.size() << " inputs for " << layers << " layers"); + const unsigned t = h.size(); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = h_new[i]; + Expression c_i = c[t - 1][i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); + } + // Current implementation : s_new is either {new_c[0],...,new_c[n]} + // or {new_c[0],...,new_c[n],new_h[0],...,new_h[n]} + Expression ResidualDilatedLSTMBuilder::set_s_impl(int prev, const std::vector& s_new) { + DYNET_ARG_CHECK(s_new.size() == layers || s_new.size() == 2 * layers, + "ResidualDilatedLSTMBuilder::set_s expects either as many inputs or twice as many inputs as layers, but got " << s_new.size() << " inputs for " << layers << " layers"); + bool only_c = s_new.size() == layers; + const unsigned t = c.size(); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = only_c ? h[t - 1][i] : s_new[i + layers]; + Expression c_i = s_new[i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); + } + + Expression ResidualDilatedLSTMBuilder::add_input_impl(int prev, const Expression& x) { + h.push_back(vector(layers)); + c.push_back(vector(layers)); + vector& ht = h.back(); + vector& ct = c.back(); + Expression in = x; + if ((dropout_rate > 0.f || dropout_rate_h > 0.f) && !dropout_masks_valid) set_dropout_masks(x.dim().bd); + for (unsigned i = 0; i < layers; ++i) { + int dilation_offset = dilations[i] - 1; + const vector& vars = param_vars[i]; + + Expression i_h_tm1, i_c_tm1; + bool has_prev_state = (prev >= 0 || has_initial_state); + if (prev < dilation_offset) { + if (has_initial_state) { + // intial value for h and c at timestep 0 in layer i + // defaults to zero matrix input if not set in add_parameter_edges + i_h_tm1 = h0[i]; + i_c_tm1 = c0[i]; + } + else { + i_h_tm1 = zeros(*_cg, Dim({ vars[_BI].dim()[0] / 4 }, x.dim().bd)); + i_c_tm1 = i_h_tm1; + } + } + else { + i_h_tm1 = h[prev - dilation_offset][i]; + i_c_tm1 = c[prev - dilation_offset][i]; + } + // apply dropout according to https://arxiv.org/abs/1512.05287 (tied weights) + if (dropout_rate > 0.f) { + in = cmult(in, masks[i][0]); + } + if (has_prev_state && dropout_rate_h > 0.f) + i_h_tm1 = cmult(i_h_tm1, masks[i][1]); + // input + Expression tmp; + Expression i_ait; + Expression i_aft; + Expression i_aot; + Expression i_agt; + if (ln_lstm) { + const vector& ln_vars = ln_param_vars[i]; + if (has_prev_state) + tmp = vars[_BI] + layer_norm(vars[_X2I] * in, ln_vars[LN_GX], ln_vars[LN_BX]) + layer_norm(vars[_H2I] * i_h_tm1, ln_vars[LN_GH], ln_vars[LN_BH]); + else + tmp = vars[_BI] + layer_norm(vars[_X2I] * in, ln_vars[LN_GX], ln_vars[LN_BX]); + } + else { + if (has_prev_state) + tmp = affine_transform({ vars[_BI], vars[_X2I], in, vars[_H2I], i_h_tm1 }); + else + tmp = affine_transform({ vars[_BI], vars[_X2I], in }); + } + i_ait = pick_range(tmp, 0, hid); + i_aft = pick_range(tmp, hid, hid * 2); + i_aot = pick_range(tmp, hid * 2, hid * 3); + i_agt = pick_range(tmp, hid * 3, hid * 4); + Expression i_it = logistic(i_ait); + if (forget_bias != 0.0) + tmp = logistic(i_aft + forget_bias); + else + tmp = logistic(i_aft); + + Expression i_ft = tmp; + Expression i_ot = logistic(i_aot); + Expression i_gt = tanh(i_agt); + + ct[i] = 
has_prev_state ? (cmult(i_ft, i_c_tm1) + cmult(i_it, i_gt)) : cmult(i_it, i_gt); + if (ln_lstm) { + const vector& ln_vars = ln_param_vars[i]; + if (i==0) + in = ht[i] = cmult(i_ot, tanh(layer_norm(ct[i], ln_vars[LN_GC], ln_vars[LN_BC]))); + else + in = ht[i] = cmult(i_ot, in+tanh(layer_norm(ct[i], ln_vars[LN_GC], ln_vars[LN_BC]))); + } + else { + if (i==0) + in = ht[i] = cmult(i_ot, tanh(ct[i])); + else + in = ht[i] = cmult(i_ot, in+tanh(ct[i])); + } + } + return ht.back(); + } + + void ResidualDilatedLSTMBuilder::copy(const RNNBuilder & rnn) { + const ResidualDilatedLSTMBuilder & rnn_lstm = (const ResidualDilatedLSTMBuilder&)rnn; + DYNET_ARG_CHECK(params.size() == rnn_lstm.params.size(), + "Attempt to copy ResidualDilatedLSTMBuilder with different number of parameters " + "(" << params.size() << " != " << rnn_lstm.params.size() << ")"); + for (size_t i = 0; i < params.size(); ++i) + for (size_t j = 0; j < params[i].size(); ++j) + params[i][j] = rnn_lstm.params[i][j]; + for (size_t i = 0; i < ln_params.size(); ++i) + for (size_t j = 0; j < ln_params[i].size(); ++j) + ln_params[i][j] = rnn_lstm.ln_params[i][j]; + } + + void ResidualDilatedLSTMBuilder::set_dropout(float d) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d; + } + + void ResidualDilatedLSTMBuilder::set_dropout(float d, float d_h) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f && d_h >= 0.f && d_h <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d_h; + } + + void ResidualDilatedLSTMBuilder::disable_dropout() { + dropout_rate = 0.f; + dropout_rate_h = 0.f; + } + + + + + //enum { _X2I, _H2I, _BI, _X2F, _H2F, _BF, _X2O, _H2O, _BO, _X2G, _H2G, _BG }; + enum { _X2I_, _H2I_, _BI_, _XA1, _HA1, _SA1, _BA1, _A2, _B2 }; + + +//*************************** + + + + AttentiveDilatedLSTMBuilder::AttentiveDilatedLSTMBuilder() : has_initial_state(false), layers(0), input_dim(0), hid(0), dropout_rate_h(0), weightnoise_std(0), dropout_masks_valid(false) { } + + AttentiveDilatedLSTMBuilder::AttentiveDilatedLSTMBuilder(vector max_dilations, + unsigned input_dim, + unsigned hidden_dim, + unsigned attention_dim, + ParameterCollection& model) + : max_dilations(max_dilations), layers(unsigned(max_dilations.size())), + input_dim(input_dim), hid(hidden_dim), attention_dim(attention_dim), weightnoise_std(0), dropout_masks_valid(false) { + unsigned layer_input_dim = input_dim; + local_model = model.add_subcollection("compact-vanilla-lstm-builder"); + for (unsigned i = 0; i < layers; ++i) { + // i + Parameter p_Wx = local_model.add_parameters({ hidden_dim * 4, layer_input_dim }); + Parameter p_Wh = local_model.add_parameters({ hidden_dim * 4, hidden_dim }); + Parameter p_b = local_model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(0.f)); + + Parameter p_Wxa1 = local_model.add_parameters({ attention_dim, layer_input_dim }); + Parameter p_Wha1 = local_model.add_parameters({ attention_dim, hidden_dim }); + Parameter p_Wsa1 = local_model.add_parameters({ attention_dim, hidden_dim }); + Parameter p_ba1 = local_model.add_parameters({ attention_dim }, ParameterInitConst(0.f)); + + Parameter p_Wa2 = local_model.add_parameters({ max_dilations[i], attention_dim }); + Parameter p_ba2 = local_model.add_parameters({ max_dilations[i] }, ParameterInitConst(0.f)); + + layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next + + vector ps = { p_Wx, p_Wh, p_b, p_Wxa1, p_Wha1, p_Wsa1, p_ba1, p_Wa2, p_ba2 }; + 
params.push_back(ps); + + } // layers + dropout_rate = 0.f; + dropout_rate_h = 0.f; + } + + void AttentiveDilatedLSTMBuilder::new_graph_impl(ComputationGraph& cg, bool update) { + param_vars.clear(); + for (unsigned i = 0; i < layers; ++i) { + auto& p = params[i]; + vector vars; + for (unsigned j = 0; j < p.size(); ++j) { + vars.push_back(update ? parameter(cg, p[j]) : const_parameter(cg, p[j])); + } + param_vars.push_back(vars); + } + + _cg = &cg; + } + // layout: 0..layers = c + // layers+1..2*layers = h + void AttentiveDilatedLSTMBuilder::start_new_sequence_impl(const vector& hinit) { + h.clear(); + c.clear(); + + if (hinit.size() > 0) { + DYNET_ARG_CHECK(layers * 2 == hinit.size(), + "AttentiveDilatedLSTMBuilder must be initialized with 2 times as many expressions as layers " + "(hidden state, and cell for each layer). However, for " << layers << " layers, " << + hinit.size() << " expressions were passed in"); + h0.resize(layers); + c0.resize(layers); + for (unsigned i = 0; i < layers; ++i) { + c0[i] = hinit[i]; + h0[i] = hinit[i + layers]; + } + has_initial_state = true; + } + else { + has_initial_state = false; + } + + dropout_masks_valid = false; + } + + void AttentiveDilatedLSTMBuilder::set_dropout_masks(unsigned batch_size) { + masks.clear(); + for (unsigned i = 0; i < layers; ++i) { + std::vector masks_i; + unsigned idim = (i == 0) ? input_dim : hid; + if (dropout_rate > 0.f || dropout_rate_h > 0.f) { + float retention_rate = 1.f - dropout_rate; + float retention_rate_h = 1.f - dropout_rate_h; + float scale = 1.f / retention_rate; + float scale_h = 1.f / retention_rate_h; + // in + masks_i.push_back(random_bernoulli(*_cg, Dim({ idim }, batch_size), retention_rate, scale)); + // h + masks_i.push_back(random_bernoulli(*_cg, Dim({ hid }, batch_size), retention_rate_h, scale_h)); + masks.push_back(masks_i); + } + } + dropout_masks_valid = true; + } + + ParameterCollection & AttentiveDilatedLSTMBuilder::get_parameter_collection() { + return local_model; + } + + // TODO - Make this correct + // Copied c from the previous step (otherwise c.size()< h.size()) + // Also is creating a new step something we want? + // wouldn't overwriting the current one be better? + Expression AttentiveDilatedLSTMBuilder::set_h_impl(int prev, const vector& h_new) { + DYNET_ARG_CHECK(h_new.empty() || h_new.size() == layers, + "AttentiveDilatedLSTMBuilder::set_h expects as many inputs as layers, but got " << + h_new.size() << " inputs for " << layers << " layers"); + const unsigned t = unsigned(h.size()); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = h_new[i]; + Expression c_i = c[t - 1][i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); + } + // Current implementation : s_new is either {new_c[0],...,new_c[n]} + // or {new_c[0],...,new_c[n],new_h[0],...,new_h[n]} + Expression AttentiveDilatedLSTMBuilder::set_s_impl(int prev, const std::vector& s_new) { + DYNET_ARG_CHECK(s_new.size() == layers || s_new.size() == 2 * layers, + "AttentiveDilatedLSTMBuilder::set_s expects either as many inputs or twice as many inputs as layers, but got " << s_new.size() << " inputs for " << layers << " layers"); + bool only_c = s_new.size() == layers; + const unsigned t = unsigned(c.size()); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = only_c ? 
h[t - 1][i] : s_new[i + layers]; + Expression c_i = s_new[i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); + } + + Expression AttentiveDilatedLSTMBuilder::add_input_impl(int prev, const Expression& x) { + h.push_back(vector(layers)); + c.push_back(vector(layers)); + vector& ht = h.back(); + vector& ct = c.back(); + Expression in = x; + if ((dropout_rate > 0.f || dropout_rate_h > 0.f) && !dropout_masks_valid) set_dropout_masks(x.dim().bd); + for (unsigned i = 0; i < layers; ++i) { + int dilation_offset= max_dilations[i]-1; + const vector& vars = param_vars[i]; + Expression i_h_tm1, i_c_tm1; + if (prev < dilation_offset) { + if (has_initial_state) { + // initial value for h and c at timestep 0 in layer i + // defaults to zero matrix input if not set in add_parameter_edges + i_h_tm1 = h0[i]; + i_c_tm1 = c0[i]; + } + else { + i_h_tm1 = zeros(*_cg, Dim({ vars[_BI].dim()[0] / 4 }, x.dim().bd)); + i_c_tm1 = i_h_tm1; + } + } + else { + if (dilation_offset>0) { + //enum { _X2I, _H2I, _BI, _XA1, _HA1, _SA1, _BA1, _A2, _B2 }; + Expression weights_ex=vars[_XA1]*in+ vars[_HA1]*h[prev][i]+ vars[_SA1]*c[prev][i]+ vars[_BA1]; + weights_ex=tanh(weights_ex); + weights_ex=vars[_A2]* weights_ex+ vars[_B2]; + weights_ex =softmax(weights_ex); + #if defined _DEBUG + vector weights=as_vector(weights_ex.value()); + #endif + + unsigned indx=0; + Expression w_ex = pick(weights_ex, indx); + Expression avg_h= cmult(h[prev][i], w_ex); + for (indx=1; indx <= dilation_offset; indx++) {//dilation_offset==max_dilations[i]-1, so together with indx==0, we cover max_dilations[i] steps + w_ex = pick(weights_ex, indx); + avg_h = avg_h+cmult(h[prev- indx][i], w_ex); + } + i_h_tm1 = avg_h; + } else { + i_h_tm1 = h[prev- dilation_offset][i]; + } + i_c_tm1 = c[prev- dilation_offset][i]; + } + if (dropout_rate > 0.f || dropout_rate_h > 0.f) { + // apply dropout according to https://arxiv.org/abs/1512.05287 (tied weights) + Expression gates_t = vanilla_lstm_gates_dropout({ in }, i_h_tm1, vars[_X2I], vars[_H2I], vars[_BI], masks[i][0], masks[i][1], weightnoise_std); + ct[i] = vanilla_lstm_c(i_c_tm1, gates_t); + in = ht[i] = vanilla_lstm_h(ct[i], gates_t); + } + else { + Expression gates_t = vanilla_lstm_gates({ in }, i_h_tm1, vars[_X2I], vars[_H2I], vars[_BI], weightnoise_std); + ct[i] = vanilla_lstm_c(i_c_tm1, gates_t); + in = ht[i] = vanilla_lstm_h(ct[i], gates_t); + } + } + return ht.back(); + } + + void AttentiveDilatedLSTMBuilder::copy(const RNNBuilder & rnn) { + const AttentiveDilatedLSTMBuilder & rnn_lstm = (const AttentiveDilatedLSTMBuilder&)rnn; + DYNET_ARG_CHECK(params.size() == rnn_lstm.params.size(), + "Attempt to copy AttentiveDilatedLSTMBuilder with different number of parameters " + "(" << params.size() << " != " << rnn_lstm.params.size() << ")"); + for (size_t i = 0; i < params.size(); ++i) + for (size_t j = 0; j < params[i].size(); ++j) + params[i][j] = rnn_lstm.params[i][j]; + } + + void AttentiveDilatedLSTMBuilder::set_dropout(float d) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d; + } + + void AttentiveDilatedLSTMBuilder::set_dropout(float d, float d_h) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f && d_h >= 0.f && d_h <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d_h; + } + + void AttentiveDilatedLSTMBuilder::disable_dropout() { + dropout_rate = 0.f; + dropout_rate_h = 0.f; + } + void AttentiveDilatedLSTMBuilder::set_weightnoise(float std) { + 
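+		//std is only stored here; it is applied in add_input_impl, where it is passed as weightnoise_std to
+		//vanilla_lstm_gates()/vanilla_lstm_gates_dropout()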
DYNET_ARG_CHECK(std >= 0.f, "weight noise must have standard deviation >=0"); + weightnoise_std = std; + } + + //*/ + + DilatedLSTMBuilder::DilatedLSTMBuilder() : has_initial_state(false), layers(0), input_dim(0), hid(0), dropout_rate_h(0), weightnoise_std(0), dropout_masks_valid(false) { } + + DilatedLSTMBuilder::DilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model) + : dilations(dilations), layers(unsigned(dilations.size())), + input_dim(input_dim), hid(hidden_dim), weightnoise_std(0), dropout_masks_valid(false) { + unsigned layer_input_dim = input_dim; + local_model = model.add_subcollection("compact-vanilla-lstm-builder"); + for (unsigned i = 0; i < layers; ++i) { + // i + Parameter p_Wx = local_model.add_parameters({ hidden_dim * 4, layer_input_dim }); + Parameter p_Wh = local_model.add_parameters({ hidden_dim * 4, hidden_dim }); + Parameter p_b = local_model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(0.f)); + + layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next + + vector ps = { p_Wx, p_Wh, p_b }; + params.push_back(ps); + + } // layers + dropout_rate = 0.f; + dropout_rate_h = 0.f; + } + + void DilatedLSTMBuilder::new_graph_impl(ComputationGraph& cg, bool update) { + param_vars.clear(); + for (unsigned i = 0; i < layers; ++i) { + auto& p = params[i]; + vector vars; + for (unsigned j = 0; j < p.size(); ++j) { vars.push_back(update ? parameter(cg, p[j]) : const_parameter(cg, p[j])); } + param_vars.push_back(vars); + } + + _cg = &cg; + } + // layout: 0..layers = c + // layers+1..2*layers = h + void DilatedLSTMBuilder::start_new_sequence_impl(const vector& hinit) { + h.clear(); + c.clear(); + + if (hinit.size() > 0) { + DYNET_ARG_CHECK(layers * 2 == hinit.size(), + "DilatedLSTMBuilder must be initialized with 2 times as many expressions as layers " + "(hidden state, and cell for each layer). However, for " << layers << " layers, " << + hinit.size() << " expressions were passed in"); + h0.resize(layers); + c0.resize(layers); + for (unsigned i = 0; i < layers; ++i) { + c0[i] = hinit[i]; + h0[i] = hinit[i + layers]; + } + has_initial_state = true; + } else { + has_initial_state = false; + } + + dropout_masks_valid = false; + } + + void DilatedLSTMBuilder::set_dropout_masks(unsigned batch_size) { + masks.clear(); + for (unsigned i = 0; i < layers; ++i) { + std::vector masks_i; + unsigned idim = (i == 0) ? input_dim : hid; + if (dropout_rate > 0.f || dropout_rate_h > 0.f) { + float retention_rate = 1.f - dropout_rate; + float retention_rate_h = 1.f - dropout_rate_h; + float scale = 1.f / retention_rate; + float scale_h = 1.f / retention_rate_h; + // in + masks_i.push_back(random_bernoulli(*_cg, Dim({ idim }, batch_size), retention_rate, scale)); + // h + masks_i.push_back(random_bernoulli(*_cg, Dim({ hid }, batch_size), retention_rate_h, scale_h)); + masks.push_back(masks_i); + } + } + dropout_masks_valid = true; + } + + ParameterCollection & DilatedLSTMBuilder::get_parameter_collection() { + return local_model; + } + + // TODO - Make this correct + // Copied c from the previous step (otherwise c.size()< h.size()) + // Also is creating a new step something we want? + // wouldn't overwriting the current one be better? 
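+ // Example usage (an illustrative sketch only; it mirrors how ES_RNN.cc drives these builders through the
+ // standard DyNet RNNBuilder interface -- the dilations, dimensions and the `inputs` vector below are arbitrary):
+ //   ParameterCollection pc;
+ //   DilatedLSTMBuilder rnn({1, 3, 6, 12}, /*input_dim=*/20, /*hidden_dim=*/40, pc);
+ //   ComputationGraph cg;
+ //   rnn.new_graph(cg);         // attach the builder's parameters to the current graph
+ //   rnn.start_new_sequence();  // clear the h/c histories (no initial state supplied)
+ //   for (const Expression& x_t : inputs) {    // inputs: one Expression per time step, built by the caller
+ //     Expression h_t = rnn.add_input(x_t);    // layer i reads its recurrent state from dilations[i] steps back
+ //   }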
+ Expression DilatedLSTMBuilder::set_h_impl(int prev, const vector& h_new) { + DYNET_ARG_CHECK(h_new.empty() || h_new.size() == layers, + "DilatedLSTMBuilder::set_h expects as many inputs as layers, but got " << + h_new.size() << " inputs for " << layers << " layers"); + const unsigned t = unsigned(h.size()); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = h_new[i]; + Expression c_i = c[t - 1][i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); + } + // Current implementation : s_new is either {new_c[0],...,new_c[n]} + // or {new_c[0],...,new_c[n],new_h[0],...,new_h[n]} + Expression DilatedLSTMBuilder::set_s_impl(int prev, const std::vector& s_new) { + DYNET_ARG_CHECK(s_new.size() == layers || s_new.size() == 2 * layers, + "DilatedLSTMBuilder::set_s expects either as many inputs or twice as many inputs as layers, but got " << s_new.size() << " inputs for " << layers << " layers"); + bool only_c = s_new.size() == layers; + const unsigned t = unsigned(c.size()); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = only_c ? h[t - 1][i] : s_new[i + layers]; + Expression c_i = s_new[i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); + } + + Expression DilatedLSTMBuilder::add_input_impl(int prev, const Expression& x) { + h.push_back(vector(layers)); + c.push_back(vector(layers)); + vector& ht = h.back(); + vector& ct = c.back(); + Expression in = x; + if ((dropout_rate > 0.f || dropout_rate_h > 0.f) && !dropout_masks_valid) set_dropout_masks(x.dim().bd); + for (unsigned i = 0; i < layers; ++i) { + int dilation_offset = dilations[i] - 1; + const vector& vars = param_vars[i]; + Expression i_h_tm1, i_c_tm1; + if (prev < dilation_offset) { + if (has_initial_state) { + // initial value for h and c at timestep 0 in layer i + // defaults to zero matrix input if not set in add_parameter_edges + i_h_tm1 = h0[i]; + i_c_tm1 = c0[i]; + } else { + i_h_tm1 = zeros(*_cg, Dim({ vars[_BI].dim()[0] / 4 }, x.dim().bd)); + i_c_tm1 = i_h_tm1; + } + } else { // t > 0 + i_h_tm1 = h[prev - dilation_offset][i]; + i_c_tm1 = c[prev - dilation_offset][i]; + } + if (dropout_rate > 0.f || dropout_rate_h > 0.f) { + // apply dropout according to https://arxiv.org/abs/1512.05287 (tied weights) + Expression gates_t = vanilla_lstm_gates_dropout({ in }, i_h_tm1, vars[_X2I], vars[_H2I], vars[_BI], masks[i][0], masks[i][1], weightnoise_std); + ct[i] = vanilla_lstm_c(i_c_tm1, gates_t); + in = ht[i] = vanilla_lstm_h(ct[i], gates_t); + } else { + Expression gates_t = vanilla_lstm_gates({ in }, i_h_tm1, vars[_X2I], vars[_H2I], vars[_BI], weightnoise_std); + ct[i] = vanilla_lstm_c(i_c_tm1, gates_t); + in = ht[i] = vanilla_lstm_h(ct[i], gates_t); + } + } + return ht.back(); + } + + void DilatedLSTMBuilder::copy(const RNNBuilder & rnn) { + const DilatedLSTMBuilder & rnn_lstm = (const DilatedLSTMBuilder&)rnn; + DYNET_ARG_CHECK(params.size() == rnn_lstm.params.size(), + "Attempt to copy DilatedLSTMBuilder with different number of parameters " + "(" << params.size() << " != " << rnn_lstm.params.size() << ")"); + for (size_t i = 0; i < params.size(); ++i) + for (size_t j = 0; j < params[i].size(); ++j) + params[i][j] = rnn_lstm.params[i][j]; + } + + void DilatedLSTMBuilder::set_dropout(float d) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d; + } + + void 
DilatedLSTMBuilder::set_dropout(float d, float d_h) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f && d_h >= 0.f && d_h <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d_h; + } + + void DilatedLSTMBuilder::disable_dropout() { + dropout_rate = 0.f; + dropout_rate_h = 0.f; + } + void DilatedLSTMBuilder::set_weightnoise(float std) { + DYNET_ARG_CHECK(std >= 0.f, "weight noise must have standard deviation >=0"); + weightnoise_std = std; + } + +} // namespace dynet diff --git a/118 - slaweks17/c++/slstm.h b/118 - slaweks17/c++/slstm.h new file mode 100644 index 0000000..adb63a7 --- /dev/null +++ b/118 - slaweks17/c++/slstm.h @@ -0,0 +1,394 @@ +/** +* file slstm.h +* header for my implementation of dilated LSTMs, based on Dynet LSTM builders + - DilatedLSTMBuilder - standard Dilated LSTM (https://papers.nips.cc/paper/6613-dilated-recurrent-neural-networks.pdf) + - ResidualDilatedLSTMBuilder - Dilated LSTM with special Residual shortcuts, after https://arxiv.org/abs/1701.03360 + - AttentiveDilatedLSTMBuilder - Dilated LSTM with Attention mechanism, as in the second stage of https://arxiv.org/abs/1704.02971 +* +Slawek Smyl, Mar-May 2018 +*/ + +#ifndef DYNET_SLSTMS_H_ +#define DYNET_SLSTMS_H_ + +#include "dynet/dynet.h" +#include "dynet/rnn.h" +#include "dynet/expr.h" + +using namespace std; + +namespace dynet { + + //basd on VanillaLSTMBuilder + struct ResidualDilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + ResidualDilatedLSTMBuilder(); + /** + * \brief Constructor for the ResidualDilatedLSTMBuilder + * + * \param dilations Vector of dilations + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + * \param ln_lstm Whether to use layer normalization + * \param forget_bias value(float) to use as bias for the forget gate(default = 1.0) + */ + explicit ResidualDilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model, + bool ln_lstm = false, + float forget_bias = 1.f); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. 
+ * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. + * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + * \param d_h Dropout rate \f$d_h\f$ for the output \f$h_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + /** + * \brief Get parameters in ResidualDilatedLSTMBuilder + * \return list of points to ParameterStorage objects + */ + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + // first index is layer, then ... + std::vector> ln_params; + + // first index is layer, then ... + std::vector> param_vars; + // first index is layer, then ... + std::vector> ln_param_vars; + + // first index is layer, then ... 
+ std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + float dropout_rate_h; + bool ln_lstm; + float forget_bias; + bool dropout_masks_valid; + vector dilations; //one int per layer + + private: + ComputationGraph* _cg; // Pointer to current cg + + }; + + + struct DilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + DilatedLSTMBuilder(); + /** + * \brief Constructor for the DilatedLSTMBuilder + * + * \param dilations Vector of dilations + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + */ + explicit DilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 
+ * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + + void set_weightnoise(float std); + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + + // first index is layer, then ... + std::vector> param_vars; + + // first index is layer, then ... + std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + float dropout_rate_h; + float weightnoise_std; + vector dilations; //one int per layer + + bool dropout_masks_valid; + private: + ComputationGraph* _cg; // Pointer to current cg + + }; + + + struct AttentiveDilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + AttentiveDilatedLSTMBuilder(); + /** + * \brief Constructor for the AttentiveDilatedLSTMBuilder + * + * \param max_dilations Vector, maximum dilations (per layer) + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + */ + explicit AttentiveDilatedLSTMBuilder(vector max_dilations, + unsigned input_dim, + unsigned hidden_dim, + unsigned attention_dim, + ParameterCollection& model); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? 
c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. + * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + + void set_weightnoise(float std); + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + + // first index is layer, then ... + std::vector> param_vars; + + // first index is layer, then ... 
+ std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + unsigned attention_dim; + float dropout_rate_h; + float weightnoise_std; + vector max_dilations; //one int per layer + + bool dropout_masks_valid; + private: + ComputationGraph* _cg; // Pointer to current cg + + }; +} // namespace dynet + +#endif diff --git a/118 - slaweks17/c++/windows_VisualStudio/M4.sln b/118 - slaweks17/c++/windows_VisualStudio/M4.sln new file mode 100644 index 0000000..035373c --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/M4.sln @@ -0,0 +1,58 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 14 +VisualStudioVersion = 14.0.25420.1 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "M41", "M41\M41.vcxproj", "{928301A0-F01A-48F6-A499-851B3CE8BD4E}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "M42", "M42\M42.vcxproj", "{A16B5466-E680-43F6-A884-A4A01EB78E50}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "M43", "M43\M43.vcxproj", "{BE951571-3F3A-4048-BAA3-0C05F38CFF42}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "M44", "M44\M44.vcxproj", "{7A192E0C-8F58-4D65-998E-3A7010AB5F87}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + RelWithDebug|x64 = RelWithDebug|x64 + RelWithDebug|x86 = RelWithDebug|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {928301A0-F01A-48F6-A499-851B3CE8BD4E}.Debug|x64.ActiveCfg = Debug|x64 + {928301A0-F01A-48F6-A499-851B3CE8BD4E}.Debug|x64.Build.0 = Debug|x64 + {928301A0-F01A-48F6-A499-851B3CE8BD4E}.Debug|x86.ActiveCfg = Debug|Win32 + {928301A0-F01A-48F6-A499-851B3CE8BD4E}.Debug|x86.Build.0 = Debug|Win32 + {928301A0-F01A-48F6-A499-851B3CE8BD4E}.RelWithDebug|x64.ActiveCfg = RelWithDebug|x64 + {928301A0-F01A-48F6-A499-851B3CE8BD4E}.RelWithDebug|x64.Build.0 = RelWithDebug|x64 + {928301A0-F01A-48F6-A499-851B3CE8BD4E}.RelWithDebug|x86.ActiveCfg = RelWithDebug|Win32 + {928301A0-F01A-48F6-A499-851B3CE8BD4E}.RelWithDebug|x86.Build.0 = RelWithDebug|Win32 + {A16B5466-E680-43F6-A884-A4A01EB78E50}.Debug|x64.ActiveCfg = Debug|x64 + {A16B5466-E680-43F6-A884-A4A01EB78E50}.Debug|x64.Build.0 = Debug|x64 + {A16B5466-E680-43F6-A884-A4A01EB78E50}.Debug|x86.ActiveCfg = Debug|Win32 + {A16B5466-E680-43F6-A884-A4A01EB78E50}.Debug|x86.Build.0 = Debug|Win32 + {A16B5466-E680-43F6-A884-A4A01EB78E50}.RelWithDebug|x64.ActiveCfg = RelWithDebug|x64 + {A16B5466-E680-43F6-A884-A4A01EB78E50}.RelWithDebug|x64.Build.0 = RelWithDebug|x64 + {A16B5466-E680-43F6-A884-A4A01EB78E50}.RelWithDebug|x86.ActiveCfg = RelWithDebug|Win32 + {A16B5466-E680-43F6-A884-A4A01EB78E50}.RelWithDebug|x86.Build.0 = RelWithDebug|Win32 + {BE951571-3F3A-4048-BAA3-0C05F38CFF42}.Debug|x64.ActiveCfg = Debug|x64 + {BE951571-3F3A-4048-BAA3-0C05F38CFF42}.Debug|x64.Build.0 = Debug|x64 + {BE951571-3F3A-4048-BAA3-0C05F38CFF42}.Debug|x86.ActiveCfg = Debug|Win32 + {BE951571-3F3A-4048-BAA3-0C05F38CFF42}.Debug|x86.Build.0 = Debug|Win32 + {BE951571-3F3A-4048-BAA3-0C05F38CFF42}.RelWithDebug|x64.ActiveCfg = RelWithDebug|x64 + {BE951571-3F3A-4048-BAA3-0C05F38CFF42}.RelWithDebug|x64.Build.0 = RelWithDebug|x64 + 
{BE951571-3F3A-4048-BAA3-0C05F38CFF42}.RelWithDebug|x86.ActiveCfg = RelWithDebug|Win32 + {BE951571-3F3A-4048-BAA3-0C05F38CFF42}.RelWithDebug|x86.Build.0 = RelWithDebug|Win32 + {7A192E0C-8F58-4D65-998E-3A7010AB5F87}.Debug|x64.ActiveCfg = Debug|x64 + {7A192E0C-8F58-4D65-998E-3A7010AB5F87}.Debug|x64.Build.0 = Debug|x64 + {7A192E0C-8F58-4D65-998E-3A7010AB5F87}.Debug|x86.ActiveCfg = Debug|Win32 + {7A192E0C-8F58-4D65-998E-3A7010AB5F87}.Debug|x86.Build.0 = Debug|Win32 + {7A192E0C-8F58-4D65-998E-3A7010AB5F87}.RelWithDebug|x64.ActiveCfg = RelWithDebug|x64 + {7A192E0C-8F58-4D65-998E-3A7010AB5F87}.RelWithDebug|x64.Build.0 = RelWithDebug|x64 + {7A192E0C-8F58-4D65-998E-3A7010AB5F87}.RelWithDebug|x86.ActiveCfg = RelWithDebug|Win32 + {7A192E0C-8F58-4D65-998E-3A7010AB5F87}.RelWithDebug|x86.Build.0 = RelWithDebug|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/118 - slaweks17/c++/windows_VisualStudio/M41/ES_RNN.cc b/118 - slaweks17/c++/windows_VisualStudio/M41/ES_RNN.cc new file mode 100644 index 0000000..43dc358 --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/M41/ES_RNN.cc @@ -0,0 +1,1193 @@ +/*ES-RNN: ES-RNN Exponential Smoothing Recurrent Neural Network hybrid. Point forecast. +Slawek Smyl, Jan-May 2017. + +Dilated LSTMs, with optional shortcuts, attention. +It is meant to be used for the Monthly and Quarterly series of the M4 competition, because the DE (Diversified Ensemble) version is too slow. +The program uses and requires the Dynet NN library (https://github.com/clab/dynet); it can be compiled and run on Windows, Linux, and Mac. + +It has to be invoked in pairs of executables, passing at least two integers: seedForChunks, chunkNo, +so e.g. create a script with the following lines on Windows +start 10 1 +start 10 2 +Modern computers have more than 2 cores, so e.g. on a 6-core machine create and run the following script with 3 pairs of workers: +# start 10 1 0 +# start 10 2 0 +# start 20 1 5 +# start 20 2 5 +# start 30 1 10 +# start 30 2 10 +seedForChunks has to be the same within one pair; chunk numbers have to be 1 and 2. +We have added here a third parameter: ibigOffset. The stride between the ibigOffset values of consecutive pairs should be equal to or bigger than BIG_LOOP. +Each pair goes through BIG_LOOP (by default 3; change it in the code below if you want) rounds of model fitting and prediction, +so 2 pairs, as above, will produce 6 forecasts to be ensembled later, in R. +By increasing the number of pairs, e.g. to 6 on a 12-core computer, one can reduce BIG_LOOP to 1, and so reduce execution time, and still have 6 forecasts - +a decent number to ensemble (in a separate R script). + +There are three blocks of parameters below, one active (starting with //PARAMS--------------) and two inactive. +The active block is set up as in the final run of forecasting the Quarterly series; similarly for the Monthly block. +The Daily block is more of a demo, allowing one to quickly run forecasts for the Daily series, although with slightly worse performance (use the other program, ES_RNN_E.cc, for it). It was not used for the final submission. +So, you need to comment/uncomment so that only the block of interest is active. + + +*/ + +//#define USE_ODBC +//define USE_ODBC if you want to +// 1. run the program in backtesting mode (which means you also need to set LBACK>0 below; read the comment below). +// 2. save forecasts to a database. Mysql and SQL Server were tested. The table creation and some other scripts can be found in the \sql directory of the source code. +// Of course setting up ODBC is not that simple, :-), e.g.
you need to create DSN=slawek, that points to a database with the output table. +// Saving to the db is convenient, but not necessary - all forecasts are always saved to as csv files in automatically created subdirectory (sorry sometimes two directories, so you have to copy :-)) of OUTPUT_DIR +//If saving to database you need to modify run varaible, for each new run, otherwise you will get the table key error. + +#include "dynet/dynet.h" +#include "dynet/training.h" +#include "dynet/expr.h" +#include "dynet/io.h" +#include "dynet/model.h" +#include "dynet/nodes.h" +#include "dynet/expr.h" +#include "dynet/lstm.h" +#include "slstm.h" //my implementation of dilated LSTMs + +#if defined USE_ODBC + #if defined _WINDOWS + #include + #endif + #include + #include +#endif + +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace dynet; + + + +string DATA_DIR = "f:\\progs\\data\\M4DataSet\\"; //with the competition data csvs +//string DATA_DIR="/home/uber/progs/data/M4DataSet/"; +string OUTPUT_DIR = "f:\\progs\\data\\M4\\"; +//string OUTPUT_DIR="/home/uber/progs/data/M4/"; + +int LBACK = 0; //LBACK 0 means final mode: learning on all data and forecasting. LBACK=1 would move back by OUTPUT_SIZE, and forecast last known OUTPUT_SIZE points, for backtesting. LBACK could be a larger integer, but then number of series shrinks. + + +//PARAMS-------------- +string VARIABLE = "Quarterly"; +const string run = "50/45 (1,2),(4,8), LR=0.001/{10,1e-4f}, EPOCHS=15, LVP=80 40*"; +const float PERCENTILE = 50; //we always use Pinball loss, although on normalized values. When forecasting point value, we actually forecast median, so PERCENTILE=50 +const float TRAINING_PERCENTILE = 45; //the program has a tendency for positive bias. So, we can reduce it by running smaller TRAINING_PERCENTILE. + +vector> dilations={{1,2},{4,8}};//Each vector represents one chunk of Dilateed LSTMS, connected in standard resnNet fashion +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM //so for Quarterly series, we do not use either the more advanced residual connections nor attention. +const bool ADD_NL_LAYER=false; //whether to insert a tanh() layer between the RNN stack and the linear adaptor (output) layer + +const float INITIAL_LEARNING_RATE = 0.001f; +const map LEARNING_RATES = { { 10,1e-4f } }; //at which epoch we set them up to what +const float PER_SERIES_LR_MULTIP = 1; //multiplier for per-series parameters' learning rate. + +const int NUM_OF_TRAIN_EPOCHS = 15; +const unsigned int STATE_HSIZE = 40; + +const int SEASONALITY = 4; +const unsigned int INPUT_SIZE = 4; +const int INPUT_SIZE_I= INPUT_SIZE; +const unsigned int OUTPUT_SIZE = 8; +const int OUTPUT_SIZE_I= OUTPUT_SIZE; +const int MIN_INP_SEQ_LEN = 0; +const float LEVEL_VARIABILITY_PENALTY = 80; //Multiplier for L" penalty against wigglines of level vector. Important. +const int MIN_SERIES_LENGTH = INPUT_SIZE_I + OUTPUT_SIZE_I+ MIN_INP_SEQ_LEN+2; +const int MAX_SERIES_LENGTH = 40 * SEASONALITY + MIN_SERIES_LENGTH; //we are chopping longer series, to last, max e.g. 40 years + + + +/* +string VARIABLE = "Monthly"; +const string run = "50/49 Res (1,3,6,12), LR=5e-4 {12,1e-4f}, EPOCHS=10, 20*"; +const float PERCENTILE = 50; //we always use Pinball loss, although on normalized values. When forecasting point value, we actually forecast median, so PERCENTILE=50 +const float TRAINING_PERCENTILE = 49; //the program has a tendency for positive bias. 
So, we can reduce it by running smaller TRAINING_PERCENTILE + +#define USE_RESIDUAL_LSTM //so for Monthly we use only one block, so no standard resNet shortcuts, but instead but of the special residual shortcuts, after https://arxiv.org/abs/1701.03360. +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; //whether to insert a tanh() layer between the RNN stack and the linear adaptor (output) layer + +vector> dilations={{1,3,6,12}};//so for Monthly we use only one block, so no standard resNet shortcut +const float INITIAL_LEARNING_RATE = 5e-4; +const map LEARNING_RATES = { { 12,1e-4f } }; //at which epoch we set them up to what +const float PER_SERIES_LR_MULTIP = 1; //multiplier for per-series parameters' learning rate. + +const int NUM_OF_TRAIN_EPOCHS = 10; +const unsigned int STATE_HSIZE = 50; + +const float LEVEL_VARIABILITY_PENALTY = 50; //Multiplier for L" penalty against wigglines of level vector. + +const int SEASONALITY = 12; +const unsigned int OUTPUT_SIZE = 18; +const unsigned int INPUT_SIZE = 12; +const int INPUT_SIZE_I= INPUT_SIZE; +const int OUTPUT_SIZE_I= OUTPUT_SIZE; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = INPUT_SIZE_I + OUTPUT_SIZE_I+ MIN_INP_SEQ_LEN+2; +const int MAX_SERIES_LENGTH = 20 * SEASONALITY + MIN_SERIES_LENGTH; //we are chopping longer series, to last, max e.g. 40 years +*/ + + +/* +string VARIABLE = "Daily"; +const string run = "50/49 NL LRMult=1.5, 3/5 (1,7,28) LR=3e-4 {9,1e-4f} EPOCHS=15, LVP=100 HSIZE=40 20w"; + +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = true; + +const int PERCENTILE = 50; //we always use Pinball loss. When forecasting point value, we actually forecast median, so PERCENTILE=50 +const int TRAINING_PERCENTILE = 49; //the program has a tendency for positive bias. So, we can reduce it by running smaller TRAINING_PERCENTILE + +const int SEASONALITY_NUM = 1; //0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 7; +const int SEASONALITY2 = 0; +vector> dilations = { { 1,7,28 } }; + +const float INITIAL_LEARNING_RATE = 3e-4; +const map LEARNING_RATES = { { 9,1e-4f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1.5; +const int NUM_OF_TRAIN_EPOCHS = 15; + +float LEVEL_VARIABILITY_PENALTY = 100; //Multiplier for L" penalty against wigglines of level vector. +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 7; +const int INPUT_SIZE_I = INPUT_SIZE; +const unsigned int OUTPUT_SIZE = 14; +const int OUTPUT_SIZE_I = OUTPUT_SIZE; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). 
Total length may be truncated by LBACK +const int MAX_SERIES_LENGTH = 20 * SEASONALITY + MIN_SERIES_LENGTH; //we are chopping longer series, to max of last 20 years +*/ + +Expression squash(const Expression& x) { + return log(x); +} + +Expression expand(const Expression& x) { + return exp(x); +} + +string INPUT_PATH = DATA_DIR + VARIABLE + "-train.csv"; +string INFO_INPUT_PATH = DATA_DIR + "M4-info.csv"; + +#if defined _DEBUG + const int MAX_NUM_OF_SERIES = 40; +#else + const int MAX_NUM_OF_SERIES = -1; //use all series +#endif // _DEBUG + +const unsigned int NUM_OF_CATEGORIES = 6;//in data provided +const int BIG_LOOP = 3; +const int NUM_OF_CHUNKS = 2; +const float EPS=1e-6; +const int AVERAGING_LEVEL=5; +const bool USE_MEDIAN = false; +const int MIDDLE_POS_FOR_AVG = 2; //if using medians + +const float NOISE_STD=0.001; +const int FREQ_OF_TEST=1; +const float GRADIENT_CLIPPING=20; +const float C_STATE_PENALTY = 0; + +const float BIG_FLOAT=1e38;//numeric_limits::max(); +const bool PRINT_DIAGN=true; +const float TAU = PERCENTILE / 100.; +const float TRAINING_TAU = TRAINING_PERCENTILE / 100.; +const unsigned ATTENTION_HSIZE=STATE_HSIZE; + +const bool USE_AUTO_LEARNING_RATE=false; +//if USE_AUTO_LEARNING_RATE, and only if LBACK>0 +const float MIN_LEARNING_RATE = 0.0001f; +const float LR_RATIO = sqrt(10); +const float LR_TOLERANCE_MULTIP = 1.005; +const int L3_PERIOD = 2; +const int MIN_EPOCHS_BEFORE_CHANGING_LRATE = 2; + + +#if defined USE_ODBC + void HandleDiagnosticRecord(SQLHANDLE hHandle, + SQLSMALLINT hType, + RETCODE RetCode); + + #if defined _WINDOWS + WCHAR* pwszConnStr = L"DSN=slawek"; + #else + SQLCHAR* pwszConnStr = (SQLCHAR*) "DSN=slawek"; + #endif + #define TRYODBC(h, ht, x) { RETCODE rc = x;\ + if (rc != SQL_SUCCESS) \ + { \ + HandleDiagnosticRecord (h, ht, rc); \ + } \ + if (rc == SQL_ERROR) \ + { \ + fprintf(stderr, "Error in " #x "\n"); \ + if (hStmt) { \ + SQLFreeHandle(SQL_HANDLE_STMT, hStmt); \ + } \ + if (hDbc) { \ + SQLDisconnect(hDbc); \ + SQLFreeHandle(SQL_HANDLE_DBC, hDbc); \ + } \ + if (hEnv) { \ + SQLFreeHandle(SQL_HANDLE_ENV, hEnv); \ + } \ + exit(-1); \ + } \ + } + +#endif + +struct M4TS {//storing series data + vector < float> categories_vect; + vector vals; + vector testVals;//empty, unless LBACK>0 + int n; + + M4TS(string category, stringstream &line_stream) { + array categories = { 0,0,0,0,0,0 }; + if (category == "Demographic") + categories[0] = 1; + else if (category == "Finance") + categories[1] = 1; + else if (category == "Industry") + categories[2] = 1; + else if (category == "Macro") + categories[3] = 1; + else if (category == "Micro") + categories[4] = 1; + else if (category == "Other") + categories[5] = 1; + else { + cerr << "unknown category?"; + exit(-1); + } + for (int i = 0; i < NUM_OF_CATEGORIES; i++) + categories_vect.push_back(categories[i]); + + string tmp_str; + while(getline(line_stream, tmp_str, ',' )) { + string val_str; + for (const auto c : tmp_str) { + if (c != '\"' && c != '\r') //remove quotes and very occasional double end of line + val_str.push_back(c); + } + if (val_str.size() == 0) + break; + float val=(atof(val_str.c_str())); + vals.push_back(val); + } + if (LBACK > 0) { //extract last OUTPUT_SIZE points as the test values + if (vals.size() > LBACK*OUTPUT_SIZE_I) { + auto first = vals.begin() + vals.size() - LBACK*OUTPUT_SIZE_I; + auto pastLast = vals.begin() + vals.size() - (LBACK-1)*OUTPUT_SIZE_I; + vector input_vect(first, pastLast); //[first,pastLast) + testVals= input_vect; + vals.resize(vals.size() - LBACK*OUTPUT_SIZE_I); 
//remove last LBACK*OUTPUT_SIZE elements + n = vals.size(); + } else + n = 0; + } else { + n = vals.size(); + } + if (n > MAX_SERIES_LENGTH) { //chop long series + vals.erase(vals.begin(), vals.begin() + (n-MAX_SERIES_LENGTH)); //remove some early data + n = vals.size(); + } + } + M4TS(){}; +}; + + +struct AdditionalParams {//Per series, important + Parameter levSm; + Parameter sSm; + array initSeasonality; +}; + +struct AdditionalParamsF {//Used for storing diagnostics + float levSm; + float sSm; + array initSeasonality; + vector levels; + vector seasons; +}; + + +Expression pinBallLoss(const Expression& out_ex, const Expression& actuals_ex) {//used by Dynet, learning loss function + vector losses; + for (unsigned int indx = 0; indx as_scalar(forec.value())) + losses.push_back((actual - forec)*TRAINING_TAU); + else + losses.push_back((actual - forec)*(TRAINING_TAU - 1)); + } + return sum(losses) / OUTPUT_SIZE * 2; +} + + +//weighted quantile Loss, used just for diagnostics, if if LBACK>0 and PERCENTILE!=50 +float wQuantLoss(vector& out_vect, vector& actuals_vect) { + float sumf = 0; float suma=0; + for (unsigned int indx = 0; indx forec) + sumf = sumf + (actual - forec)*TAU; + else + sumf = sumf + (actual - forec)*(TAU - 1); + } + return sumf / suma * 200; +} + +//used just for diagnostics, if LBACK>0 and PERCENTILE==50 +float sMAPE(vector& out_vect, vector& actuals_vect) { + float sumf = 0; + for (unsigned int indx = 0; indx& out_vect, vector& actuals_vect) { + if (PERCENTILE==50) + return sMAPE(out_vect, actuals_vect); + else + return wQuantLoss(out_vect, actuals_vect); +} + +int main(int argc, char** argv) { + dynet::initialize(argc, argv); + + int seedForChunks = 10; //Yes it runs, without any params, but it will work only on 1/NUM_OF_CHUNKS of all cases. The system is expected to run in NUM_OF_CHUNKS multiples. + int chunkNo = 1; + int ibigOffset = 0; + if (argc >= 3) { + seedForChunks = atoi(argv[1]); + chunkNo = atoi(argv[2]); + } + if (argc >= 4) + ibigOffset = atoi(argv[3]); + + if (chunkNo > NUM_OF_CHUNKS) { + cerr << "chunkNo > NUM_OF_CHUNKS"; + exit(-1); + } + else if (chunkNo <= 0) { + cerr << "chunkNo <= 0"; + exit(-1); + } + + cout<0) + std::cout<< " ibigOffset:"<< ibigOffset; //if continuing prematurely stopped run + if (LBACK>0) + std::cout<<" lback:"<tm_year+1900; + now_ts.month=now->tm_mon+1; + now_ts.day=now->tm_mday; + now_ts.hour=now->tm_hour; + now_ts.minute=now->tm_min; + now_ts.second=now->tm_sec; + now_ts.fraction=0; //reportedly needed + + const int OFFSET_TO_FIRST_ACTUAL=5; + string insertQuery_str = "insert into M72nn(run, LBack, ibig, series, epoch "; + for (int iq = 1; iq <= OUTPUT_SIZE_I; iq++) { + stringstream ss; + ss << iq; + string iq_str = ss.str(); + insertQuery_str = insertQuery_str +", actual"+iq_str+", forec" + iq_str; + } + insertQuery_str = insertQuery_str +", trainingError, variable, n, dateTimeOfPrediction) \ + values(? , ? , ? , ? , ? 
"; + for (int iq = 1; iq <= OUTPUT_SIZE_I; iq++) { + insertQuery_str = insertQuery_str + ",?,?"; + } + insertQuery_str = insertQuery_str + ",?,?,?,?)"; + #if defined _WINDOWS + wstring insertQuery(insertQuery_str.begin(), insertQuery_str.end()); + SQLWCHAR* sqlQuery = (SQLWCHAR*)insertQuery.c_str(); + #else + SQLCHAR* sqlQuery =(SQLCHAR*)insertQuery_str.c_str(); + #endif + + + SQLHENV hEnv = NULL; + SQLHDBC hDbc = NULL; + SQLHSTMT hStmt = NULL, hInsertStmt = NULL; + + if (SQLAllocHandle(SQL_HANDLE_ENV, SQL_NULL_HANDLE, &hEnv) == SQL_ERROR) { + fprintf(stderr, "Unable to allocate an environment handle\n"); + exit(-1); + } + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLSetEnvAttr(hEnv, + SQL_ATTR_ODBC_VERSION, + (SQLPOINTER)SQL_OV_ODBC3, + 0)); + + // Allocate a connection + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLAllocHandle(SQL_HANDLE_DBC, hEnv, &hDbc)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLDriverConnect(hDbc, + NULL, + pwszConnStr, + SQL_NTS, + NULL, + 0, + NULL, + SQL_DRIVER_COMPLETE)); + fprintf(stderr, "Connected!\n"); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLSetConnectAttr(hDbc, SQL_ATTR_AUTOCOMMIT, (SQLPOINTER)SQL_AUTOCOMMIT_OFF, SQL_IS_INTEGER)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLAllocHandle(SQL_HANDLE_STMT, hDbc, &hInsertStmt)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLPrepare(hInsertStmt, sqlQuery, SQL_NTS)); + + SQLLEN nullTerminatedStringOfRun = SQL_NTS; + SQLLEN nullTerminatedStringOfSeries = SQL_NTS; + SQLLEN nullTerminatedStringOfVariable = SQL_NTS; + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 1, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)run.c_str(), 0, &nullTerminatedStringOfRun)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 2, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&LBACK, 0, NULL)); + + // variable, n, dateTimeOfPrediction + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL+2*OUTPUT_SIZE_I+2, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)VARIABLE.c_str(), 0, &nullTerminatedStringOfVariable)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE_I + 4, SQL_PARAM_INPUT, SQL_C_TYPE_TIMESTAMP, SQL_TYPE_TIMESTAMP, 0, 0, &now_ts, sizeof(TIMESTAMP_STRUCT), NULL)); +#endif + + random_device rd; // only used once to initialise (seed) engine + mt19937 rng(rd()); // random-number engine used (Mersenne-Twister) + mt19937 rngForChunks(seedForChunks); + + vector series_vect; + unordered_map allSeries_map(30000);//max series in one chunk would be 48/2=24k, for monthly series + unordered_map seriesCategories_map(120000);//100k series + + ifstream infoFile(INFO_INPUT_PATH); + string line; + getline(infoFile, line); //header + while (getline(infoFile, line)) { + //cout << string( line)<= MIN_SERIES_LENGTH) { + series_vect.push_back(series); + allSeries_map[series] = m4Obj; + } + if (MAX_NUM_OF_SERIES>0 && series_vect.size()>=MAX_NUM_OF_SERIES) + break; + } + + int series_len=(int)series_vect.size(); + int chunkSize= series_len/NUM_OF_CHUNKS; + std::cout << "num of series:" << series_vect.size() <<" size of chunk:"<< chunkSize< uniOnSeries(0, chunkSize -1); // closed interval [a, b] + + unordered_map, AVERAGING_LEVEL+1>> testResults_map((int)chunkSize*1.5); + set diagSeries; + + for (int ibig=0; ibig perfValid_vect; + int epochOfLastChangeOfLRate = -1; + +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 3, 
SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&ibigDb, 0, NULL)); +#endif + + ParameterCollection pc; + ParameterCollection perSeriesPC; + + float learning_rate= INITIAL_LEARNING_RATE; + AdamTrainer trainer(pc, learning_rate, 0.9, 0.999, EPS); + trainer.clip_threshold = GRADIENT_CLIPPING; + AdamTrainer perSeriesTrainer(perSeriesPC, learning_rate*PER_SERIES_LR_MULTIP, 0.9, 0.999, EPS); + perSeriesTrainer.clip_threshold = GRADIENT_CLIPPING; + + #if defined USE_RESIDUAL_LSTM + vector rNNStack; + rNNStack.emplace_back(ResidualDilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, pc)); + for (int il = 1; il rNNStack; + rNNStack.emplace_back(AttentiveDilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, ATTENTION_HSIZE, pc)); + for (int il = 1; il rNNStack; + rNNStack.emplace_back(DilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, pc)); + for (int il = 1; il oneChunk_vect(start,end); + if (PRINT_DIAGN) { + for (int k = 0; k<10; k++) //diag + cout << oneChunk_vect[k] << " "; + cout << endl; + } + if (chunkNo == NUM_OF_CHUNKS) + cout<<"last chunk size:"<< oneChunk_vect.size()< additionalParams_map((int)oneChunk_vect.size()*1.5); //per series + unordered_map*> historyOfAdditionalParams_map((int)oneChunk_vect.size()*1.5); + for (auto iter = oneChunk_vect.begin(); iter != oneChunk_vect.end(); ++iter) {//setup + string series = *iter; + AdditionalParams addParams; + addParams.levSm = perSeriesPC.add_parameters({ 1 }, 0.5); //level smoothing + addParams.sSm = perSeriesPC.add_parameters({ 1 }, 0.5); //seasonality smoothing + for (int isea = 0; isea(); + } + + for (int iEpoch=0; iEpoch testLosses; //test losses of all series in this epoch + vector testAvgLosses; //test avg (over last few epochs) losses of all series in this epoch + vector trainingLosses; //training losses of all series in one epoch + vector forecLosses; vector levVarLosses; vector stateLosses; + #if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 5, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&iEpoch, 0, NULL)); + #endif + + for (auto iter = oneChunk_vect.begin() ; iter != oneChunk_vect.end(); ++iter) { + string series=*iter; + auto m4Obj = allSeries_map[series]; + + #if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 4, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)series.c_str(), 0, &nullTerminatedStringOfSeries)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE_I + 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&m4Obj.n, 0, NULL)); + #endif + + ComputationGraph cg; + for (int il=0; il season_exVect;//vector, because we do not know how long the series is + for (int iseas=0; iseas seas==1 + season_exVect.push_back(seas);//Expression is a simple struct, without any storage management, so the auto copy constructor works OK. 
+ } + season_exVect.push_back(season_exVect[0]); + + vector logDiffOfLevels_vect; + vector levels_exVect; + Expression lev=cdiv(input(cg, m4Obj.vals[0]), season_exVect[0]); + levels_exVect.push_back(lev); + for (int i=1; i 0) { + vector levelVarLoss_v; + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx=season_exVect.size()-SEASONALITY; + for (int i=0;i<(OUTPUT_SIZE_I-SEASONALITY);i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx+i]); + } + vector losses; + for (int i=INPUT_SIZE_I-1; i<(m4Obj.n- OUTPUT_SIZE_I); i++) { + vector::const_iterator firstE = season_exVect.begin() +i+1-INPUT_SIZE_I; + vector::const_iterator pastLastE = season_exVect.begin() +i+1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression inputSeasonality_ex=concatenate(inputSeasonality_exVect); + + vector::const_iterator first = m4Obj.vals.begin() +i+1-INPUT_SIZE_I; + vector::const_iterator pastLast = m4Obj.vals.begin() +i+1; //not including the last one + vector input_vect(first, pastLast); //[first,pastLast) + Expression input0_ex=input(cg,{INPUT_SIZE},input_vect); + Expression input1_ex=cdiv(input0_ex,inputSeasonality_ex); //deseasonalization + vector joinedInput_ex; + input1_ex= cdiv(input1_ex, levels_exVect[i]); + joinedInput_ex.emplace_back(noise(squash(input1_ex), NOISE_STD)); //normalization+noise + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality_ex=concatenate(outputSeasonality_exVect); + + first = m4Obj.vals.begin() +i+1; + pastLast = m4Obj.vals.begin() +i+1+OUTPUT_SIZE_I; + vector labels_vect(first, pastLast); //[first,pastLast) + Expression labels0_ex=input(cg,{OUTPUT_SIZE},labels_vect); + Expression labels1_ex=cdiv(labels0_ex,outputSeasonality_ex); //deseasonalization + labels1_ex= cdiv(labels1_ex, levels_exVect[i]);//normalization + Expression labels_ex=squash(labels1_ex); + + Expression loss_ex=pinBallLoss(out_ex, labels_ex); + if (i>=INPUT_SIZE_I+MIN_INP_SEQ_LEN) + losses.push_back(loss_ex); + } + + Expression forecLoss_ex= average(losses); + Expression loss_exp = forecLoss_ex; + + float levVarLoss=0; + if (LEVEL_VARIABILITY_PENALTY > 0) { + Expression levelVarLossP_ex = levelVarLoss_ex*LEVEL_VARIABILITY_PENALTY; + levVarLoss = as_scalar(levelVarLossP_ex.value()); + levVarLosses.push_back(levVarLoss); + loss_exp= loss_exp + levelVarLossP_ex; + } + + float cStateLoss=0; + if (C_STATE_PENALTY>0) { + vector cStateLosses_vEx; + for (int irnn = 0; irnn < rNNStack.size(); irnn++) + for (int it = 0; it maxAbs) { + maxAbs = abs(state[iv]); + timeOfMax = it; + layerOfMax = il; + chunkOfMax = irnn; + } + } + } //through layers/states + } //through time + } //through chunks + + cout << "levSm:" << as_scalar(levSm_ex.value()) << endl; + cout << "sSm:" << as_scalar(sSm_ex.value()) << endl; + cout << " min season=" << minSeason << endl; + cout << " min level=" << minLevel << endl; + cout << " max abs:" << maxAbs << " at time:" << timeOfMax << " at layer:" << layerOfMax << " and chunk:" << chunkOfMax << endl; + + //diagSeries.insert(series); + pc.reset_gradient(); + perSeriesPC.reset_gradient(); + } + + //saving per-series values for diagnostics purposes + AdditionalParamsF &histAdditionalParams= historyOfAdditionalParams_map[series]->at(iEpoch); + 
histAdditionalParams.levSm=as_scalar(levSm_ex.value()); + histAdditionalParams.sSm=as_scalar(sSm_ex.value()); + for (int isea=0; isea::const_iterator firstE = season_exVect.begin() + i + 1 - INPUT_SIZE_I; + vector::const_iterator pastLastE = season_exVect.begin() + i + 1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression inputSeasonality_ex = concatenate(inputSeasonality_exVect); + + vector::const_iterator first = m4Obj.vals.begin() + i + 1 - INPUT_SIZE_I; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1; //not including the last one + vector input_vect(first, pastLast); //[first,pastLast) + Expression input0_ex = input(cg, { INPUT_SIZE }, input_vect); + Expression input1_ex = cdiv(input0_ex, inputSeasonality_ex); //deseasonalization + vector joinedInput_ex; + input1_ex= cdiv(input1_ex, levels_exVect[i]);//normalization + joinedInput_ex.emplace_back(squash(input1_ex)); + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality_ex = concatenate(outputSeasonality_exVect); + + Expression out_ex; + if (ADD_NL_LAYER) { + out_ex=MLPW_ex*rnn_ex+MLPB_ex; + out_ex = adapterW_ex*tanh(out_ex)+adapterB_ex; + } else + out_ex=adapterW_ex*rnn_ex+adapterB_ex; + + out_ex = cmult(expand(out_ex), outputSeasonality_ex)*levels_exVect[i];//back to original scale + vector out_vect = as_vector(out_ex.value()); + + if (LBACK > 0) { + float qLoss = errorFunc(out_vect, m4Obj.testVals); + testLosses.push_back(qLoss); + } + + testResults_map[series][iEpoch%AVERAGING_LEVEL] = out_vect; + if (iEpoch >= AVERAGING_LEVEL) { + if (USE_MEDIAN) { + if (testResults_map[series][AVERAGING_LEVEL].size() == 0) + testResults_map[series][AVERAGING_LEVEL] = out_vect; //just to initialized, to make space. 
The values will be overwritten + for (int iii = 0; iii < OUTPUT_SIZE_I; iii++) { + vector temp_vect2; + for (int ii = 0; ii firstForec = testResults_map[series][0]; + testResults_map[series][AVERAGING_LEVEL] = firstForec; + for (int ii = 1; ii nextForec = testResults_map[series][ii]; + for (int iii = 0; iii 0) { + float qLoss = errorFunc(testResults_map[series][AVERAGING_LEVEL], m4Obj.testVals); + testAvgLosses.push_back(qLoss); + + #if defined USE_ODBC //save + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE_I + 1, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&forecastLoss, 0, NULL)); + + for (int io = 0; io < OUTPUT_SIZE_I; io++) { + int ipos=OFFSET_TO_FIRST_ACTUAL + 1 + 2*io; + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, ipos, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&m4Obj.testVals[io], 0, NULL)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, ipos+1, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&testResults_map[series][AVERAGING_LEVEL][io], 0, NULL)); + } + if (MAX_NUM_OF_SERIES<0) + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLExecute(hInsertStmt)); + #endif + } + } //time to average + }//last anchor point of the series + }//through TEST loop + }//through series + + + if (iEpoch % FREQ_OF_TEST == 0) { + float averageTrainingLoss = accumulate(trainingLosses.begin(), trainingLosses.end(), 0.0) / trainingLosses.size(); + + cout << ibig << " " << iEpoch << " loss:" << averageTrainingLoss * 100; + if (LEVEL_VARIABILITY_PENALTY > 0 || C_STATE_PENALTY > 0) { + float averageForecLoss = accumulate(forecLosses.begin(), forecLosses.end(), 0.0) / forecLosses.size(); + cout << " forecast loss:" << averageForecLoss*100; + } + if (LEVEL_VARIABILITY_PENALTY > 0) { + float averagelevVarLoss = accumulate(levVarLosses.begin(), levVarLosses.end(), 0.0) / levVarLosses.size(); + cout << " levVar loss:" << averagelevVarLoss * 100; + } + if (C_STATE_PENALTY > 0) { + float averageStateLoss = accumulate(stateLosses.begin(), stateLosses.end(), 0.0) / stateLosses.size(); + cout << " state loss:" << averageStateLoss * 100; + } + + float averageTestLoss=0; + if (LBACK > 0) { + float averageTestLoss = accumulate(testLosses.begin(), testLosses.end(), 0.0) / testLosses.size(); + cout<<" Test loss:" << averageTestLoss; + if (iEpoch >= AVERAGING_LEVEL) { + float averageTestAvgLoss = accumulate(testAvgLosses.begin(), testAvgLosses.end(), 0.0) / testAvgLosses.size();//of this epoch + cout << " avgLoss:" << averageTestAvgLoss; + } + if (USE_AUTO_LEARNING_RATE) + perfValid_vect.push_back(averageTestLoss); + } + cout << endl; + } + + if (USE_AUTO_LEARNING_RATE) { + bool changeL2Rate = false; + if (iEpoch >= 2) { + if (iEpoch < L3_PERIOD) + changeL2Rate = perfValid_vect[perfValid_vect.size() - 2] MIN_LEARNING_RATE && (iEpoch - epochOfLastChangeOfLRate) >= MIN_EPOCHS_BEFORE_CHANGING_LRATE) { + learning_rate /= LR_RATIO; + cout << "decreasing LR to:" << learning_rate << endl; + epochOfLastChangeOfLRate = iEpoch; + trainer.learning_rate = learning_rate; + } + } + #if defined USE_ODBC + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLEndTran( + SQL_HANDLE_DBC, + hDbc, + SQL_COMMIT)); + #endif + }//through epochs + + if (PRINT_DIAGN) {//some diagnostic info + set diagSeries; + for (int i = 0; i<1; i++) {//add a few normal ones + int irand = uniOnSeries(rng); + diagSeries.insert(oneChunk_vect[irand]); + } + for (auto series : diagSeries) { + cout << endl << series << endl; + 
array* historyOfAdditionalParams_ptrToArr = historyOfAdditionalParams_map[series]; + cout << "lSm:" << endl; + for (int iEpoch = 0; iEpochat(iEpoch).levSm << " "; + cout << endl; + cout << "sSm:" << endl; + for (int iEpoch = 0; iEpochat(iEpoch).sSm << " "; + cout << endl; + cout << "seasons:" << endl; + for (int isea = 0; iseaat(iEpoch).initSeasonality[isea] << " "; + cout << endl; + } + cout << endl; + for (int iEpoch = 0; iEpochat(iEpoch).levels.size()>0) { + cout << "levels:" << iEpoch << " "; + for (int iv = 0; ivat(iEpoch).levels.size(); iv++) + cout << historyOfAdditionalParams_ptrToArr->at(iEpoch).levels[iv] << ", "; + cout << endl; + cout << "seas:" << iEpoch << " "; + for (int iv = 0; ivat(iEpoch).seasons.size(); iv++) + cout << historyOfAdditionalParams_ptrToArr->at(iEpoch).seasons[iv] << ", "; + cout << endl; + } + } + } + } + + //save the forecast to outputFile + ofstream outputFile; + outputFile.open(outputPath); + for (auto iter = oneChunk_vect.begin(); iter != oneChunk_vect.end(); ++iter) { + string series = *iter; + outputFile<< series; + for (int io=0; io + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + RelWithDebug + Win32 + + + RelWithDebug + x64 + + + + + + + + + + + {928301A0-F01A-48F6-A499-851B3CE8BD4E} + Win32Proj + M41 + 8.1 + + + + Application + true + v140 + Unicode + + + Application + true + v140 + Unicode + + + Application + false + v140 + true + Unicode + + + Application + true + v140 + Unicode + Sequential + + + Application + true + v140 + Unicode + Sequential + + + Application + false + v140 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + true + + + true + + + true + + + true + + + false + + + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + + + + + NotUsing + Level1 + Disabled + WIN32;_WINDOWS;EIGEN_USE_MKL_ALL;EIGEN_FAST_MATH;NOMINMAX;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + E:\progs2\dynet;E:\progs\Eigen; + + + Console + true + E:\progs2\dynet\buildMKL\dynet\Debug + dynet.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + NotUsing + Level1 + MaxSpeed + WIN32;_WINDOWS;EIGEN_FAST_MATH;EIGEN_USE_MKL_ALL;NOMINMAX;NDEBUG_;CONSOLE;%(PreprocessorDefinitions) + E:\progs2\dynet;E:\progs\Eigen; + AnySuitable + true + Speed + AdvancedVectorExtensions + Default + MultiThreadedDLL + ProgramDatabase + true + false + + + Console + true + E:\progs2\dynet\buildMKL\dynet\RelWithDebInfo + dynet.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + true + true + + + + + + \ No newline at end of file diff --git a/118 - slaweks17/c++/windows_VisualStudio/M41/slstm.cpp b/118 - slaweks17/c++/windows_VisualStudio/M41/slstm.cpp new file mode 100644 index 0000000..3935604 --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/M41/slstm.cpp @@ -0,0 +1,729 @@ +/* +My implementation of dilated LSTMs, based on Dynet LSTM builders +- DilatedLSTMBuilder - standard Dilated 
LSTM (https://papers.nips.cc/paper/6613-dilated-recurrent-neural-networks.pdf) +- ResidualDilatedLSTMBuilder - Dilated LSTM with special Residual shortcuts, after https://arxiv.org/abs/1701.03360 +- AttentiveDilatedLSTMBuilder - Dilated LSTM with Attention mechanism, as in the second stage of https://arxiv.org/abs/1704.02971 +* +Slawek Smyl, Mar-May 2018 +*/ + +#include "slstm.h" +#include "dynet/lstm.h" +#include "dynet/param-init.h" + +#include +#include +#include +#include + +#if defined DEBUG + #define _DEBUG +#endif + +using namespace std; + +namespace dynet { + + // ResidualDilatedLSTMBuilder based on Vanilla LSTM + enum { _X2I, _H2I, _BI, _X2F, _H2F, _BF, _X2O, _H2O, _BO, _X2G, _H2G, _BG }; + enum { LN_GH, LN_BH, LN_GX, LN_BX, LN_GC, LN_BC }; + + ResidualDilatedLSTMBuilder::ResidualDilatedLSTMBuilder() : has_initial_state(false), layers(0), input_dim(0), hid(0), dropout_rate_h(0), ln_lstm(false), forget_bias(1.f), dropout_masks_valid(false) { } + + ResidualDilatedLSTMBuilder::ResidualDilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model, + bool ln_lstm, float forget_bias) : dilations(dilations), layers(unsigned(dilations.size())), + input_dim(input_dim), hid(hidden_dim), ln_lstm(ln_lstm), forget_bias(forget_bias), dropout_masks_valid(false) { + unsigned layer_input_dim = input_dim; + local_model = model.add_subcollection("ResidualDilated-lstm-builder"); + for (unsigned i = 0; i < layers; ++i) { + // i + Parameter p_x2i = local_model.add_parameters({ hidden_dim * 4, layer_input_dim }); + Parameter p_h2i = local_model.add_parameters({ hidden_dim * 4, hidden_dim }); + //Parameter p_c2i = model.add_parameters({hidden_dim, hidden_dim}); + Parameter p_bi = local_model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(0.f)); + + layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next + + vector ps = { p_x2i, p_h2i, /*p_c2i,*/ p_bi }; + params.push_back(ps); + + if (ln_lstm) { + Parameter p_gh = model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(1.f)); + Parameter p_bh = model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(0.f)); + Parameter p_gx = model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(1.f)); + Parameter p_bx = model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(0.f)); + Parameter p_gc = model.add_parameters({ hidden_dim }, ParameterInitConst(1.f)); + Parameter p_bc = model.add_parameters({ hidden_dim }, ParameterInitConst(0.f)); + vector ln_ps = { p_gh, p_bh, p_gx, p_bx, p_gc, p_bc }; + ln_params.push_back(ln_ps); + } + } // layers + dropout_rate = 0.f; + dropout_rate_h = 0.f; + } + + void ResidualDilatedLSTMBuilder::new_graph_impl(ComputationGraph& cg, bool update) { + param_vars.clear(); + if (ln_lstm)ln_param_vars.clear(); + for (unsigned i = 0; i < layers; ++i) { + auto& p = params[i]; + vector vars; + for (unsigned j = 0; j < p.size(); ++j) { vars.push_back(update ? parameter(cg, p[j]) : const_parameter(cg, p[j])); } + param_vars.push_back(vars); + if (ln_lstm) { + auto& ln_p = ln_params[i]; + vector ln_vars; + for (unsigned j = 0; j < ln_p.size(); ++j) { ln_vars.push_back(update ? 
parameter(cg, ln_p[j]) : const_parameter(cg, ln_p[j])); } + ln_param_vars.push_back(ln_vars); + } + } + + _cg = &cg; + } + // layout: 0..layers = c + // layers+1..2*layers = h + void ResidualDilatedLSTMBuilder::start_new_sequence_impl(const vector& hinit) { + h.clear(); + c.clear(); + + if (hinit.size() > 0) { + DYNET_ARG_CHECK(layers * 2 == hinit.size(), + "ResidualDilatedLSTMBuilder must be initialized with 2 times as many expressions as layers " + "(hidden state, and cell for each layer). However, for " << layers << " layers, " << + hinit.size() << " expressions were passed in"); + h0.resize(layers); + c0.resize(layers); + for (unsigned i = 0; i < layers; ++i) { + c0[i] = hinit[i]; + h0[i] = hinit[i + layers]; + } + has_initial_state = true; + } + else { + has_initial_state = false; + } + + dropout_masks_valid = false; + } + + void ResidualDilatedLSTMBuilder::set_dropout_masks(unsigned batch_size) { + masks.clear(); + for (unsigned i = 0; i < layers; ++i) { + std::vector masks_i; + unsigned idim = (i == 0) ? input_dim : hid; + if (dropout_rate > 0.f || dropout_rate_h > 0.f) { + float retention_rate = 1.f - dropout_rate; + float retention_rate_h = 1.f - dropout_rate_h; + float scale = 1.f / retention_rate; + float scale_h = 1.f / retention_rate_h; + // in + masks_i.push_back(random_bernoulli(*_cg, Dim({ idim }, batch_size), retention_rate, scale)); + // h + masks_i.push_back(random_bernoulli(*_cg, Dim({ hid }, batch_size), retention_rate_h, scale_h)); + masks.push_back(masks_i); + } + } + dropout_masks_valid = true; + } + + ParameterCollection & ResidualDilatedLSTMBuilder::get_parameter_collection() { + return local_model; + } + + // TODO - Make this correct + // Copied c from the previous step (otherwise c.size()< h.size()) + // Also is creating a new step something we want? + // wouldn't overwriting the current one be better? + Expression ResidualDilatedLSTMBuilder::set_h_impl(int prev, const vector& h_new) { + DYNET_ARG_CHECK(h_new.empty() || h_new.size() == layers, + "ResidualDilatedLSTMBuilder::set_h expects as many inputs as layers, but got " << + h_new.size() << " inputs for " << layers << " layers"); + const unsigned t = h.size(); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = h_new[i]; + Expression c_i = c[t - 1][i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); + } + // Current implementation : s_new is either {new_c[0],...,new_c[n]} + // or {new_c[0],...,new_c[n],new_h[0],...,new_h[n]} + Expression ResidualDilatedLSTMBuilder::set_s_impl(int prev, const std::vector& s_new) { + DYNET_ARG_CHECK(s_new.size() == layers || s_new.size() == 2 * layers, + "ResidualDilatedLSTMBuilder::set_s expects either as many inputs or twice as many inputs as layers, but got " << s_new.size() << " inputs for " << layers << " layers"); + bool only_c = s_new.size() == layers; + const unsigned t = c.size(); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = only_c ? 
h[t - 1][i] : s_new[i + layers]; + Expression c_i = s_new[i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); + } + + Expression ResidualDilatedLSTMBuilder::add_input_impl(int prev, const Expression& x) { + h.push_back(vector(layers)); + c.push_back(vector(layers)); + vector& ht = h.back(); + vector& ct = c.back(); + Expression in = x; + if ((dropout_rate > 0.f || dropout_rate_h > 0.f) && !dropout_masks_valid) set_dropout_masks(x.dim().bd); + for (unsigned i = 0; i < layers; ++i) { + int dilation_offset = dilations[i] - 1; + const vector& vars = param_vars[i]; + + Expression i_h_tm1, i_c_tm1; + bool has_prev_state = (prev >= 0 || has_initial_state); + if (prev < dilation_offset) { + if (has_initial_state) { + // intial value for h and c at timestep 0 in layer i + // defaults to zero matrix input if not set in add_parameter_edges + i_h_tm1 = h0[i]; + i_c_tm1 = c0[i]; + } + else { + i_h_tm1 = zeros(*_cg, Dim({ vars[_BI].dim()[0] / 4 }, x.dim().bd)); + i_c_tm1 = i_h_tm1; + } + } + else { + i_h_tm1 = h[prev - dilation_offset][i]; + i_c_tm1 = c[prev - dilation_offset][i]; + } + // apply dropout according to https://arxiv.org/abs/1512.05287 (tied weights) + if (dropout_rate > 0.f) { + in = cmult(in, masks[i][0]); + } + if (has_prev_state && dropout_rate_h > 0.f) + i_h_tm1 = cmult(i_h_tm1, masks[i][1]); + // input + Expression tmp; + Expression i_ait; + Expression i_aft; + Expression i_aot; + Expression i_agt; + if (ln_lstm) { + const vector& ln_vars = ln_param_vars[i]; + if (has_prev_state) + tmp = vars[_BI] + layer_norm(vars[_X2I] * in, ln_vars[LN_GX], ln_vars[LN_BX]) + layer_norm(vars[_H2I] * i_h_tm1, ln_vars[LN_GH], ln_vars[LN_BH]); + else + tmp = vars[_BI] + layer_norm(vars[_X2I] * in, ln_vars[LN_GX], ln_vars[LN_BX]); + } + else { + if (has_prev_state) + tmp = affine_transform({ vars[_BI], vars[_X2I], in, vars[_H2I], i_h_tm1 }); + else + tmp = affine_transform({ vars[_BI], vars[_X2I], in }); + } + i_ait = pick_range(tmp, 0, hid); + i_aft = pick_range(tmp, hid, hid * 2); + i_aot = pick_range(tmp, hid * 2, hid * 3); + i_agt = pick_range(tmp, hid * 3, hid * 4); + Expression i_it = logistic(i_ait); + if (forget_bias != 0.0) + tmp = logistic(i_aft + forget_bias); + else + tmp = logistic(i_aft); + + Expression i_ft = tmp; + Expression i_ot = logistic(i_aot); + Expression i_gt = tanh(i_agt); + + ct[i] = has_prev_state ? 
(cmult(i_ft, i_c_tm1) + cmult(i_it, i_gt)) : cmult(i_it, i_gt); + if (ln_lstm) { + const vector& ln_vars = ln_param_vars[i]; + if (i==0) + in = ht[i] = cmult(i_ot, tanh(layer_norm(ct[i], ln_vars[LN_GC], ln_vars[LN_BC]))); + else + in = ht[i] = cmult(i_ot, in+tanh(layer_norm(ct[i], ln_vars[LN_GC], ln_vars[LN_BC]))); + } + else { + if (i==0) + in = ht[i] = cmult(i_ot, tanh(ct[i])); + else + in = ht[i] = cmult(i_ot, in+tanh(ct[i])); + } + } + return ht.back(); + } + + void ResidualDilatedLSTMBuilder::copy(const RNNBuilder & rnn) { + const ResidualDilatedLSTMBuilder & rnn_lstm = (const ResidualDilatedLSTMBuilder&)rnn; + DYNET_ARG_CHECK(params.size() == rnn_lstm.params.size(), + "Attempt to copy ResidualDilatedLSTMBuilder with different number of parameters " + "(" << params.size() << " != " << rnn_lstm.params.size() << ")"); + for (size_t i = 0; i < params.size(); ++i) + for (size_t j = 0; j < params[i].size(); ++j) + params[i][j] = rnn_lstm.params[i][j]; + for (size_t i = 0; i < ln_params.size(); ++i) + for (size_t j = 0; j < ln_params[i].size(); ++j) + ln_params[i][j] = rnn_lstm.ln_params[i][j]; + } + + void ResidualDilatedLSTMBuilder::set_dropout(float d) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d; + } + + void ResidualDilatedLSTMBuilder::set_dropout(float d, float d_h) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f && d_h >= 0.f && d_h <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d_h; + } + + void ResidualDilatedLSTMBuilder::disable_dropout() { + dropout_rate = 0.f; + dropout_rate_h = 0.f; + } + + + + + //enum { _X2I, _H2I, _BI, _X2F, _H2F, _BF, _X2O, _H2O, _BO, _X2G, _H2G, _BG }; + enum { _X2I_, _H2I_, _BI_, _XA1, _HA1, _SA1, _BA1, _A2, _B2 }; + + +//*************************** + + + + AttentiveDilatedLSTMBuilder::AttentiveDilatedLSTMBuilder() : has_initial_state(false), layers(0), input_dim(0), hid(0), dropout_rate_h(0), weightnoise_std(0), dropout_masks_valid(false) { } + + AttentiveDilatedLSTMBuilder::AttentiveDilatedLSTMBuilder(vector max_dilations, + unsigned input_dim, + unsigned hidden_dim, + unsigned attention_dim, + ParameterCollection& model) + : max_dilations(max_dilations), layers(unsigned(max_dilations.size())), + input_dim(input_dim), hid(hidden_dim), attention_dim(attention_dim), weightnoise_std(0), dropout_masks_valid(false) { + unsigned layer_input_dim = input_dim; + local_model = model.add_subcollection("compact-vanilla-lstm-builder"); + for (unsigned i = 0; i < layers; ++i) { + // i + Parameter p_Wx = local_model.add_parameters({ hidden_dim * 4, layer_input_dim }); + Parameter p_Wh = local_model.add_parameters({ hidden_dim * 4, hidden_dim }); + Parameter p_b = local_model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(0.f)); + + Parameter p_Wxa1 = local_model.add_parameters({ attention_dim, layer_input_dim }); + Parameter p_Wha1 = local_model.add_parameters({ attention_dim, hidden_dim }); + Parameter p_Wsa1 = local_model.add_parameters({ attention_dim, hidden_dim }); + Parameter p_ba1 = local_model.add_parameters({ attention_dim }, ParameterInitConst(0.f)); + + Parameter p_Wa2 = local_model.add_parameters({ max_dilations[i], attention_dim }); + Parameter p_ba2 = local_model.add_parameters({ max_dilations[i] }, ParameterInitConst(0.f)); + + layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next + + vector ps = { p_Wx, p_Wh, p_b, p_Wxa1, p_Wha1, p_Wsa1, p_ba1, p_Wa2, p_ba2 }; + 
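+ //p_Wxa1, p_Wha1, p_Wsa1 and p_ba1 project the layer input, previous hidden state and previous cell
+ //state into an attention_dim-sized vector; p_Wa2/p_ba2 then turn it into max_dilations[i] scores,
+ //which add_input_impl() passes through tanh and softmax to weight the last max_dilations[i] hidden
+ //states of this layer.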
params.push_back(ps); + + } // layers + dropout_rate = 0.f; + dropout_rate_h = 0.f; + } + + void AttentiveDilatedLSTMBuilder::new_graph_impl(ComputationGraph& cg, bool update) { + param_vars.clear(); + for (unsigned i = 0; i < layers; ++i) { + auto& p = params[i]; + vector vars; + for (unsigned j = 0; j < p.size(); ++j) { + vars.push_back(update ? parameter(cg, p[j]) : const_parameter(cg, p[j])); + } + param_vars.push_back(vars); + } + + _cg = &cg; + } + // layout: 0..layers = c + // layers+1..2*layers = h + void AttentiveDilatedLSTMBuilder::start_new_sequence_impl(const vector& hinit) { + h.clear(); + c.clear(); + + if (hinit.size() > 0) { + DYNET_ARG_CHECK(layers * 2 == hinit.size(), + "AttentiveDilatedLSTMBuilder must be initialized with 2 times as many expressions as layers " + "(hidden state, and cell for each layer). However, for " << layers << " layers, " << + hinit.size() << " expressions were passed in"); + h0.resize(layers); + c0.resize(layers); + for (unsigned i = 0; i < layers; ++i) { + c0[i] = hinit[i]; + h0[i] = hinit[i + layers]; + } + has_initial_state = true; + } + else { + has_initial_state = false; + } + + dropout_masks_valid = false; + } + + void AttentiveDilatedLSTMBuilder::set_dropout_masks(unsigned batch_size) { + masks.clear(); + for (unsigned i = 0; i < layers; ++i) { + std::vector masks_i; + unsigned idim = (i == 0) ? input_dim : hid; + if (dropout_rate > 0.f || dropout_rate_h > 0.f) { + float retention_rate = 1.f - dropout_rate; + float retention_rate_h = 1.f - dropout_rate_h; + float scale = 1.f / retention_rate; + float scale_h = 1.f / retention_rate_h; + // in + masks_i.push_back(random_bernoulli(*_cg, Dim({ idim }, batch_size), retention_rate, scale)); + // h + masks_i.push_back(random_bernoulli(*_cg, Dim({ hid }, batch_size), retention_rate_h, scale_h)); + masks.push_back(masks_i); + } + } + dropout_masks_valid = true; + } + + ParameterCollection & AttentiveDilatedLSTMBuilder::get_parameter_collection() { + return local_model; + } + + // TODO - Make this correct + // Copied c from the previous step (otherwise c.size()< h.size()) + // Also is creating a new step something we want? + // wouldn't overwriting the current one be better? + Expression AttentiveDilatedLSTMBuilder::set_h_impl(int prev, const vector& h_new) { + DYNET_ARG_CHECK(h_new.empty() || h_new.size() == layers, + "AttentiveDilatedLSTMBuilder::set_h expects as many inputs as layers, but got " << + h_new.size() << " inputs for " << layers << " layers"); + const unsigned t = unsigned(h.size()); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = h_new[i]; + Expression c_i = c[t - 1][i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); + } + // Current implementation : s_new is either {new_c[0],...,new_c[n]} + // or {new_c[0],...,new_c[n],new_h[0],...,new_h[n]} + Expression AttentiveDilatedLSTMBuilder::set_s_impl(int prev, const std::vector& s_new) { + DYNET_ARG_CHECK(s_new.size() == layers || s_new.size() == 2 * layers, + "AttentiveDilatedLSTMBuilder::set_s expects either as many inputs or twice as many inputs as layers, but got " << s_new.size() << " inputs for " << layers << " layers"); + bool only_c = s_new.size() == layers; + const unsigned t = unsigned(c.size()); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = only_c ? 
h[t - 1][i] : s_new[i + layers]; + Expression c_i = s_new[i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); + } + + Expression AttentiveDilatedLSTMBuilder::add_input_impl(int prev, const Expression& x) { + h.push_back(vector(layers)); + c.push_back(vector(layers)); + vector& ht = h.back(); + vector& ct = c.back(); + Expression in = x; + if ((dropout_rate > 0.f || dropout_rate_h > 0.f) && !dropout_masks_valid) set_dropout_masks(x.dim().bd); + for (unsigned i = 0; i < layers; ++i) { + int dilation_offset= max_dilations[i]-1; + const vector& vars = param_vars[i]; + Expression i_h_tm1, i_c_tm1; + if (prev < dilation_offset) { + if (has_initial_state) { + // initial value for h and c at timestep 0 in layer i + // defaults to zero matrix input if not set in add_parameter_edges + i_h_tm1 = h0[i]; + i_c_tm1 = c0[i]; + } + else { + i_h_tm1 = zeros(*_cg, Dim({ vars[_BI].dim()[0] / 4 }, x.dim().bd)); + i_c_tm1 = i_h_tm1; + } + } + else { + if (dilation_offset>0) { + //enum { _X2I, _H2I, _BI, _XA1, _HA1, _SA1, _BA1, _A2, _B2 }; + Expression weights_ex=vars[_XA1]*in+ vars[_HA1]*h[prev][i]+ vars[_SA1]*c[prev][i]+ vars[_BA1]; + weights_ex=tanh(weights_ex); + weights_ex=vars[_A2]* weights_ex+ vars[_B2]; + weights_ex =softmax(weights_ex); + #if defined _DEBUG + vector weights=as_vector(weights_ex.value()); + #endif + + unsigned indx=0; + Expression w_ex = pick(weights_ex, indx); + Expression avg_h= cmult(h[prev][i], w_ex); + for (indx=1; indx <= dilation_offset; indx++) {//dilation_offset==max_dilations[i]-1, so together with indx==0, we cover max_dilations[i] steps + w_ex = pick(weights_ex, indx); + avg_h = avg_h+cmult(h[prev- indx][i], w_ex); + } + i_h_tm1 = avg_h; + } else { + i_h_tm1 = h[prev- dilation_offset][i]; + } + i_c_tm1 = c[prev- dilation_offset][i]; + } + if (dropout_rate > 0.f || dropout_rate_h > 0.f) { + // apply dropout according to https://arxiv.org/abs/1512.05287 (tied weights) + Expression gates_t = vanilla_lstm_gates_dropout({ in }, i_h_tm1, vars[_X2I], vars[_H2I], vars[_BI], masks[i][0], masks[i][1], weightnoise_std); + ct[i] = vanilla_lstm_c(i_c_tm1, gates_t); + in = ht[i] = vanilla_lstm_h(ct[i], gates_t); + } + else { + Expression gates_t = vanilla_lstm_gates({ in }, i_h_tm1, vars[_X2I], vars[_H2I], vars[_BI], weightnoise_std); + ct[i] = vanilla_lstm_c(i_c_tm1, gates_t); + in = ht[i] = vanilla_lstm_h(ct[i], gates_t); + } + } + return ht.back(); + } + + void AttentiveDilatedLSTMBuilder::copy(const RNNBuilder & rnn) { + const AttentiveDilatedLSTMBuilder & rnn_lstm = (const AttentiveDilatedLSTMBuilder&)rnn; + DYNET_ARG_CHECK(params.size() == rnn_lstm.params.size(), + "Attempt to copy AttentiveDilatedLSTMBuilder with different number of parameters " + "(" << params.size() << " != " << rnn_lstm.params.size() << ")"); + for (size_t i = 0; i < params.size(); ++i) + for (size_t j = 0; j < params[i].size(); ++j) + params[i][j] = rnn_lstm.params[i][j]; + } + + void AttentiveDilatedLSTMBuilder::set_dropout(float d) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d; + } + + void AttentiveDilatedLSTMBuilder::set_dropout(float d, float d_h) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f && d_h >= 0.f && d_h <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d_h; + } + + void AttentiveDilatedLSTMBuilder::disable_dropout() { + dropout_rate = 0.f; + dropout_rate_h = 0.f; + } + void AttentiveDilatedLSTMBuilder::set_weightnoise(float std) { + 
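+ //weightnoise_std is forwarded as the last argument of vanilla_lstm_gates()/vanilla_lstm_gates_dropout()
+ //in add_input_impl(), where Dynet applies Gaussian weight noise with this standard deviation during
+ //the gate computation as a simple regularizer; 0 (the default) disables it.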
DYNET_ARG_CHECK(std >= 0.f, "weight noise must have standard deviation >=0"); + weightnoise_std = std; + } + + //*/ + + DilatedLSTMBuilder::DilatedLSTMBuilder() : has_initial_state(false), layers(0), input_dim(0), hid(0), dropout_rate_h(0), weightnoise_std(0), dropout_masks_valid(false) { } + + DilatedLSTMBuilder::DilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model) + : dilations(dilations), layers(unsigned(dilations.size())), + input_dim(input_dim), hid(hidden_dim), weightnoise_std(0), dropout_masks_valid(false) { + unsigned layer_input_dim = input_dim; + local_model = model.add_subcollection("compact-vanilla-lstm-builder"); + for (unsigned i = 0; i < layers; ++i) { + // i + Parameter p_Wx = local_model.add_parameters({ hidden_dim * 4, layer_input_dim }); + Parameter p_Wh = local_model.add_parameters({ hidden_dim * 4, hidden_dim }); + Parameter p_b = local_model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(0.f)); + + layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next + + vector ps = { p_Wx, p_Wh, p_b }; + params.push_back(ps); + + } // layers + dropout_rate = 0.f; + dropout_rate_h = 0.f; + } + + void DilatedLSTMBuilder::new_graph_impl(ComputationGraph& cg, bool update) { + param_vars.clear(); + for (unsigned i = 0; i < layers; ++i) { + auto& p = params[i]; + vector vars; + for (unsigned j = 0; j < p.size(); ++j) { vars.push_back(update ? parameter(cg, p[j]) : const_parameter(cg, p[j])); } + param_vars.push_back(vars); + } + + _cg = &cg; + } + // layout: 0..layers = c + // layers+1..2*layers = h + void DilatedLSTMBuilder::start_new_sequence_impl(const vector& hinit) { + h.clear(); + c.clear(); + + if (hinit.size() > 0) { + DYNET_ARG_CHECK(layers * 2 == hinit.size(), + "DilatedLSTMBuilder must be initialized with 2 times as many expressions as layers " + "(hidden state, and cell for each layer). However, for " << layers << " layers, " << + hinit.size() << " expressions were passed in"); + h0.resize(layers); + c0.resize(layers); + for (unsigned i = 0; i < layers; ++i) { + c0[i] = hinit[i]; + h0[i] = hinit[i + layers]; + } + has_initial_state = true; + } else { + has_initial_state = false; + } + + dropout_masks_valid = false; + } + + void DilatedLSTMBuilder::set_dropout_masks(unsigned batch_size) { + masks.clear(); + for (unsigned i = 0; i < layers; ++i) { + std::vector masks_i; + unsigned idim = (i == 0) ? input_dim : hid; + if (dropout_rate > 0.f || dropout_rate_h > 0.f) { + float retention_rate = 1.f - dropout_rate; + float retention_rate_h = 1.f - dropout_rate_h; + float scale = 1.f / retention_rate; + float scale_h = 1.f / retention_rate_h; + // in + masks_i.push_back(random_bernoulli(*_cg, Dim({ idim }, batch_size), retention_rate, scale)); + // h + masks_i.push_back(random_bernoulli(*_cg, Dim({ hid }, batch_size), retention_rate_h, scale_h)); + masks.push_back(masks_i); + } + } + dropout_masks_valid = true; + } + + ParameterCollection & DilatedLSTMBuilder::get_parameter_collection() { + return local_model; + } + + // TODO - Make this correct + // Copied c from the previous step (otherwise c.size()< h.size()) + // Also is creating a new step something we want? + // wouldn't overwriting the current one be better? 
+ Expression DilatedLSTMBuilder::set_h_impl(int prev, const vector& h_new) { + DYNET_ARG_CHECK(h_new.empty() || h_new.size() == layers, + "DilatedLSTMBuilder::set_h expects as many inputs as layers, but got " << + h_new.size() << " inputs for " << layers << " layers"); + const unsigned t = unsigned(h.size()); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = h_new[i]; + Expression c_i = c[t - 1][i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); + } + // Current implementation : s_new is either {new_c[0],...,new_c[n]} + // or {new_c[0],...,new_c[n],new_h[0],...,new_h[n]} + Expression DilatedLSTMBuilder::set_s_impl(int prev, const std::vector& s_new) { + DYNET_ARG_CHECK(s_new.size() == layers || s_new.size() == 2 * layers, + "DilatedLSTMBuilder::set_s expects either as many inputs or twice as many inputs as layers, but got " << s_new.size() << " inputs for " << layers << " layers"); + bool only_c = s_new.size() == layers; + const unsigned t = unsigned(c.size()); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = only_c ? h[t - 1][i] : s_new[i + layers]; + Expression c_i = s_new[i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); + } + + Expression DilatedLSTMBuilder::add_input_impl(int prev, const Expression& x) { + h.push_back(vector(layers)); + c.push_back(vector(layers)); + vector& ht = h.back(); + vector& ct = c.back(); + Expression in = x; + if ((dropout_rate > 0.f || dropout_rate_h > 0.f) && !dropout_masks_valid) set_dropout_masks(x.dim().bd); + for (unsigned i = 0; i < layers; ++i) { + int dilation_offset = dilations[i] - 1; + const vector& vars = param_vars[i]; + Expression i_h_tm1, i_c_tm1; + if (prev < dilation_offset) { + if (has_initial_state) { + // initial value for h and c at timestep 0 in layer i + // defaults to zero matrix input if not set in add_parameter_edges + i_h_tm1 = h0[i]; + i_c_tm1 = c0[i]; + } else { + i_h_tm1 = zeros(*_cg, Dim({ vars[_BI].dim()[0] / 4 }, x.dim().bd)); + i_c_tm1 = i_h_tm1; + } + } else { // t > 0 + i_h_tm1 = h[prev - dilation_offset][i]; + i_c_tm1 = c[prev - dilation_offset][i]; + } + if (dropout_rate > 0.f || dropout_rate_h > 0.f) { + // apply dropout according to https://arxiv.org/abs/1512.05287 (tied weights) + Expression gates_t = vanilla_lstm_gates_dropout({ in }, i_h_tm1, vars[_X2I], vars[_H2I], vars[_BI], masks[i][0], masks[i][1], weightnoise_std); + ct[i] = vanilla_lstm_c(i_c_tm1, gates_t); + in = ht[i] = vanilla_lstm_h(ct[i], gates_t); + } else { + Expression gates_t = vanilla_lstm_gates({ in }, i_h_tm1, vars[_X2I], vars[_H2I], vars[_BI], weightnoise_std); + ct[i] = vanilla_lstm_c(i_c_tm1, gates_t); + in = ht[i] = vanilla_lstm_h(ct[i], gates_t); + } + } + return ht.back(); + } + + void DilatedLSTMBuilder::copy(const RNNBuilder & rnn) { + const DilatedLSTMBuilder & rnn_lstm = (const DilatedLSTMBuilder&)rnn; + DYNET_ARG_CHECK(params.size() == rnn_lstm.params.size(), + "Attempt to copy DilatedLSTMBuilder with different number of parameters " + "(" << params.size() << " != " << rnn_lstm.params.size() << ")"); + for (size_t i = 0; i < params.size(); ++i) + for (size_t j = 0; j < params[i].size(); ++j) + params[i][j] = rnn_lstm.params[i][j]; + } + + void DilatedLSTMBuilder::set_dropout(float d) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d; + } + + void 
DilatedLSTMBuilder::set_dropout(float d, float d_h) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f && d_h >= 0.f && d_h <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d_h; + } + + void DilatedLSTMBuilder::disable_dropout() { + dropout_rate = 0.f; + dropout_rate_h = 0.f; + } + void DilatedLSTMBuilder::set_weightnoise(float std) { + DYNET_ARG_CHECK(std >= 0.f, "weight noise must have standard deviation >=0"); + weightnoise_std = std; + } + +} // namespace dynet diff --git a/118 - slaweks17/c++/windows_VisualStudio/M41/slstm.h b/118 - slaweks17/c++/windows_VisualStudio/M41/slstm.h new file mode 100644 index 0000000..adb63a7 --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/M41/slstm.h @@ -0,0 +1,394 @@ +/** +* file slstm.h +* header for my implementation of dilated LSTMs, based on Dynet LSTM builders + - DilatedLSTMBuilder - standard Dilated LSTM (https://papers.nips.cc/paper/6613-dilated-recurrent-neural-networks.pdf) + - ResidualDilatedLSTMBuilder - Dilated LSTM with special Residual shortcuts, after https://arxiv.org/abs/1701.03360 + - AttentiveDilatedLSTMBuilder - Dilated LSTM with Attention mechanism, as in the second stage of https://arxiv.org/abs/1704.02971 +* +Slawek Smyl, Mar-May 2018 +*/ + +#ifndef DYNET_SLSTMS_H_ +#define DYNET_SLSTMS_H_ + +#include "dynet/dynet.h" +#include "dynet/rnn.h" +#include "dynet/expr.h" + +using namespace std; + +namespace dynet { + + //basd on VanillaLSTMBuilder + struct ResidualDilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + ResidualDilatedLSTMBuilder(); + /** + * \brief Constructor for the ResidualDilatedLSTMBuilder + * + * \param dilations Vector of dilations + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + * \param ln_lstm Whether to use layer normalization + * \param forget_bias value(float) to use as bias for the forget gate(default = 1.0) + */ + explicit ResidualDilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model, + bool ln_lstm = false, + float forget_bias = 1.f); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. 
+ * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. + * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + * \param d_h Dropout rate \f$d_h\f$ for the output \f$h_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + /** + * \brief Get parameters in ResidualDilatedLSTMBuilder + * \return list of points to ParameterStorage objects + */ + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + // first index is layer, then ... + std::vector> ln_params; + + // first index is layer, then ... + std::vector> param_vars; + // first index is layer, then ... + std::vector> ln_param_vars; + + // first index is layer, then ... 
+ std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + float dropout_rate_h; + bool ln_lstm; + float forget_bias; + bool dropout_masks_valid; + vector dilations; //one int per layer + + private: + ComputationGraph* _cg; // Pointer to current cg + + }; + + + struct DilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + DilatedLSTMBuilder(); + /** + * \brief Constructor for the DilatedLSTMBuilder + * + * \param dilations Vector of dilations + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + */ + explicit DilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 
+ * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + + void set_weightnoise(float std); + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + + // first index is layer, then ... + std::vector> param_vars; + + // first index is layer, then ... + std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + float dropout_rate_h; + float weightnoise_std; + vector dilations; //one int per layer + + bool dropout_masks_valid; + private: + ComputationGraph* _cg; // Pointer to current cg + + }; + + + struct AttentiveDilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + AttentiveDilatedLSTMBuilder(); + /** + * \brief Constructor for the AttentiveDilatedLSTMBuilder + * + * \param max_dilations Vector, maximum dilations (per layer) + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + */ + explicit AttentiveDilatedLSTMBuilder(vector max_dilations, + unsigned input_dim, + unsigned hidden_dim, + unsigned attention_dim, + ParameterCollection& model); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? 
c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. + * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + + void set_weightnoise(float std); + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + + // first index is layer, then ... + std::vector> param_vars; + + // first index is layer, then ... 
+ std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + unsigned attention_dim; + float dropout_rate_h; + float weightnoise_std; + vector max_dilations; //one int per layer + + bool dropout_masks_valid; + private: + ComputationGraph* _cg; // Pointer to current cg + + }; +} // namespace dynet + +#endif diff --git a/118 - slaweks17/c++/windows_VisualStudio/M42/ES_RNN_PI.cc b/118 - slaweks17/c++/windows_VisualStudio/M42/ES_RNN_PI.cc new file mode 100644 index 0000000..268c654 --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/M42/ES_RNN_PI.cc @@ -0,0 +1,1246 @@ +/*ES-RNN: ES-RNN Exponential Smoothing Recurrent Neural Network hybrid. Prediction intervals. +Slawek Smyl, Jan-May 2017. + +Dilated LSTMs, with optional shortcuts, attention. +It is meant to be used for Monthly and Quarterly series of M4 competition, becasue the DE (Diversified Ensemble) version is too slow. +The program uses and requires Dynet NN library(https://github.com/clab/dynet); can be compiled and run on Windows, Linux, and Mac. + +It has to be invoked in pair of executables, passing at least two integers: seedForChunks, chunkNo +so e.g. create a script with following lines on Windows +start 10 1 +start 10 2 +Modern computers have at more then 2 cores, so e.g. on 6-core machine create and run the following script with 3 pairs of workers: +# start 10 1 0 +# start 10 2 0 +# start 20 1 5 +# start 20 2 5 +# start 30 1 10 +# start 30 2 10 +seedForChunks have to be the same withion one pair, chunk numbers have to be 1 and 2. +We have added here the third parameter: ibigOffset. The straddle should be equal or bigger than BIG_LOOP. +Each pair goes through BIG_LOOP (by default 3, change in code below if you want) of model fitting and prediction, +so 2 pairs, as above, will produce 6 forecasts to be ensembled later, in R. +By increasing number of pairs, e.g. to 6 on 12-core computer, one can reduce BIG_LOOP to 1, so reduce execution time, and still have 6 forecasts - +a decent number to ensemble (in a separate, supplied R script). + +There are three blocks of parameters below, one active (starting with //PARAMS--------------) and two inactive. +The active block is setup as in the final run of forecasting quarterly series. Similarly Monthly block. +The Daily block is more of a demo, allowing to run quickly forecast for Daily series, although with slightly worse performance (use another program ES_RNN_E.cc for it). It was not used for the final submission. +So, you need comment/uncomment to have one block of interest active. + + +*/ + +//#define USE_ODBC +//define USE_ODBC if you want to +// 1. run the program in backtesting mode (which means you also need to set LBACK>0 below. Read the comment below. +// 2. save forecasts to a datatabase. Mysql and SQL Server were tested. The table creation and some other scripts should be found in \sql directory of the source code. +// Of course setting up ODBC is not that simple, :-), e.g. you need to create DSN=slawek, that points to a database with the output table. 
+// Saving to the db is convenient, but not necessary - all forecasts are always saved to as csv files in automatically created subdirectory (sorry sometimes two directories, so you have to copy :-)) of OUTPUT_DIR +//If saving to database you need to modify run varaible, for each new run, otherwise you will get the table key error. + +#include "dynet/dynet.h" +#include "dynet/training.h" +#include "dynet/expr.h" +#include "dynet/io.h" +#include "dynet/model.h" +#include "dynet/nodes.h" +#include "dynet/expr.h" +#include "dynet/lstm.h" +#include "slstm.h" //my implementation of dilated LSTMs + + +#if defined USE_ODBC + #if defined _WINDOWS + #include + #endif + #include + #include +#endif + +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace dynet; + + + +string DATA_DIR = "f:\\progs\\data\\M4DataSet\\"; //with the competition data csvs +//string DATA_DIR="/home/uber/progs/data/M4DataSet/"; +string OUTPUT_DIR = "f:\\progs\\data\\M4\\"; +//string OUTPUT_DIR="/home/uber/progs/data/M4/"; + +int LBACK = 0; //LBACK 0 means final mode: learning on all data and forecasting. LBACK=1 would move back by OUTPUT_SIZE, and forecast last known OUTPUT_SIZE points, for backtesting. LBACK could be a larger integer, but then number of series shrinks. + + +//PARAMS-------------- +string VARIABLE = "Quarterly"; +const string run0 = "(1,2),(4,8), LR=1e-3/{7,3e-4f},{11,1e-4f}, EPOCHS=16, LVP=200 40*"; +const string runL = "alpha5L " + run0; +const string runH = "alpha5H " + run0; + +vector> dilations = { { 1,2 },{ 4,8 } };//Each vector represents one chunk of Dilateed LSTMS, connected in resnNet fashion +const float INITIAL_LEARNING_RATE = 1e-3f; +//else +const map LEARNING_RATES = { { 7,3e-4f },{ 11,1e-4f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; //multiplier for per-series parameters' learning rate. + +const float ALPHA = 0.05; +const float TAUL = ALPHA / 2; +const float TAUH = 1 - TAUL; +const float ALPHA_MULTIP = 2 / ALPHA; + +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; //whether to insert a tanh() layer between the RNN stack and the linear adaptor (output) layer + +const int NUM_OF_TRAIN_EPOCHS = 16; +const unsigned int STATE_HSIZE = 40; + +const int SEASONALITY = 4; +const unsigned int INPUT_SIZE = 4; +const int INPUT_SIZE_I = INPUT_SIZE; +const unsigned int OUTPUT_SIZE = 8; +const int OUTPUT_SIZE_I = OUTPUT_SIZE; +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = INPUT_SIZE_I + OUTPUT_SIZE_I + MIN_INP_SEQ_LEN + 2; +const int MAX_SERIES_LENGTH = 40 * SEASONALITY + MIN_SERIES_LENGTH; //we are chopping longer series, to last, max e.g. 40 years + +const float LEVEL_VARIABILITY_PENALTY = 200; //Multiplier for L" penalty against wigglines of level vector. 
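+//Added illustration, not called anywhere in the program: TAUL and TAUH are the two target quantiles
+//(2.5% and 97.5% for ALPHA=0.05) and ALPHA_MULTIP=2/ALPHA is the coverage-penalty multiplier. The sketch
+//below shows, on plain floats, the per-step interval penalty that the Expression-based MSIS() loss
+//further down builds on the Dynet graph: interval width plus 2/ALPHA times any coverage violation.
+//The function name is illustrative only.
+static float intervalPenaltySketch(float forecL, float forecH, float actual, float alphaMultip) {
+	float penalty = forecH - forecL;                                 //narrower intervals cost less
+	if (actual < forecL) penalty += (forecL - actual) * alphaMultip; //actual fell below the lower quantile
+	if (actual > forecH) penalty += (actual - forecH) * alphaMultip; //actual rose above the upper quantile
+	return penalty;
+}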
+ + +/* +string VARIABLE = "Monthly"; +const string run0 = "Res(1,3,6,12), LR=1e-3 {8,3e-4f},{13,1e-4f}, EPOCHS=14, LVP=50, 20*"; +const string runL = "alpha5L " + run0; +const string runH = "alpha5H " + run0; + +#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +vector> dilations = { { 1,3,6,12 } };//Each vector represents one chunk of Dilateed LSTMS, connected in resnNet fashion^M +const float INITIAL_LEARNING_RATE = 1e-3f; +const map LEARNING_RATES = { { 8,3e-4f },{ 13,1e-4f } }; //at which epoch we set them up to what^M +const float PER_SERIES_LR_MULTIP = 1; + +const int NUM_OF_TRAIN_EPOCHS = 14; +const unsigned int STATE_HSIZE = 50; + +const float LEVEL_VARIABILITY_PENALTY = 50; //Multiplier for L" penalty against wigglines of level vector. + +const int SEASONALITY = 12; +const unsigned int OUTPUT_SIZE = 18; +const unsigned int INPUT_SIZE = 12; +const int INPUT_SIZE_I = INPUT_SIZE; +const int OUTPUT_SIZE_I = OUTPUT_SIZE; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = INPUT_SIZE_I + OUTPUT_SIZE_I + MIN_INP_SEQ_LEN + 2; +const int MAX_SERIES_LENGTH = 40 * SEASONALITY + MIN_SERIES_LENGTH; //we are chopping longer series, to last, max e.g. 40 years + +const float ALPHA = 0.05; +const float TAUL = ALPHA / 2; +const float TAUH = 1 - TAUL; +const float ALPHA_MULTIP = 2 / ALPHA; +*/ + +Expression squash(const Expression& x) { + return log(x); +} + +Expression expand(const Expression& x) { + return exp(x); +} + +string INPUT_PATH = DATA_DIR + VARIABLE + "-train.csv"; +string INFO_INPUT_PATH = DATA_DIR + "M4-info.csv"; + +#if defined _DEBUG + const int MAX_NUM_OF_SERIES = 40; +#else + const int MAX_NUM_OF_SERIES = -1; //use all series +#endif // _DEBUG + +const unsigned int NUM_OF_CATEGORIES = 6;//in data provided +const int BIG_LOOP = 3; +const int NUM_OF_CHUNKS = 2; +const float EPS=1e-6; +const int AVERAGING_LEVEL=5; +const bool USE_MEDIAN = false; +const int MIDDLE_POS_FOR_AVG = 2; //if using medians + +const float NOISE_STD=0.001; +const int FREQ_OF_TEST=1; +const float GRADIENT_CLIPPING=20; +const float C_STATE_PENALTY = 0; + +const float BIG_FLOAT=1e38;//numeric_limits::max(); +const bool PRINT_DIAGN=true; +const unsigned ATTENTION_HSIZE=STATE_HSIZE; + +const bool USE_AUTO_LEARNING_RATE=false; +//if USE_AUTO_LEARNING_RATE, and only if LBACK>0 +const float MIN_LEARNING_RATE = 0.0001f; +const float LR_RATIO = sqrt(10); +const float LR_TOLERANCE_MULTIP = 1.005; +const int L3_PERIOD = 2; +const int MIN_EPOCHS_BEFORE_CHANGING_LRATE = 2; + + +#if defined USE_ODBC + void HandleDiagnosticRecord(SQLHANDLE hHandle, + SQLSMALLINT hType, + RETCODE RetCode); + + #if defined _WINDOWS + WCHAR* pwszConnStr = L"DSN=slawek"; + #else + SQLCHAR* pwszConnStr = (SQLCHAR*) "DSN=slawek"; + #endif + #define TRYODBC(h, ht, x) { RETCODE rc = x;\ + if (rc != SQL_SUCCESS) \ + { \ + HandleDiagnosticRecord (h, ht, rc); \ + } \ + if (rc == SQL_ERROR) \ + { \ + fprintf(stderr, "Error in " #x "\n"); \ + if (hStmt) { \ + SQLFreeHandle(SQL_HANDLE_STMT, hStmt); \ + } \ + if (hDbc) { \ + SQLDisconnect(hDbc); \ + SQLFreeHandle(SQL_HANDLE_DBC, hDbc); \ + } \ + if (hEnv) { \ + SQLFreeHandle(SQL_HANDLE_ENV, hEnv); \ + } \ + exit(-1); \ + } \ + } + +#endif + +struct M4TS {//storing series data + vector < float> categories_vect; + vector vals; + vector testVals;//empty, unless LBACK>0 + float meanAbsSeasDiff; + int n; + + M4TS(string category, stringstream &line_stream) { + array categories = { 0,0,0,0,0,0 }; + if (category == "Demographic") + categories[0] = 1; + else if 
(category == "Finance") + categories[1] = 1; + else if (category == "Industry") + categories[2] = 1; + else if (category == "Macro") + categories[3] = 1; + else if (category == "Micro") + categories[4] = 1; + else if (category == "Other") + categories[5] = 1; + else { + cerr << "unknown category?"; + exit(-1); + } + for (int i = 0; i < NUM_OF_CATEGORIES; i++) + categories_vect.push_back(categories[i]); + + string tmp_str; + while(getline(line_stream, tmp_str, ',' )) { + string val_str; + for (const auto c : tmp_str) { + if (c != '\"' && c != '\r') //remove quotes and very occasional double end of line + val_str.push_back(c); + } + if (val_str.size() == 0) + break; + float val=(atof(val_str.c_str())); + vals.push_back(val); + } + + meanAbsSeasDiff = 0; + float sumf = 0; + for (int ip = SEASONALITY; ip0) + meanAbsSeasDiff = sumf / (vals.size() - SEASONALITY); + + if (LBACK > 0) { //extract last OUTPUT_SIZE points as the test values + if (vals.size() > LBACK*OUTPUT_SIZE_I) { + auto first = vals.begin() + vals.size() - LBACK*OUTPUT_SIZE_I; + auto pastLast = vals.begin() + vals.size() - (LBACK-1)*OUTPUT_SIZE_I; + vector input_vect(first, pastLast); //[first,pastLast) + testVals= input_vect; + vals.resize(vals.size() - LBACK*OUTPUT_SIZE_I); //remove last LBACK*OUTPUT_SIZE elements + n = vals.size(); + } else + n = 0; + } else { + n = vals.size(); + } + if (n > MAX_SERIES_LENGTH) {//chop long series + vals.erase(vals.begin(), vals.begin() + (n-MAX_SERIES_LENGTH)); //remove some early data + n = vals.size(); + } + } + M4TS(){}; +}; + + +struct AdditionalParams {//Per series, important + Parameter levSm; + Parameter sSm; + array initSeasonality; +}; + +struct AdditionalParamsF {//Used for storing diagnostics + float levSm; + float sSm; + array initSeasonality; + vector levels; + vector seasons; +}; + +//loss function +Expression MSIS(const Expression& out_ex, const Expression& actuals_ex) { + vector losses; + for (unsigned int indx = 0; indx as_scalar(forecH.value())) + loss = loss + (actual - forecH)*ALPHA_MULTIP; + losses.push_back(loss); + } + Expression ret = sum(losses) / OUTPUT_SIZE; + #if defined _DEBUG + float retf = as_scalar(ret.value()); + if (retf>100) { + vector out_vect = as_vector(out_ex.value()); + vector actuals_vect = as_vector(actuals_ex.value()); + for (int i = 0; i0 and PERCENTILE!=50 +float wQuantLoss(vector& out_vect, vector& actuals_vect, float tau, int offset) {//used just for diagnostics, if if LBACK>0 and PERCENTILE!=50 + float sumf = 0; float suma = 0; + for (unsigned int indx = 0; indx forec) + sumf = sumf + (actual - forec)*tau; + else + sumf = sumf + (actual - forec)*(tau - 1); + } + return sumf / suma * 200; +} + +//MSIS operating on floats, used for validation +float errorFunc(vector& out_vect, vector& actuals_vect, float meanAbsSeasDiff) { + float sumf=0; + for (unsigned int indx = 0; indx forecH) + loss = loss + (actualf - forecH)*ALPHA_MULTIP; + sumf+=loss; + } + return sumf / (OUTPUT_SIZE*meanAbsSeasDiff); +} + + + + +int main(int argc, char** argv) { + dynet::initialize(argc, argv); + + int seedForChunks = 10; //Yes it runs, without any params, but it will work only on 1/NUM_OF_CHUNKS of all cases. The system is expected to run in NUM_OF_CHUNKS multiples. 
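+ //seedForChunks must be identical for the two members of a worker pair (chunkNo 1 and 2), so both
+ //make the same pseudo-random split of series into chunks; ibigOffset shifts the ibig index used in
+ //output file naming, so additional pairs (e.g. started with offsets 0, 5, 10) or a restarted run
+ //produce distinct files that can all be ensembled later in R.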
+ int chunkNo = 1; + int ibigOffset = 0; + if (argc >= 3) { + seedForChunks = atoi(argv[1]); + chunkNo = atoi(argv[2]); + } + if (argc >= 4) + ibigOffset = atoi(argv[3]); + + if (chunkNo > NUM_OF_CHUNKS) { + cerr << "chunkNo > NUM_OF_CHUNKS"; + exit(-1); + } + else if (chunkNo <= 0) { + cerr << "chunkNo <= 0"; + exit(-1); + } + + cout<0) + std::cout<< " ibigOffset:"<< ibigOffset; //if continuing prematurely stopped run + if (LBACK>0) + std::cout<<" lback:"<tm_year+1900; + now_ts.month=now->tm_mon+1; + now_ts.day=now->tm_mday; + now_ts.hour=now->tm_hour; + now_ts.minute=now->tm_min; + now_ts.second=now->tm_sec; + now_ts.fraction=0; //reportedly needed + + const int OFFSET_TO_FIRST_ACTUAL=5; + string insertQuery_str = "insert into M72nn(run, LBack, ibig, series, epoch "; + for (int iq = 1; iq <= OUTPUT_SIZE_I; iq++) { + stringstream ss; + ss << iq; + string iq_str = ss.str(); + insertQuery_str = insertQuery_str +", actual"+iq_str+", forec" + iq_str; + } + insertQuery_str = insertQuery_str +", trainingError, variable, n, dateTimeOfPrediction) \ + values(? , ? , ? , ? , ? "; + for (int iq = 1; iq <= OUTPUT_SIZE_I; iq++) { + insertQuery_str = insertQuery_str + ",?,?"; + } + insertQuery_str = insertQuery_str + ",?,?,?,?)"; + #if defined _WINDOWS + wstring insertQuery(insertQuery_str.begin(), insertQuery_str.end()); + SQLWCHAR* sqlQuery = (SQLWCHAR*)insertQuery.c_str(); + #else + SQLCHAR* sqlQuery =(SQLCHAR*)insertQuery_str.c_str(); + #endif + + + SQLHENV hEnv = NULL; + SQLHDBC hDbc = NULL; + SQLHSTMT hStmt = NULL, hInsertStmt = NULL; + + if (SQLAllocHandle(SQL_HANDLE_ENV, SQL_NULL_HANDLE, &hEnv) == SQL_ERROR) { + fprintf(stderr, "Unable to allocate an environment handle\n"); + exit(-1); + } + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLSetEnvAttr(hEnv, + SQL_ATTR_ODBC_VERSION, + (SQLPOINTER)SQL_OV_ODBC3, + 0)); + + // Allocate a connection + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLAllocHandle(SQL_HANDLE_DBC, hEnv, &hDbc)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLDriverConnect(hDbc, + NULL, + pwszConnStr, + SQL_NTS, + NULL, + 0, + NULL, + SQL_DRIVER_COMPLETE)); + fprintf(stderr, "Connected!\n"); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLSetConnectAttr(hDbc, SQL_ATTR_AUTOCOMMIT, (SQLPOINTER)SQL_AUTOCOMMIT_OFF, SQL_IS_INTEGER)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLAllocHandle(SQL_HANDLE_STMT, hDbc, &hInsertStmt)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLPrepare(hInsertStmt, sqlQuery, SQL_NTS)); + + SQLLEN nullTerminatedStringOfRun = SQL_NTS; + SQLLEN nullTerminatedStringOfSeries = SQL_NTS; + SQLLEN nullTerminatedStringOfVariable = SQL_NTS; + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 2, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&LBACK, 0, NULL)); + + // variable, n, dateTimeOfPrediction + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL+2*OUTPUT_SIZE_I+2, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)VARIABLE.c_str(), 0, &nullTerminatedStringOfVariable)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE_I + 4, SQL_PARAM_INPUT, SQL_C_TYPE_TIMESTAMP, SQL_TYPE_TIMESTAMP, 0, 0, &now_ts, sizeof(TIMESTAMP_STRUCT), NULL)); +#endif + + random_device rd; // only used once to initialise (seed) engine + mt19937 rng(rd()); // random-number engine used (Mersenne-Twister) + mt19937 rngForChunks(seedForChunks); + + vector series_vect; + unordered_map allSeries_map(30000);//max series in one chunk would be 48/2=24k, for monthly series + 
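+ //How the series get partitioned (a reading of the code that follows, stated here for orientation):
+ //eligible series ids are pushed into series_vect, the vector is shuffled with rngForChunks
+ //(seeded with seedForChunks, hence identical in every worker started with the same seed), and is
+ //then cut into NUM_OF_CHUNKS consecutive blocks of roughly chunkSize=series_len/NUM_OF_CHUNKS;
+ //this process trains only on the block selected by its chunkNo, with the last chunk absorbing
+ //any remainder.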
unordered_map seriesCategories_map(120000);//100k series + + ifstream infoFile(INFO_INPUT_PATH); + string line; + getline(infoFile, line); //header + while (getline(infoFile, line)) { + //cout << string( line)<= MIN_SERIES_LENGTH) { + series_vect.push_back(series); + if (m4Obj.meanAbsSeasDiff==0) { + cout<<"Warning, flat series:"<0 && series_vect.size()>=MAX_NUM_OF_SERIES) + break; + } + + int series_len=(int)series_vect.size(); + int chunkSize= series_len/NUM_OF_CHUNKS; + std::cout << "num of series:" << series_vect.size() <<" size of chunk:"<< chunkSize< uniOnSeries(0, chunkSize -1); // closed interval [a, b] + + unordered_map, AVERAGING_LEVEL+1>> testResults_map((int)chunkSize*1.5); + set diagSeries; + + for (int ibig=0; ibig perfValid_vect; + int epochOfLastChangeOfLRate = -1; + +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&ibigDb, 0, NULL)); +#endif + + ParameterCollection pc; + ParameterCollection perSeriesPC; + + float learning_rate= INITIAL_LEARNING_RATE; + AdamTrainer trainer(pc, learning_rate, 0.9, 0.999, EPS); + trainer.clip_threshold = GRADIENT_CLIPPING; + AdamTrainer perSeriesTrainer(perSeriesPC, learning_rate*PER_SERIES_LR_MULTIP, 0.9, 0.999, EPS); + perSeriesTrainer.clip_threshold = GRADIENT_CLIPPING; + + #if defined USE_RESIDUAL_LSTM + vector rNNStack; + rNNStack.emplace_back(ResidualDilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, pc)); + for (int il = 1; il rNNStack; + rNNStack.emplace_back(AttentiveDilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, ATTENTION_HSIZE, pc)); + for (int il = 1; il rNNStack; + rNNStack.emplace_back(DilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, pc)); + for (int il = 1; il oneChunk_vect(start,end); + if (PRINT_DIAGN) { + for (int k = 0; k<10; k++) //diag + cout << oneChunk_vect[k] << " "; + cout << endl; + } + if (chunkNo == NUM_OF_CHUNKS) + cout<<"last chunk size:"<< oneChunk_vect.size()< additionalParams_map((int)oneChunk_vect.size()*1.5); //per series + unordered_map*> historyOfAdditionalParams_map((int)oneChunk_vect.size()*1.5); + for (auto iter = oneChunk_vect.begin(); iter != oneChunk_vect.end(); ++iter) {//setup + string series = *iter; + AdditionalParams addParams; + addParams.levSm = perSeriesPC.add_parameters({ 1 }, 0.5); //level smoothing + addParams.sSm = perSeriesPC.add_parameters({ 1 }, 0.5); //seasonality smoothing + for (int isea = 0; isea(); + } + + for (int iEpoch=0; iEpoch testLosses; //test losses of all series in this epoch + vector testAvgLosses; //test avg (over last few epochs) losses of all series in this epoch + vector testLossesL; //lower quantile loss + vector testAvgLossesL; //lower quantile loss + vector testLossesH; //higher quantile loss + vector testAvgLossesH; //higher quantile loss + vector trainingLosses; //training losses of all series in one epoch + vector forecLosses; vector levVarLosses; vector stateLosses; + #if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 5, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&iEpoch, 0, NULL)); + #endif + + for (auto iter = oneChunk_vect.begin() ; iter != oneChunk_vect.end(); ++iter) { + string series=*iter; + auto m4Obj = allSeries_map[series]; + + #if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 4, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, 
(SQLCHAR*)series.c_str(), 0, &nullTerminatedStringOfSeries)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE_I + 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&m4Obj.n, 0, NULL)); + #endif + + ComputationGraph cg; + for (int il=0; il season_exVect;//vector, because we do not know how long the series is + for (int iseas=0; iseas seas==1 + season_exVect.push_back(seas);//Expression is a simple struct, without any storage management, so the auto copy constructor works OK. + } + season_exVect.push_back(season_exVect[0]); + + vector logDiffOfLevels_vect; + vector levels_exVect; + Expression lev=cdiv(input(cg, m4Obj.vals[0]), season_exVect[0]); + levels_exVect.push_back(lev); + for (int i=1; i 0) { + vector levelVarLoss_v; + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx=season_exVect.size()-SEASONALITY; + for (int i=0;i<(OUTPUT_SIZE_I-SEASONALITY);i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx+i]); + } + vector losses; + for (int i=INPUT_SIZE_I-1; i<(m4Obj.n- OUTPUT_SIZE_I); i++) { + vector::const_iterator firstE = season_exVect.begin() +i+1-INPUT_SIZE_I; + vector::const_iterator pastLastE = season_exVect.begin() +i+1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression inputSeasonality_ex=concatenate(inputSeasonality_exVect); + + vector::const_iterator first = m4Obj.vals.begin() +i+1-INPUT_SIZE_I; + vector::const_iterator pastLast = m4Obj.vals.begin() +i+1; //not including the last one + vector input_vect(first, pastLast); //[first,pastLast) + Expression input0_ex=input(cg,{INPUT_SIZE},input_vect); + Expression input1_ex=cdiv(input0_ex,inputSeasonality_ex); //deseasonalization + vector joinedInput_ex; + input1_ex= cdiv(input1_ex, levels_exVect[i]); + joinedInput_ex.emplace_back(noise(squash(input1_ex), NOISE_STD)); //normalization+noise + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality_ex=concatenate(outputSeasonality_exVect); + + first = m4Obj.vals.begin() +i+1; + pastLast = m4Obj.vals.begin() +i+1+OUTPUT_SIZE_I; + vector labels_vect(first, pastLast); //[first,pastLast) + Expression labels0_ex=input(cg,{OUTPUT_SIZE},labels_vect); + Expression labels1_ex=cdiv(labels0_ex,outputSeasonality_ex); //deseasonalization + labels1_ex= cdiv(labels1_ex, levels_exVect[i]);//normalization + Expression labels_ex=squash(labels1_ex); + + Expression loss_ex=MSIS(out_ex, labels_ex);//although out_ex has doubled size, labels_ex have normal size. NB, we do not have duplicated labels during training. 
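+ //What the MSIS() expression above computes, sketched on plain floats (an assumption based on the
+ //standard Mean Scaled Interval Score and the visible fragments of MSIS(); forecL/forecH are the
+ //lower/upper halves of the doubled network output, y is the label, ALPHA_MULTIP==2/ALPHA):
+ //  float intervalScore(float y, float forecL, float forecH) {
+ //    float s = forecH - forecL;                         //width of the prediction interval
+ //    if (y < forecL) s += (forecL - y) * ALPHA_MULTIP;  //penalty when the actual falls below it
+ //    if (y > forecH) s += (y - forecH) * ALPHA_MULTIP;  //penalty when the actual falls above it
+ //    return s;
+ //  }
+ //The per-step scores are averaged over OUTPUT_SIZE; the float errorFunc() used for validation
+ //additionally divides by meanAbsSeasDiff, which supplies the scaling part of MSIS.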
+ //Expression loss_ex=pinBallLoss(out_ex, labels_ex); + if (i>=INPUT_SIZE_I+MIN_INP_SEQ_LEN) + losses.push_back(loss_ex); + } + + Expression forecLoss_ex= average(losses); + Expression loss_exp = forecLoss_ex; + + float levVarLoss=0; + if (LEVEL_VARIABILITY_PENALTY > 0) { + Expression levelVarLossP_ex = levelVarLoss_ex*LEVEL_VARIABILITY_PENALTY; + levVarLoss = as_scalar(levelVarLossP_ex.value()); + levVarLosses.push_back(levVarLoss); + loss_exp= loss_exp + levelVarLossP_ex; + } + + float cStateLoss=0; + if (C_STATE_PENALTY>0) { + vector cStateLosses_vEx; + for (int irnn = 0; irnn < rNNStack.size(); irnn++) + for (int it = 0; it maxAbs) { + maxAbs = abs(state[iv]); + timeOfMax = it; + layerOfMax = il; + chunkOfMax = irnn; + } + } + } //through layers/states + } //through time + } //through chunks + + cout << "levSm:" << as_scalar(levSm_ex.value()) << endl; + cout << "sSm:" << as_scalar(sSm_ex.value()) << endl; + cout << " min season=" << minSeason << endl; + cout << " min level=" << minLevel << endl; + cout << " max abs:" << maxAbs << " at time:" << timeOfMax << " at layer:" << layerOfMax << " and chunk:" << chunkOfMax << endl; + + //diagSeries.insert(series); + pc.reset_gradient(); + perSeriesPC.reset_gradient(); + } + + //saving per-series values for diagnostics purposes + AdditionalParamsF &histAdditionalParams= historyOfAdditionalParams_map[series]->at(iEpoch); + histAdditionalParams.levSm=as_scalar(levSm_ex.value()); + histAdditionalParams.sSm=as_scalar(sSm_ex.value()); + for (int isea=0; isea::const_iterator firstE = season_exVect.begin() + i + 1 - INPUT_SIZE_I; + vector::const_iterator pastLastE = season_exVect.begin() + i + 1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + Expression inputSeasonality_ex = concatenate(inputSeasonality_exVect); + + vector::const_iterator first = m4Obj.vals.begin() + i + 1 - INPUT_SIZE_I; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1; //not including the last one + vector input_vect(first, pastLast); //[first,pastLast) + Expression input0_ex = input(cg, { INPUT_SIZE }, input_vect); + Expression input1_ex = cdiv(input0_ex, inputSeasonality_ex); //deseasonalization + vector joinedInput_ex; + input1_ex= cdiv(input1_ex, levels_exVect[i]);//normalization + joinedInput_ex.emplace_back(squash(input1_ex)); + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + for (int ios=0; ios out_vect = as_vector(out_ex.value()); + + if (LBACK > 0) { + float qLoss = errorFunc(out_vect, m4Obj.testVals, m4Obj.meanAbsSeasDiff); + testLosses.push_back(qLoss); + + qLoss = wQuantLoss(out_vect, m4Obj.testVals, TAUL, 0); + testLossesL.push_back(qLoss); + + qLoss = wQuantLoss(out_vect, m4Obj.testVals, TAUH, OUTPUT_SIZE); + testLossesH.push_back(qLoss); + } + + testResults_map[series][iEpoch%AVERAGING_LEVEL] = out_vect; + if (iEpoch >= AVERAGING_LEVEL) { + if (USE_MEDIAN) { + if (testResults_map[series][AVERAGING_LEVEL].size() == 0) + testResults_map[series][AVERAGING_LEVEL] = out_vect; //just to initialized, to make space. 
The values will be overwritten + for (int iii = 0; iii < OUTPUT_SIZE_I*2; iii++) { + vector temp_vect2; + for (int ii = 0; ii firstForec = testResults_map[series][0]; + testResults_map[series][AVERAGING_LEVEL] = firstForec; + for (int ii = 1; ii nextForec = testResults_map[series][ii]; + for (int iii = 0; iii 0) { + float qLoss = errorFunc(testResults_map[series][AVERAGING_LEVEL], m4Obj.testVals, m4Obj.meanAbsSeasDiff); + testAvgLosses.push_back(qLoss); + + qLoss = wQuantLoss(testResults_map[series][AVERAGING_LEVEL], m4Obj.testVals, TAUL, 0); + testAvgLossesL.push_back(qLoss); + + qLoss = wQuantLoss(testResults_map[series][AVERAGING_LEVEL], m4Obj.testVals, TAUH, OUTPUT_SIZE); + testAvgLossesH.push_back(qLoss); + + #if defined USE_ODBC //save + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE_I + 1, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&forecastLoss, 0, NULL)); + + for (int iv = 0; iv<2; iv++) { + if (iv == 0) + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 1, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)runL.c_str(), 0, &nullTerminatedStringOfRun)) + else + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 1, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)runH.c_str(), 0, &nullTerminatedStringOfRun)); + + for (int io = 0; io < OUTPUT_SIZE_I; io++) { + int ipos=OFFSET_TO_FIRST_ACTUAL + 1 + 2*io; + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, ipos, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&m4Obj.testVals[io], 0, NULL)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, ipos+1, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&testResults_map[series][AVERAGING_LEVEL][io + iv*OUTPUT_SIZE_I], 0, NULL)); + } + if (MAX_NUM_OF_SERIES<0) + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLExecute(hInsertStmt)); + } + #endif + } //lback>0 + } //time to average + }//last anchor point of the series + }//through TEST loop + }//through series + + + if (iEpoch % FREQ_OF_TEST == 0) { + float averageTrainingLoss = accumulate(trainingLosses.begin(), trainingLosses.end(), 0.0) / trainingLosses.size(); + + cout << ibig << " " << iEpoch << " loss:" << averageTrainingLoss * 100; + if (LEVEL_VARIABILITY_PENALTY > 0 || C_STATE_PENALTY > 0) { + float averageForecLoss = accumulate(forecLosses.begin(), forecLosses.end(), 0.0) / forecLosses.size(); + cout << " forecast loss:" << averageForecLoss*100; + } + if (LEVEL_VARIABILITY_PENALTY > 0) { + float averagelevVarLoss = accumulate(levVarLosses.begin(), levVarLosses.end(), 0.0) / levVarLosses.size(); + cout << " levVar loss:" << averagelevVarLoss * 100; + } + if (C_STATE_PENALTY > 0) { + float averageStateLoss = accumulate(stateLosses.begin(), stateLosses.end(), 0.0) / stateLosses.size(); + cout << " state loss:" << averageStateLoss * 100; + } + + float averageTestLoss=0; + if (LBACK > 0) { + float averageTestLoss = accumulate(testLosses.begin(), testLosses.end(), 0.0) / testLosses.size(); + float averageTestLossL = accumulate(testLossesL.begin(), testLossesL.end(), 0.0) / testLossesL.size(); + float averageTestLossH = accumulate(testLossesH.begin(), testLossesH.end(), 0.0) / testLossesH.size(); + cout<<" Test loss:" << averageTestLoss<<" L:"<< averageTestLossL<<" H:"<< averageTestLossH; + if (iEpoch >= AVERAGING_LEVEL) { + float averageTestAvgLoss = accumulate(testAvgLosses.begin(), testAvgLosses.end(), 0.0) / testAvgLosses.size();//of 
this epoch + float averageTestAvgLossL = accumulate(testAvgLossesL.begin(), testAvgLossesL.end(), 0.0) / testAvgLossesL.size();//of this epoch + float averageTestAvgLossH = accumulate(testAvgLossesH.begin(), testAvgLossesH.end(), 0.0) / testAvgLossesH.size();//of this epoch + cout << " avgLoss:" << averageTestAvgLoss<<" L:"<< averageTestAvgLossL<<" H:"<< averageTestAvgLossH<= 2) { + if (iEpoch < L3_PERIOD) + changeL2Rate = perfValid_vect[perfValid_vect.size() - 2] MIN_LEARNING_RATE && (iEpoch - epochOfLastChangeOfLRate) >= MIN_EPOCHS_BEFORE_CHANGING_LRATE) { + learning_rate /= LR_RATIO; + cout << "decreasing LR to:" << learning_rate << endl; + epochOfLastChangeOfLRate = iEpoch; + trainer.learning_rate = learning_rate; + } + } + #if defined USE_ODBC + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLEndTran( + SQL_HANDLE_DBC, + hDbc, + SQL_COMMIT)); + #endif + }//through epochs + + if (PRINT_DIAGN) {//some diagnostic info + set diagSeries; + for (int i = 0; i<1; i++) {//add a few normal ones + int irand = uniOnSeries(rng); + diagSeries.insert(oneChunk_vect[irand]); + } + for (auto series : diagSeries) { + cout << endl << series << endl; + array* historyOfAdditionalParams_ptrToArr = historyOfAdditionalParams_map[series]; + cout << "lSm:" << endl; + for (int iEpoch = 0; iEpochat(iEpoch).levSm << " "; + cout << endl; + cout << "sSm:" << endl; + for (int iEpoch = 0; iEpochat(iEpoch).sSm << " "; + cout << endl; + cout << "seasons:" << endl; + for (int isea = 0; iseaat(iEpoch).initSeasonality[isea] << " "; + cout << endl; + } + cout << endl; + for (int iEpoch = 0; iEpochat(iEpoch).levels.size()>0) { + cout << "levels:" << iEpoch << " "; + for (int iv = 0; ivat(iEpoch).levels.size(); iv++) + cout << historyOfAdditionalParams_ptrToArr->at(iEpoch).levels[iv] << ", "; + cout << endl; + cout << "seas:" << iEpoch << " "; + for (int iv = 0; ivat(iEpoch).seasons.size(); iv++) + cout << historyOfAdditionalParams_ptrToArr->at(iEpoch).seasons[iv] << ", "; + cout << endl; + } + } + } + } + + //save the forecast to outputFile + ofstream outputFile; + outputFile.open(outputPathL); + for (auto iter = oneChunk_vect.begin(); iter != oneChunk_vect.end(); ++iter) { + string series = *iter; + outputFile<< series; + for (int io=0; io + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + RelWithDebug + Win32 + + + RelWithDebug + x64 + + + + + + + + + + + {A16B5466-E680-43F6-A884-A4A01EB78E50} + Win32Proj + M42 + 8.1 + + + + Application + true + v140 + Unicode + + + Application + true + v140 + Unicode + + + Application + false + v140 + true + Unicode + + + Application + true + v140 + Unicode + Sequential + + + Application + true + v140 + Unicode + Sequential + + + Application + false + v140 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + true + + + true + + + true + + + true + + + false + + + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + + + + + NotUsing + Level1 + Disabled + WIN32;_WINDOWS;EIGEN_USE_MKL_ALL;EIGEN_FAST_MATH;NOMINMAX;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + E:\progs2\dynet;E:\progs\Eigen; + + + Console + true + E:\progs2\dynet\buildMKL\dynet\Debug + dynet.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + NotUsing + Level1 + MaxSpeed + 
WIN32;_WINDOWS;EIGEN_FAST_MATH;EIGEN_USE_MKL_ALL;NOMINMAX;NDEBUG_;CONSOLE;%(PreprocessorDefinitions) + E:\progs2\dynet;E:\progs\Eigen; + AnySuitable + true + Speed + AdvancedVectorExtensions + Default + MultiThreadedDLL + ProgramDatabase + true + false + + + Console + true + E:\progs2\dynet\buildMKL\dynet\RelWithDebInfo + dynet.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + true + true + + + + + + \ No newline at end of file diff --git a/118 - slaweks17/c++/windows_VisualStudio/M42/M42.vcxproj.filters b/118 - slaweks17/c++/windows_VisualStudio/M42/M42.vcxproj.filters new file mode 100644 index 0000000..b8ac1c3 --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/M42/M42.vcxproj.filters @@ -0,0 +1,30 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + + + Header Files + + + \ No newline at end of file diff --git a/118 - slaweks17/c++/windows_VisualStudio/M42/slstm.h b/118 - slaweks17/c++/windows_VisualStudio/M42/slstm.h new file mode 100644 index 0000000..adb63a7 --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/M42/slstm.h @@ -0,0 +1,394 @@ +/** +* file slstm.h +* header for my implementation of dilated LSTMs, based on Dynet LSTM builders + - DilatedLSTMBuilder - standard Dilated LSTM (https://papers.nips.cc/paper/6613-dilated-recurrent-neural-networks.pdf) + - ResidualDilatedLSTMBuilder - Dilated LSTM with special Residual shortcuts, after https://arxiv.org/abs/1701.03360 + - AttentiveDilatedLSTMBuilder - Dilated LSTM with Attention mechanism, as in the second stage of https://arxiv.org/abs/1704.02971 +* +Slawek Smyl, Mar-May 2018 +*/ + +#ifndef DYNET_SLSTMS_H_ +#define DYNET_SLSTMS_H_ + +#include "dynet/dynet.h" +#include "dynet/rnn.h" +#include "dynet/expr.h" + +using namespace std; + +namespace dynet { + + //basd on VanillaLSTMBuilder + struct ResidualDilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + ResidualDilatedLSTMBuilder(); + /** + * \brief Constructor for the ResidualDilatedLSTMBuilder + * + * \param dilations Vector of dilations + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + * \param ln_lstm Whether to use layer normalization + * \param forget_bias value(float) to use as bias for the forget gate(default = 1.0) + */ + explicit ResidualDilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model, + bool ln_lstm = false, + float forget_bias = 1.f); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? 
c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. + * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + * \param d_h Dropout rate \f$d_h\f$ for the output \f$h_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + /** + * \brief Get parameters in ResidualDilatedLSTMBuilder + * \return list of points to ParameterStorage objects + */ + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + // first index is layer, then ... + std::vector> ln_params; + + // first index is layer, then ... + std::vector> param_vars; + // first index is layer, then ... + std::vector> ln_param_vars; + + // first index is layer, then ... 
+ std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + float dropout_rate_h; + bool ln_lstm; + float forget_bias; + bool dropout_masks_valid; + vector dilations; //one int per layer + + private: + ComputationGraph* _cg; // Pointer to current cg + + }; + + + struct DilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + DilatedLSTMBuilder(); + /** + * \brief Constructor for the DilatedLSTMBuilder + * + * \param dilations Vector of dilations + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + */ + explicit DilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 
+ * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + + void set_weightnoise(float std); + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + + // first index is layer, then ... + std::vector> param_vars; + + // first index is layer, then ... + std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + float dropout_rate_h; + float weightnoise_std; + vector dilations; //one int per layer + + bool dropout_masks_valid; + private: + ComputationGraph* _cg; // Pointer to current cg + + }; + + + struct AttentiveDilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + AttentiveDilatedLSTMBuilder(); + /** + * \brief Constructor for the AttentiveDilatedLSTMBuilder + * + * \param max_dilations Vector, maximum dilations (per layer) + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + */ + explicit AttentiveDilatedLSTMBuilder(vector max_dilations, + unsigned input_dim, + unsigned hidden_dim, + unsigned attention_dim, + ParameterCollection& model); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? 
c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. + * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + + void set_weightnoise(float std); + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + + // first index is layer, then ... + std::vector> param_vars; + + // first index is layer, then ... 
+ std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + unsigned attention_dim; + float dropout_rate_h; + float weightnoise_std; + vector max_dilations; //one int per layer + + bool dropout_masks_valid; + private: + ComputationGraph* _cg; // Pointer to current cg + + }; +} // namespace dynet + +#endif diff --git a/118 - slaweks17/c++/windows_VisualStudio/M43/ES_RNN_E.cc b/118 - slaweks17/c++/windows_VisualStudio/M43/ES_RNN_E.cc new file mode 100644 index 0000000..aaf4659 --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/M43/ES_RNN_E.cc @@ -0,0 +1,1665 @@ +/*ES-RNN-E: Exponential Smoothing Recurrent Neural Network hybrid, Ensemble of specialists. Point forecast. +Slawek Smyl, Jan-May 2017. + +Dilated LSTMs, with optional shortcuts, attention. Non-seasonal, single, or double seasonal. +It is meant to be used for all types of series from M4 competition, except Monthly and Quarterly (for performance reasons - it is slower). +The program uses and requires Dynet NN library(https://github.com/clab/dynet); can be compiled and run on Windows, Linux, and Mac. + +In contradistinction to ES-RNN, each executable uses all series, but in a similar manner repeating the whole learning process BIG_LOOP times (by default 3). +Invocation should pass BIG_LOOP offset +so e.g. create a script with following lines on Windows +start 0 +start 10 +start 20 +start 30 +on 4-core computer. +In this setup, learning and fitting would be repeated 4*3 times, probably unnecessarily too many, 6-8 independent runs should be enough for a good ensemble. +Therefore if running on say 8 core machine , one can extend the above script to 8 concurrent executions and reduce BIG_LOOP to 1. +(Creating final forecasts is done in a supplied R script) + +There are four blocks of parameters below, one active (starting with //PARAMS--------------) and three inactive. +These blocks are as they were during the final forecasting run. You need comment/uncomment to have one block of interest active. +*/ + + +//#define USE_ODBC +//define USE_ODBC if you want to +// 1. run the program in backtesting mode (which means you also need to set LBACK>0 below. Read the comment below. +// 2. save forecasts to a datatabase. Mysql and SQL Server were tested. The table creation and some other scripts should be found in \sql directory of the source code. +// Of course setting up ODBC is not that simple, :-), e.g. you need to create DSN=slawek, that points to a database with the output table. +// Saving to the db is convenient, but not necessary - all forecasts are always saved to as csv files in automatically created subdirectory (sorry sometimes two directories, so you have to copy :-)) of OUTPUT_DIR +//If saving to database you need to modify run varaible, for each new run, otherwise you will get the table key error. 
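+//Invocation note: unlike the chunked executable earlier in this patch (which takes seedForChunks,
+//chunkNo and an optional ibigOffset), this program reads at most one command-line argument, the
+//ibigOffset (see the argc handling in main() below). The "start 0 / start 10 / start 20 / start 30"
+//script above therefore passes only that offset, which shifts the numbering of the BIG_LOOP
+//repetitions across concurrently running processes.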
+ +#include "dynet/dynet.h" +#include "dynet/training.h" +#include "dynet/expr.h" +#include "dynet/io.h" +#include "dynet/model.h" +#include "dynet/nodes.h" +#include "dynet/expr.h" +#include "dynet/lstm.h" +#include "slstm.h" //my implementation of dilated LSTMs + + +#if defined USE_ODBC + #if defined _WINDOWS + #include + #endif + #include + #include +#endif + +#include +#include +#include +//#include +#include +#include +#include +#include + +using namespace std; +using namespace dynet; + + +string DATA_DIR = "f:\\progs\\data\\M4DataSet\\"; //with the competition data csvs +//string DATA_DIR="/home/uber/progs/data/M4DataSet/"; +string OUTPUT_DIR = "f:\\progs\\data\\M4\\"; +//string OUTPUT_DIR="/home/uber/progs/data/M4/"; + +int LBACK = 0; //LBACK 0 means final mode: learning on all data and forecasting. LBACK=1 would move back by OUTPUT_SIZE, and forecast last known OUTPUT_SIZE points, for backtesting. LBACK could be a larger integer, but then number of series shrinks. + + +//PARAMS-------------- +string VARIABLE = "Hourly"; +const string run = "50/49 Att 4/5 1,4)(24,168) LR=0.01,{7,5e-3f},{18,1e-3f},{22,3e-4f} EPOCHS=27, LVP=10, CSP=1"; + +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +const float PERCENTILE = 50; //we always use Pinball loss. When forecasting point value, we actually forecast median, so PERCENTILE=50 +const float TRAINING_PERCENTILE = 49; //the program has a tendency for positive bias. So, we can reduce it by running smaller TRAINING_PERCENTILE + +const int SEASONALITY_NUM = 2;//0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 24; +const int SEASONALITY2 = 168; +vector> dilations = { { 1,4 },{ 24, 168 } }; + +const float INITIAL_LEARNING_RATE = 0.01f; +const map LEARNING_RATES = { { 7,5e-3f },{ 18,1e-3f },{ 22,3e-4f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 27; + +float LEVEL_VARIABILITY_PENALTY = 10; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 1; + +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 24; +const unsigned int OUTPUT_SIZE = 48; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK +const int MAX_SERIES_LENGTH = 53 * SEASONALITY2 + MIN_SERIES_LENGTH; //==all +const int TOPN = 4; + + +/* +string VARIABLE = "Weekly"; +const string run = "50/47 Att 3/5 (1,52) LR=1e-3 {11,3e-4f}, {17,1e-4f} EPOCHS=23, LVP=100 6y"; + +const int PERCENTILE = 50; //we always use Pinball loss. When forecasting point value, we actually forecast median, so PERCENTILE=50 +const int TRAINING_PERCENTILE = 47; //the program has a tendency for positive bias. 
So, we can reduce it by running smaller TRAINING_PERCENTILE + +//#define USE_RESIDUAL_LSTM +#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +const int SEASONALITY_NUM = 0; //0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 52; +const int SEASONALITY2 = 0; +vector> dilations = { { 1, 52 } }; + +const float INITIAL_LEARNING_RATE = 1e-3; +const map LEARNING_RATES = { { 11,3e-4f },{ 17,1e-4f } }; //at which epoch we manually set them up to what +const int NUM_OF_TRAIN_EPOCHS = 23; + +float LEVEL_VARIABILITY_PENALTY = 100; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; +const float PER_SERIES_LR_MULTIP = 1; + +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 10; +const unsigned int OUTPUT_SIZE = 13; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK + //#Min. 1st Qu. Median Mean 3rd Qu. Max. + //#81 380 935 1023 1604 2598 +const int MAX_SERIES_LENGTH = 6 * SEASONALITY + MIN_SERIES_LENGTH; //==all +const int TOPN = 3; +*/ + +/* +string VARIABLE = "Daily"; +const string run = "Final 50/49 730 4/5 (1,3)(7,14) LR=3e-4 {9,1e-4f} EPOCHS=13, LVP=100 13w"; +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +const int PERCENTILE = 50; //we always use Pinball loss. When forecasting point value, we actually forecast median, so PERCENTILE=50 +const int TRAINING_PERCENTILE = 49; //the program has a tendency for positive bias. So, we can reduce it by running smaller TRAINING_PERCENTILE + +const int SEASONALITY_NUM = 1; //0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 7; +const int SEASONALITY2 = 0; +vector> dilations = { { 1,3 },{ 7, 14 } }; + +const float INITIAL_LEARNING_RATE = 3e-4; +const map LEARNING_RATES = { { 9,1e-4f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 13; + +float LEVEL_VARIABILITY_PENALTY = 100; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; + +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 7; +const unsigned int OUTPUT_SIZE = 14; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK + //#Min. 1st Qu. Median Mean 3rd Qu. Max. + //##93 323 2940 2357 4197 9919 +const int MAX_SERIES_LENGTH = 13 * SEASONALITY + MIN_SERIES_LENGTH; +const int TOPN = 4; +*/ + +/* +string VARIABLE = "Yearly"; +const string run = "50 Att 4/5 (1,6) LR=1e-4 EPOCHS=12, 60*"; + +//#define USE_RESIDUAL_LSTM +#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +const float PERCENTILE = 50; //we always use Pinball loss. 
When forecasting point value, we actually forecast median, so PERCENTILE=50 +const float TRAINING_PERCENTILE = 50; + +const int SEASONALITY_NUM = 0; //0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 0; +const int SEASONALITY2 = 0; +vector> dilations = { { 1,6 } }; + +const float INITIAL_LEARNING_RATE = 1e-4; +const map LEARNING_RATES = { { 15,1e-5 } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 12; + +float LEVEL_VARIABILITY_PENALTY = 0; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; + +const unsigned int STATE_HSIZE = 30; + +const unsigned int INPUT_SIZE = 4; +const unsigned int OUTPUT_SIZE = 6; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK + //#Min. 1st Qu. Median Mean 3rd Qu. Max. + //#13.00 20.00 29.00 31.32 40.00 835.00 +const int MAX_SERIES_LENGTH = 60 + MIN_SERIES_LENGTH; +const int TOPN = 4; +*/ + +//end of VARIABLE-specific params + +const int BIG_LOOP = 3; +const int NUM_OF_NETS = 5; +const unsigned int ATTENTION_HSIZE = STATE_HSIZE; + + +#if defined _DEBUG + const int MAX_NUM_OF_SERIES = 20; +#else + const int MAX_NUM_OF_SERIES = -1; +#endif // _DEBUG + +const unsigned int NUM_OF_CATEGORIES = 6; +const int AVERAGING_LEVEL = 5; +const float EPS=1e-6; + +const float NOISE_STD=0.001; +const int FREQ_OF_TEST=1; +const float GRADIENT_CLIPPING=50; +const float BIG_FLOAT=1e38;//numeric_limits::max(); +const bool PRINT_DIAGN = false; +const float TAU = PERCENTILE / 100.; +const float TRAINING_TAU = TRAINING_PERCENTILE / 100.; + +string INPUT_PATH = DATA_DIR + VARIABLE + "-train.csv"; +string INFO_INPUT_PATH = DATA_DIR + "M4-info.csv"; + + +Expression squash(const Expression& x) { + return log(x); +} +float squash(float x) { + return log(x); +} + +Expression expand(const Expression& x) { + return exp(x); +} +float expand(float x) { + return exp(x); +} + + +#if defined USE_ODBC + void HandleDiagnosticRecord(SQLHANDLE hHandle, + SQLSMALLINT hType, + RETCODE RetCode); + + #if defined _WINDOWS + WCHAR* pwszConnStr = L"DSN=slawek"; + #else + SQLCHAR* pwszConnStr = (SQLCHAR*) "DSN=slawek"; + #endif + #define TRYODBC(h, ht, x) { RETCODE rc = x;\ + if (rc != SQL_SUCCESS) \ + { \ + HandleDiagnosticRecord (h, ht, rc); \ + } \ + if (rc == SQL_ERROR) \ + { \ + fprintf(stderr, "Error in " #x "\n"); \ + if (hStmt) { \ + SQLFreeHandle(SQL_HANDLE_STMT, hStmt); \ + } \ + if (hDbc) { \ + SQLDisconnect(hDbc); \ + SQLFreeHandle(SQL_HANDLE_DBC, hDbc); \ + } \ + if (hEnv) { \ + SQLFreeHandle(SQL_HANDLE_ENV, hEnv); \ + } \ + exit(-1); \ + } \ + } + +#endif + +struct M4TS {//storing series data + vector < float> categories_vect; + vector vals; + vector testVals;//empty, unless LBACK>0 + int n; + + M4TS(string category, stringstream &line_stream) { + array categories = { 0,0,0,0,0,0 }; + if (category == "Demographic") + categories[0] = 1; + else if (category == "Finance") + categories[1] = 1; + else if (category == "Industry") + categories[2] = 1; + else if (category == "Macro") + categories[3] = 1; + else if (category == "Micro") + categories[4] = 1; + else if (category == "Other") + categories[5] = 1; + else { + cerr << "unknown category?"; + exit(-1); + } + for (int i = 0; i < NUM_OF_CATEGORIES; i++) + 
categories_vect.push_back(categories[i]); + + string tmp_str; + while(getline(line_stream, tmp_str, ',' )) { + string val_str; + for (const auto c : tmp_str) { + if (c != '\"' && c != '\r') //remove quotes and very occasional double end of line + val_str.push_back(c); + } + if (val_str.size() == 0) + break; + float val=(atof(val_str.c_str())); + vals.push_back(val); + } + if (LBACK > 0) { //extract last OUTPUT_SIZE points as the test values + if (vals.size() > LBACK*OUTPUT_SIZE) { + auto first = vals.begin() + vals.size() - LBACK*OUTPUT_SIZE; + auto pastLast = vals.begin() + vals.size() - (LBACK-1)*OUTPUT_SIZE; + vector input_vect(first, pastLast); //[first,pastLast) + testVals= input_vect; + vals.resize(vals.size() - LBACK*OUTPUT_SIZE); //remove last LBACK*OUTPUT_SIZE elements + n = vals.size(); + } else + n = 0; + } else { + n = vals.size(); + } + if (n > MAX_SERIES_LENGTH) {//chop long series + vals.erase(vals.begin(), vals.begin() + (n-MAX_SERIES_LENGTH)); //remove some early data + n = vals.size(); + } + } + M4TS(){}; +}; + +#if defined USE_ODBC +void HandleDiagnosticRecord(SQLHANDLE hHandle, + SQLSMALLINT hType, + RETCODE RetCode); +#endif + + + +struct AdditionalParams {//Per series, important + Parameter levSm; + Parameter sSm; + array initSeasonality; + Parameter sSm2; + array initSeasonality2; +}; +struct AdditionalParamsF {//Used for storing diagnostics + float levSm; + float sSm; + array initSeasonality; + float sSm2; + array initSeasonality2; + vector levels; + vector seasons; + vector seasons2; +}; + + +array perfToRanking (array perf_arr) { + array index; + + for (int itop=0; itop losses; + for (unsigned int indx = 0; indx as_scalar(forec.value())) + losses.push_back((actual - forec)*TRAINING_TAU); + else + losses.push_back((actual - forec)*(TRAINING_TAU - 1)); + } + return sum(losses) / OUTPUT_SIZE * 2; +} + + +// weighted quantile Loss, used just for diagnostics, if if LBACK>0 and PERCENTILE!=50 +float wQuantLoss(vector& out_vect, vector& actuals_vect) { + float sumf = 0; float suma=0; + for (unsigned int indx = 0; indx forec) + sumf = sumf + (actual - forec)*TAU; + else + sumf = sumf + (actual - forec)*(TAU - 1); + } + return sumf / suma * 200; +} + +//used just for diagnostics, if LBACK>0 and PERCENTILE==50 +float sMAPE(vector& out_vect, vector& actuals_vect) { + float sumf = 0; + for (unsigned int indx = 0; indx& out_vect, vector& actuals_vect) { + if (PERCENTILE==50) + return sMAPE(out_vect, actuals_vect); + else + return wQuantLoss(out_vect, actuals_vect); +} + +int main(int argc, char** argv) { + dynet::initialize(argc, argv); + + int ibigOffset = 0; + if (argc == 2) + ibigOffset = atoi(argv[1]); + + cout << VARIABLE<<" "< 0) { + cout<<"Warning. LEVEL_VARIABILITY_PENALTY has to be equal zero if SEASONALITY_NUM==0"<tm_year+1900; + now_ts.month=now->tm_mon+1; + now_ts.day=now->tm_mday; + now_ts.hour=now->tm_hour; + now_ts.minute=now->tm_min; + now_ts.second=now->tm_sec; + now_ts.fraction=0; //reportedly needed + + const int OFFSET_TO_FIRST_ACTUAL=5; + string insertQuery_str = "insert into M72nn(run, LBack, ibig, series, epoch "; + for (int iq = 1; iq <= OUTPUT_SIZE; iq++) { + stringstream ss; + ss << iq; + string iq_str = ss.str(); + insertQuery_str = insertQuery_str +", actual"+iq_str+", forec" + iq_str; + } + insertQuery_str = insertQuery_str +", trainingError, variable, n, dateTimeOfPrediction) \ + values(? , ? , ? , ? , ? 
"; + for (int iq = 1; iq <= OUTPUT_SIZE; iq++) { + insertQuery_str = insertQuery_str + ",?,?"; + } + insertQuery_str = insertQuery_str + ",?,?,?,?)"; + #if defined _WINDOWS + wstring insertQuery(insertQuery_str.begin(), insertQuery_str.end()); + SQLWCHAR* sqlQuery = (SQLWCHAR*)insertQuery.c_str(); + #else + SQLCHAR* sqlQuery =(SQLCHAR*)insertQuery_str.c_str(); + #endif + + SQLHENV hEnv = NULL; + SQLHDBC hDbc = NULL; + SQLHSTMT hStmt = NULL, hInsertStmt = NULL; + + if (SQLAllocHandle(SQL_HANDLE_ENV, SQL_NULL_HANDLE, &hEnv) == SQL_ERROR) { + fprintf(stderr, "Unable to allocate an environment handle\n"); + exit(-1); + } + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLSetEnvAttr(hEnv, + SQL_ATTR_ODBC_VERSION, + (SQLPOINTER)SQL_OV_ODBC3, + 0)); + + // Allocate a connection + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLAllocHandle(SQL_HANDLE_DBC, hEnv, &hDbc)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLDriverConnect(hDbc, + NULL, + pwszConnStr, + SQL_NTS, + NULL, + 0, + NULL, + SQL_DRIVER_COMPLETE)); + fprintf(stderr, "Connected!\n"); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLSetConnectAttr(hDbc, SQL_ATTR_AUTOCOMMIT, (SQLPOINTER)SQL_AUTOCOMMIT_OFF, SQL_IS_INTEGER)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLAllocHandle(SQL_HANDLE_STMT, hDbc, &hInsertStmt)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLPrepare(hInsertStmt, sqlQuery, SQL_NTS)); + + SQLLEN nullTerminatedStringOfRun = SQL_NTS; + SQLLEN nullTerminatedStringOfSeries = SQL_NTS; + SQLLEN nullTerminatedStringOfVariable = SQL_NTS; + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 1, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)run.c_str(), 0, &nullTerminatedStringOfRun)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 2, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&LBACK, 0, NULL)); + + // variable, n, dateTimeOfPrediction + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL+2*OUTPUT_SIZE+2, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)VARIABLE.c_str(), 0, &nullTerminatedStringOfVariable)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE + 4, SQL_PARAM_INPUT, SQL_C_TYPE_TIMESTAMP, SQL_TYPE_TIMESTAMP, 0, 0, &now_ts, sizeof(TIMESTAMP_STRUCT), NULL)); +#endif + + random_device rd; // only used once to initialise (seed) engine + mt19937 rng(rd()); // random-number engine used (Mersenne-Twister in this case) + + vector series_vect; + unordered_map allSeries_map(30000);//max series in one chunk would be 24k for yearly series + unordered_map seriesCategories_map(120000);//100k series + + ifstream infoFile(INFO_INPUT_PATH); + string line; + getline(infoFile, line); //header + while (getline(infoFile, line)) { + //cout << string( line)<= MIN_SERIES_LENGTH) { + series_vect.push_back(series); + allSeries_map[series] = m4Obj; + } + if (MAX_NUM_OF_SERIES>0 && series_vect.size()>=MAX_NUM_OF_SERIES) + break; + } + cout << "num of series:" << series_vect.size() << endl; + + unsigned int series_len=(unsigned int)series_vect.size(); + uniform_int_distribution uniOnSeries(0,series_len-1); // closed interval [a, b] + uniform_int_distribution uniOnNets(0,NUM_OF_NETS-1); // closed interval [a, b] + + unordered_map, AVERAGING_LEVEL+1>, NUM_OF_NETS>> testResults_map((int)series_len*1.5);//per series, etc... 
+ unordered_map> finalResults_map((int)series_len*1.5);//per series + set diagSeries; + + unordered_map> netRanking_map; + for (int ibig=0; ibig perfValid_vect; + int epochOfLastChangeOfLRate = -1; + +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&ibigDb, 0, NULL)); +#endif + + //create nets + array paramsCollection_arr;//per net + array perSeriesParamsCollection_arr;//per net + array trainers_arr; + array perSeriesTrainers_arr; + + + #if defined USE_RESIDUAL_LSTM + array, NUM_OF_NETS> rnnStack_arr; + #elif defined USE_ATTENTIVE_LSTM + array, NUM_OF_NETS> rnnStack_arr; + #else + array, NUM_OF_NETS> rnnStack_arr; + #endif + + array MLPW_parArr; + array MLPB_parArr; + array adapterW_parArr; + array adapterB_parArr; + + //this is not a history, this is the real stuff + unordered_map* > additionalParams_mapOfArr((int)series_len*1.5); //per series, per net + for (auto iter = series_vect.begin() ; iter != series_vect.end(); ++iter) { + string series=*iter; + additionalParams_mapOfArr[series]=new array(); + } + + for (int inet=0; inetclip_threshold = GRADIENT_CLIPPING; + perSeriesTrainers_arr[inet]=new AdamTrainer (perSeriesPC, INITIAL_LEARNING_RATE*PER_SERIES_LR_MULTIP, 0.9, 0.999, EPS); + perSeriesTrainers_arr[inet]->clip_threshold = GRADIENT_CLIPPING; + + auto& rNNStack=rnnStack_arr[inet]; + #if defined USE_RESIDUAL_LSTM + rNNStack.emplace_back(ResidualDilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, pc)); + for (int il = 1; il* additionalParams_arr=additionalParams_mapOfArr[series]; + additionalParams_arr->at(inet).levSm=perSeriesPC.add_parameters({1}, 0.5);//per series, per net + if (SEASONALITY_NUM > 0) { + additionalParams_arr->at(inet).sSm = perSeriesPC.add_parameters({ 1 }, 0.5); + for (int isea = 0; iseaat(inet).initSeasonality[isea] = perSeriesPC.add_parameters({ 1 }, 0.5); + } + if (SEASONALITY_NUM > 1) { + additionalParams_arr->at(inet).sSm2 = perSeriesPC.add_parameters({ 1 }, 0.5); + for (int isea = 0; iseaat(inet).initSeasonality2[isea] = perSeriesPC.add_parameters({ 1 }, 0.5); + } + } + }//seting up, through nets + + //history of params. Series->[NUM_OF_NETS,NUM_OF_TRAIN_EPOCHS] + unordered_map, NUM_OF_NETS>*> historyOfAdditionalParams_map((int)series_len*1.5); + for (auto iter = series_vect.begin() ; iter != series_vect.end(); ++iter) { + string series=*iter; + historyOfAdditionalParams_map[series]=new array, NUM_OF_NETS>(); + } + + //first assignment. Yes, we are using vector , so the very first time the duplicates are possible. 
But a set can't be sorted + array, NUM_OF_NETS> seriesAssignment;//every net has an array + for (int j=0; j> netPerf_map; + for (int inet=0; inetlearning_rate = LEARNING_RATES.at(iEpoch); + if (inet==0) + cout << "changing LR to:" << trainer->learning_rate << endl; + perSeriesTrainer->learning_rate = LEARNING_RATES.at(iEpoch)*PER_SERIES_LR_MULTIP; + } + + auto& rNNStack=rnnStack_arr[inet]; + Parameter& MLPW_par = MLPW_parArr[inet]; + Parameter& MLPB_par = MLPB_parArr[inet]; + Parameter& adapterW_par=adapterW_parArr[inet]; + Parameter& adapterB_par=adapterB_parArr[inet]; + + vector oneNetAssignments=seriesAssignment[inet]; + random_shuffle (oneNetAssignments.begin(), oneNetAssignments.end()); + + vector epochLosses; + vector forecLosses; vector levVarLosses; vector stateLosses; + for (auto iter = oneNetAssignments.begin() ; iter != oneNetAssignments.end(); ++iter) { + string series=*iter; + auto m4Obj=allSeries_map[series]; + + ComputationGraph cg; + for (int il=0; ilat(inet); + array& historyOfAdditionalParams_arr=historyOfAdditionalParams_map[series]->at(inet); + + Expression MLPW_ex,MLPB_ex; + if (ADD_NL_LAYER) { + MLPW_ex = parameter(cg, MLPW_par); + MLPB_ex = parameter(cg, MLPB_par); + } + Expression adapterW_ex=parameter(cg, adapterW_par); + Expression adapterB_ex=parameter(cg, adapterB_par); + + Expression levSmSerNet0_ex= parameter(cg, additionalParams.levSm); + Expression levSm_ex = logistic(levSmSerNet0_ex); + + vector season_exVect;//vector, because we do not know how long the series is + Expression sSm_ex; + if (SEASONALITY_NUM > 0) { + Expression sSmSerNet0_ex= parameter(cg, additionalParams.sSm); + sSm_ex = logistic(sSmSerNet0_ex); + + for (int isea = 0; isea season2_exVect;//vector, because we do not know how long the series is + Expression sSm2_ex; + if (SEASONALITY_NUM > 1) { + Expression sSm2SerNet0_ex= parameter(cg, additionalParams.sSm2); + sSm2_ex = logistic(sSm2SerNet0_ex); + + for (int isea = 0; isea logDiffOfLevels_vect; + vector levels_exVect; + if (SEASONALITY_NUM == 0) { + levels_exVect.push_back(input(cg, m4Obj.vals[0])); + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + } + else if (SEASONALITY_NUM == 2) { + Expression lev = cdiv(input(cg, m4Obj.vals[0]), season_exVect[0] * season2_exVect[0]); + levels_exVect.push_back(lev); + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + //if prediction horizon is larger than seasonality, so we need to repeat some of the seasonality factors + if (OUTPUT_SIZE>SEASONALITY2) { + unsigned long startSeasonalityIndx = season2_exVect.size() - SEASONALITY2; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY2); i++) + season2_exVect.push_back(season2_exVect[startSeasonalityIndx + i]); + } + } + else { + cerr<<"SEASONALITY_NUM="<< SEASONALITY_NUM; + exit(-1); + } + + Expression levelVarLoss_ex; + if (LEVEL_VARIABILITY_PENALTY > 0) { + vector levelVarLoss_v; + for (int i = 1; i losses;//losses of steps through single time series + for (int i=INPUT_SIZE-1; i<(m4Obj.n- OUTPUT_SIZE); i++) { + vector::const_iterator first = m4Obj.vals.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1; //not including the last one + vector 
input_vect(first, pastLast); //[first,pastLast) + + first = m4Obj.vals.begin() + i + 1; + pastLast = m4Obj.vals.begin() + i + 1 + OUTPUT_SIZE; + vector labels_vect(first, pastLast); //[first,pastLast) + + Expression input1_ex = input(cg, { INPUT_SIZE }, input_vect); + Expression labels1_ex = input(cg, { OUTPUT_SIZE }, labels_vect); + + if (SEASONALITY_NUM > 0 ) { + vector::const_iterator firstE = season_exVect.begin() +i+1-INPUT_SIZE; + vector::const_iterator pastLastE = season_exVect.begin() +i+1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality_ex=concatenate(inputSeasonality_exVect); + + firstE = season_exVect.begin() + i + 1; + pastLastE = season_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + outputSeasonality_ex = concatenate(outputSeasonality_exVect); + + input1_ex = cdiv(input1_ex, inputSeasonality_ex); // input deseasonalization + labels1_ex = cdiv(labels1_ex, outputSeasonality_ex); //output deseasonalization + } + if (SEASONALITY_NUM > 1) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1; //not including the last one + vector inputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality2_ex = concatenate(inputSeasonality2_exVect); + + firstE = season2_exVect.begin() + i + 1; + pastLastE = season2_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality2_ex = concatenate(outputSeasonality2_exVect); + + input1_ex = cdiv(input1_ex, inputSeasonality2_ex); //input deseasonalization + labels1_ex = cdiv(labels1_ex, outputSeasonality2_ex); //output deseasonalization + } + + vector joinedInput_ex; + joinedInput_ex.emplace_back(noise(squash(cdiv(input1_ex, levels_exVect[i])), NOISE_STD)); //input normalization+noise + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression labels_ex = squash(cdiv(labels1_ex, levels_exVect[i]));//output normalization + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il=INPUT_SIZE+MIN_INP_SEQ_LEN) + losses.push_back(loss_ex); + }//through points of a series + + Expression forecLoss_ex= average(losses); + Expression loss_exp = forecLoss_ex; + + float levVarLoss=0; + if (LEVEL_VARIABILITY_PENALTY > 0) { + Expression levelVarLossP_ex = levelVarLoss_ex*LEVEL_VARIABILITY_PENALTY; + levVarLoss = as_scalar(levelVarLossP_ex.value()); + levVarLosses.push_back(levVarLoss); + loss_exp= loss_exp + levelVarLossP_ex; + } + + float cStateLoss=0; + if (C_STATE_PENALTY>0) { + vector cStateLosses_vEx; + for (int irnn = 0; irnn < rNNStack.size(); irnn++) + for (int it = 0; itupdate();//update shared weights + perSeriesTrainer->update(); //update params of this series only + } catch (exception& e) { //long diagnostics for this unlikely event :-) + cerr<<"cought exception while doing "< maxAbs) { + maxAbs = abs(state[iv]); + timeOfMax=it; + layerOfMax=il; + chunkOfMax= irnn; + } + } + } //through layers/states + } //through time + } //through chunks + + cout << "levSm:" << as_scalar(levSm_ex.value()) << endl; + if (SEASONALITY_NUM > 0) + cout << "sSm:" << as_scalar(sSm_ex.value()) << endl; + if (SEASONALITY_NUM > 1) + cout << "sSm2:" << as_scalar(sSm2_ex.value()) << endl; + cout << "max abs:" << maxAbs <<" 
at time:"<< timeOfMax<<" at layer:"<< layerOfMax<<" and chunk:"<< chunkOfMax< 0) { + histAdditionalParams.sSm=as_scalar(sSm_ex.value()); + for (int isea = 0; isea 1) { + histAdditionalParams.sSm2 = as_scalar(sSm2_ex.value()); + for (int isea=0; isea 0 || C_STATE_PENALTY > 0) { + float averageForecLoss = accumulate(forecLosses.begin(), forecLosses.end(), 0.0) / forecLosses.size(); + cout << " forec loss:" << averageForecLoss * 100; + } + if (LEVEL_VARIABILITY_PENALTY > 0) { + float averagelevVarLoss = accumulate(levVarLosses.begin(), levVarLosses.end(), 0.0) / levVarLosses.size(); + cout << " levVar loss:" << averagelevVarLoss * 100; + } + if (C_STATE_PENALTY > 0) { + float averageStateLoss = accumulate(stateLosses.begin(), stateLosses.end(), 0.0) / stateLosses.size(); + cout << " state loss:" << averageStateLoss * 100; + } + cout<at(inet); + Expression MLPW_ex, MLPB_ex; + if (ADD_NL_LAYER) { + MLPW_ex = parameter(cg, MLPW_par); + MLPB_ex = parameter(cg, MLPB_par); + } + Expression adapterW_ex=parameter(cg, adapterW_par); + Expression adapterB_ex=parameter(cg, adapterB_par); + + Expression levSmSerNet0_ex = parameter(cg, additionalParams.levSm); + Expression levSm_ex = logistic(levSmSerNet0_ex); + + vector season_exVect;//vector, because we do not know how long the series is + Expression sSm_ex; + if (SEASONALITY_NUM > 0) { + Expression sSmSerNet0_ex= parameter(cg, additionalParams.sSm); + sSm_ex = logistic(sSmSerNet0_ex); + + for (int isea = 0; isea season2_exVect;//vector, because we do not know how long the series is + Expression sSm2_ex; + if (SEASONALITY_NUM > 1) { + Expression sSm2SerNet0_ex= parameter(cg, additionalParams.sSm2); + sSm2_ex = logistic(sSm2SerNet0_ex); + + for (int isea = 0; isea levels_exVect; + if (SEASONALITY_NUM == 0) { + levels_exVect.push_back(input(cg, m4Obj.vals[0])); + for (int i = 1; i0 then this is shortened, so it always contains data awe have right to access + Expression newLevel_ex = m4Obj.vals[i] * cdiv(levSm_ex, season_exVect[i]) + (1 - levSm_ex)*levels_exVect[i - 1]; + levels_exVect.push_back(newLevel_ex); + + Expression newSeason_ex = m4Obj.vals[i] * cdiv(sSm_ex, newLevel_ex) + (1 - sSm_ex)*season_exVect[i]; + season_exVect.push_back(newSeason_ex); + } + + //if prediction horizon is larger than seasonality, so we need to repeat some of the seasonality factors + if (OUTPUT_SIZE>SEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + } + else if (SEASONALITY_NUM == 2) { + Expression lev = cdiv(input(cg, m4Obj.vals[0]), season_exVect[0] * season2_exVect[0]); + levels_exVect.push_back(lev); + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + //if prediction horizon is larger than seasonality, so we need to repeat some of the seasonality factors + if (OUTPUT_SIZE>SEASONALITY2) { + unsigned long startSeasonalityIndx = season2_exVect.size() - SEASONALITY2; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY2); i++) + season2_exVect.push_back(season2_exVect[startSeasonalityIndx + i]); + } + } + else { + cerr<<"SEASONALITY_NUM="<< SEASONALITY_NUM; + exit(-1); + } + + + Expression inputSeasonality_ex; Expression inputSeasonality2_ex; + Expression outputSeasonality_ex; Expression outputSeasonality2_ex; + vector losses;//losses 
of steps through single time series + Expression out_ex;//we declare it here, bcause the last one will be the forecast + for (int i=INPUT_SIZE-1; i::const_iterator first = m4Obj.vals.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1; //not including the last one + vector input_vect(first, pastLast); //[first,pastLast) + Expression input1_ex = input(cg, { INPUT_SIZE }, input_vect); + + if (SEASONALITY_NUM > 0 ) { + vector::const_iterator firstE = season_exVect.begin() +i+1-INPUT_SIZE; + vector::const_iterator pastLastE = season_exVect.begin() +i+1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality_ex=concatenate(inputSeasonality_exVect); + input1_ex = cdiv(input1_ex, inputSeasonality_ex); // input deseasonalization + } + if (SEASONALITY_NUM > 1) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1; //not including the last one + vector inputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality2_ex = concatenate(inputSeasonality2_exVect); + input1_ex = cdiv(input1_ex, inputSeasonality2_ex); //input deseasonalization + } + + vector joinedInput_ex; + joinedInput_ex.emplace_back(noise(squash(cdiv(input1_ex, levels_exVect[i])), NOISE_STD)); //input normalization+noise + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il::const_iterator first = m4Obj.vals.begin() + i + 1; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1 + OUTPUT_SIZE; + vector labels_vect(first, pastLast); //[first,pastLast) + Expression labels1_ex = input(cg, { OUTPUT_SIZE }, labels_vect); + + if (SEASONALITY_NUM > 0) { + vector::const_iterator firstE = season_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + outputSeasonality_ex = concatenate(outputSeasonality_exVect); + labels1_ex = cdiv(labels1_ex, outputSeasonality_ex); //output deseasonalization + } + if (SEASONALITY_NUM > 1) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality2_ex = concatenate(outputSeasonality2_exVect); + labels1_ex = cdiv(labels1_ex, outputSeasonality2_ex); //output deseasonalization + } + Expression labels_ex = squash(cdiv(labels1_ex, levels_exVect[i]));//output normalization + + Expression loss_ex = pinBallLoss(out_ex, labels_ex); + if (i>=INPUT_SIZE+MIN_INP_SEQ_LEN) + losses.push_back(loss_ex); //training area losses + } + + if (i==(m4Obj.n-1)) {//validation loss + out_ex=expand(out_ex)*levels_exVect[i];//back to original scale + if (SEASONALITY_NUM > 0 ) { + vector::const_iterator firstE = season_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + outputSeasonality_ex = concatenate(outputSeasonality_exVect); + out_ex = cmult(out_ex, outputSeasonality_ex);//reseasonalize + } + if (SEASONALITY_NUM > 1 ) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1; + 
vector::const_iterator pastLastE = season2_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality2_ex = concatenate(outputSeasonality2_exVect); + out_ex = cmult(out_ex, outputSeasonality2_ex);//reseasonalize + } + //we do not need the matching label here, because we do not bother calculate valid losses of each net across all series. + //We care about best and topn performance + } + }//end of going through all point of a series + + Expression loss_exp = average(losses); + float loss = as_scalar(cg.forward(loss_exp));//training loss of a single series + netPerf_map[series][inet]=loss; + + //unordered_map, AVERAGING_LEVEL+1>, NUM_OF_NETS>, BIG_LOOP>> testResults_map((int)series_len*1.5);//per series, big loop, etc... + //No epoch here, because this will just reflect the current (latest) situation - the last few epochs + vector out_vect=as_vector(out_ex.value()); + testResults_map[series][inet][iEpoch%AVERAGING_LEVEL]=out_vect; + if (iEpoch>=AVERAGING_LEVEL && iEpoch % FREQ_OF_TEST==0) { + vector firstForec=testResults_map[series][inet][0]; + testResults_map[series][inet][AVERAGING_LEVEL]=firstForec; + for (int ii=1; ii nextForec=testResults_map[series][inet][ii]; + for (int iii=0; iii0 && iEpoch % FREQ_OF_TEST==0) { + //now that we have saved outputs of all nets on all series, let's calc how best and topn combinations performed during current epoch. + vector bestEpochLosses; + vector bestEpochAvgLosses; + vector topnEpochLosses; + vector topnEpochAvgLosses; + + for (auto iter = series_vect.begin() ; iter != series_vect.end(); ++iter) { + string series=*iter; + auto m4Obj=allSeries_map[series]; + +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 4, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)series.c_str(), 0, &nullTerminatedStringOfSeries)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE + 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&m4Obj.n, 0, NULL)); +#endif + + float avgLoss; + vector avgLatest; + vector avgAvg; + + for (int itop=0; itop 0) { + float qLoss = errorFunc(testResults_map[series][inet][iEpoch%AVERAGING_LEVEL], m4Obj.testVals); + bestEpochLosses.push_back(qLoss); + } + avgLatest=testResults_map[series][inet][iEpoch%AVERAGING_LEVEL]; //used later for calculating topn loss + + if (iEpoch>=AVERAGING_LEVEL) { + if (LBACK > 0) { + float qLoss = errorFunc(testResults_map[series][inet][AVERAGING_LEVEL], m4Obj.testVals); + bestEpochAvgLosses.push_back(qLoss); + } + avgAvg=testResults_map[series][inet][AVERAGING_LEVEL]; + } + } else { + for (int iii=0; iii=AVERAGING_LEVEL) + avgAvg[iii]+=testResults_map[series][inet][AVERAGING_LEVEL][iii]; + } + } + }//through topn + + for (int iii=0; iii 0) { + float qLoss = errorFunc(avgLatest, m4Obj.testVals); + topnEpochLosses.push_back(qLoss); + } + + if (iEpoch>=AVERAGING_LEVEL) { + for (int iii = 0; iii 0) { +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE + 1, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&avgLoss, 0, NULL)); + + for (int iii=0; iii 0) { + float bestEpochLoss=accumulate( bestEpochLosses.begin(), bestEpochLosses.end(), 0.0)/bestEpochLosses.size(); + float topnEpochLoss=accumulate( topnEpochLosses.begin(), topnEpochLosses.end(), 0.0)/topnEpochLosses.size(); + cout<=AVERAGING_LEVEL) { + float 
bestEpochAvgLoss=accumulate( bestEpochAvgLosses.begin(), bestEpochAvgLosses.end(), 0.0)/bestEpochAvgLosses.size(); + float topnEpochAvgLoss=accumulate( topnEpochAvgLosses.begin(), topnEpochAvgLosses.end(), 0.0)/topnEpochAvgLosses.size(); + cout<<" bestAvg:"<> netRanking_map + netRanking_map[series]=perfToRanking(netPerf_map[series]); + + for (int itop=0; itop diagSeries; + for (int i=0; i<1; i++) {//add a few normal ones + int irand=uniOnSeries(rng); + diagSeries.insert(series_vect[irand]); + } + for(auto series : diagSeries) { + cout<at(inet); + for (int iEpoch=0; iEpoch 0 ) { + cout<<"sSm:"<at(inet); + for (int iEpoch=0; iEpoch 1 ) { + cout<<"sSm2:"<at(inet); + for (int iEpoch=0; iEpochat(inet); + for (int iEpoch = 0; iEpoch0) { + cout << "levels:" << iEpoch<<" "; + for (int iv = 0; iv 0 ) { + cout << "seasons:" << iEpoch<<" "; + for (int iv = 0; iv 1 ) { + cout << "seasons2:" << iEpoch<<" "; + for (int iv = 0; iv + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + + + + Source Files + + + \ No newline at end of file diff --git a/118 - slaweks17/c++/windows_VisualStudio/M43/M43.vcxproj b/118 - slaweks17/c++/windows_VisualStudio/M43/M43.vcxproj new file mode 100644 index 0000000..5da8187 --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/M43/M43.vcxproj @@ -0,0 +1,227 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + RelWithDebug + Win32 + + + RelWithDebug + x64 + + + + + + + + + + + {BE951571-3F3A-4048-BAA3-0C05F38CFF42} + Win32Proj + M43 + 8.1 + + + + Application + true + v140 + Unicode + + + Application + true + v140 + Unicode + + + Application + false + v140 + true + Unicode + + + Application + true + v140 + Unicode + Sequential + + + Application + true + v140 + Unicode + Sequential + + + Application + false + v140 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + true + + + true + + + true + + + true + + + false + + + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + + + + + NotUsing + Level1 + Disabled + WIN32;_WINDOWS;EIGEN_USE_MKL_ALL;EIGEN_FAST_MATH;NOMINMAX;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + E:\progs2\dynet;E:\progs\Eigen; + + + Console + true + E:\progs2\dynet\buildMKL\dynet\Debug + dynet.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + NotUsing + Level1 + MaxSpeed + WIN32;_WINDOWS;EIGEN_FAST_MATH;EIGEN_USE_MKL_ALL;NOMINMAX;NDEBUG_;CONSOLE;%(PreprocessorDefinitions) + E:\progs2\dynet;E:\progs\Eigen; + AnySuitable + true + Speed + AdvancedVectorExtensions + Default + MultiThreadedDLL + ProgramDatabase + true + false + + + Console + true + E:\progs2\dynet\buildMKL\dynet\RelWithDebInfo + dynet.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + 
NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + true + true + + + + + + \ No newline at end of file diff --git a/118 - slaweks17/c++/windows_VisualStudio/M43/slstm.h b/118 - slaweks17/c++/windows_VisualStudio/M43/slstm.h new file mode 100644 index 0000000..adb63a7 --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/M43/slstm.h @@ -0,0 +1,394 @@ +/** +* file slstm.h +* header for my implementation of dilated LSTMs, based on Dynet LSTM builders + - DilatedLSTMBuilder - standard Dilated LSTM (https://papers.nips.cc/paper/6613-dilated-recurrent-neural-networks.pdf) + - ResidualDilatedLSTMBuilder - Dilated LSTM with special Residual shortcuts, after https://arxiv.org/abs/1701.03360 + - AttentiveDilatedLSTMBuilder - Dilated LSTM with Attention mechanism, as in the second stage of https://arxiv.org/abs/1704.02971 +* +Slawek Smyl, Mar-May 2018 +*/ + +#ifndef DYNET_SLSTMS_H_ +#define DYNET_SLSTMS_H_ + +#include "dynet/dynet.h" +#include "dynet/rnn.h" +#include "dynet/expr.h" + +using namespace std; + +namespace dynet { + + //basd on VanillaLSTMBuilder + struct ResidualDilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + ResidualDilatedLSTMBuilder(); + /** + * \brief Constructor for the ResidualDilatedLSTMBuilder + * + * \param dilations Vector of dilations + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + * \param ln_lstm Whether to use layer normalization + * \param forget_bias value(float) to use as bias for the forget gate(default = 1.0) + */ + explicit ResidualDilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model, + bool ln_lstm = false, + float forget_bias = 1.f); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 
+ * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + * \param d_h Dropout rate \f$d_h\f$ for the output \f$h_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + /** + * \brief Get parameters in ResidualDilatedLSTMBuilder + * \return list of points to ParameterStorage objects + */ + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + // first index is layer, then ... + std::vector> ln_params; + + // first index is layer, then ... + std::vector> param_vars; + // first index is layer, then ... + std::vector> ln_param_vars; + + // first index is layer, then ... + std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + float dropout_rate_h; + bool ln_lstm; + float forget_bias; + bool dropout_masks_valid; + vector dilations; //one int per layer + + private: + ComputationGraph* _cg; // Pointer to current cg + + }; + + + struct DilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + DilatedLSTMBuilder(); + /** + * \brief Constructor for the DilatedLSTMBuilder + * + * \param dilations Vector of dilations + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + */ + explicit DilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model); + + Expression back() const override { return (cur == -1 ? 
h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. + * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + + void set_weightnoise(float std); + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + + // first index is layer, then ... + std::vector> param_vars; + + // first index is layer, then ... 
+ std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + float dropout_rate_h; + float weightnoise_std; + vector dilations; //one int per layer + + bool dropout_masks_valid; + private: + ComputationGraph* _cg; // Pointer to current cg + + }; + + + struct AttentiveDilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + AttentiveDilatedLSTMBuilder(); + /** + * \brief Constructor for the AttentiveDilatedLSTMBuilder + * + * \param max_dilations Vector, maximum dilations (per layer) + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + */ + explicit AttentiveDilatedLSTMBuilder(vector max_dilations, + unsigned input_dim, + unsigned hidden_dim, + unsigned attention_dim, + ParameterCollection& model); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 
+ * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + + void set_weightnoise(float std); + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + + // first index is layer, then ... + std::vector> param_vars; + + // first index is layer, then ... + std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + unsigned attention_dim; + float dropout_rate_h; + float weightnoise_std; + vector max_dilations; //one int per layer + + bool dropout_masks_valid; + private: + ComputationGraph* _cg; // Pointer to current cg + + }; +} // namespace dynet + +#endif diff --git a/118 - slaweks17/c++/windows_VisualStudio/M44/ES_RNN_E_PI.cc b/118 - slaweks17/c++/windows_VisualStudio/M44/ES_RNN_E_PI.cc new file mode 100644 index 0000000..e9729d5 --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/M44/ES_RNN_E_PI.cc @@ -0,0 +1,1744 @@ +/*ES-RNN-E: Exponential Smoothing Recurrent Neural Network hybrid, Ensemble of specialists. Prediction Intervals forecast. +Slawek Smyl, Jan-May 2017. + +Dilated LSTMs, with optional shortcuts, attention. Non-seasonal, single, or double seasonal. +It is meant to be used for all types of series from M4 competition, except Monthly and Quarterly (for performance reasons - Ensamble of Specilists is slower). +The program uses and requires Dynet NN library(https://github.com/clab/dynet); can be compiled and run on Windows, Linux, and Mac. 
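+
+Prediction intervals: ALPHA, TAUL and TAUH defined below set a central 95% interval (ALPHA=0.05, so the lower and
+upper bounds correspond to the 2.5% and 97.5% quantiles). Training uses a pinball-style interval penalty: an actual
+value falling outside the predicted bounds is charged in proportion to the miss times ALPHA_MULTIP = 2/ALPHA, and the
+backtesting error (errorFunc) additionally divides by the series' mean absolute seasonal difference, making it akin to
+the mean scaled interval score (MSIS) used for prediction intervals in the M4 competition. wQuantLoss, used only for
+diagnostics, is the standard weighted quantile loss: tau*(y-f) for y >= f and (tau-1)*(y-f) otherwise.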
+ +In contradistinction to ES-RNN, each executable uses all series, but in a similar manner repeating the whole learning process BIG_LOOP times (by default 3). +Invocation should pass BIG_LOOP offset +so e.g. create a script with following lines on Windows +start 0 +start 10 +start 20 +start 30 +on 4-core computer. +In this setup, learning and fitting would be repeated 4*3 times, probably unnecessarily too many, 6-8 independent runs should be enough for a good ensemble. +Therefore if running on say 8 core machine , one can extend the above script to 8 concurrent executions and reduce BIG_LOOP to 1. +(Creating final forecasts is done in a supplied R script) + +There are four blocks of parameters below, one active (starting with //PARAMS--------------) and three inactive. +These blocks are as they were during the final forecasting run. You need comment/uncomment to have one block of interest active. +*/ + + +//#define USE_ODBC +//define USE_ODBC if you want to +// 1. run the program in backtesting mode (which means you also need to set LBACK>0 below. Read the comment below. +// 2. save forecasts to a datatabase. Mysql and SQL Server were tested. The table creation and some other scripts should be found in \sql directory of the source code. +// Of course setting up ODBC is not that simple, :-), e.g. you need to create DSN=slawek, that points to a database with the output table. +// Saving to the db is convenient, but not necessary - all forecasts are always saved to as csv files in automatically created subdirectory (sorry sometimes two directories, so you have to copy :-)) of OUTPUT_DIR +//If saving to database you need to modify run varaible, for each new run, otherwise you will get the table key error. + +#include "dynet/dynet.h" +#include "dynet/training.h" +#include "dynet/expr.h" +#include "dynet/io.h" +#include "dynet/model.h" +#include "dynet/nodes.h" +#include "dynet/expr.h" +#include "dynet/lstm.h" +#include "slstm.h" //my implementation of dilated LSTMs + + +#if defined USE_ODBC + #if defined _WINDOWS + #include + #endif + #include + #include +#endif + +#include +#include +#include +//#include +#include +#include +#include +#include + +using namespace std; +using namespace dynet; + +string DATA_DIR = "f:\\progs\\data\\M4DataSet\\"; //with the competition data csvs +//string DATA_DIR="/home/uber/progs/data/M4DataSet/"; +string OUTPUT_DIR = "f:\\progs\\data\\M4\\"; +//string OUTPUT_DIR="/home/uber/progs/data/M4/"; + +int LBACK = 0; //LBACK 0 means final mode: learning on all data and forecasting. LBACK=1 would move back by OUTPUT_SIZE, and forecast last known OUTPUT_SIZE points, for backtesting. LBACK could be a larger integer, but then number of series shrinks. 
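+
+//A minimal sketch (illustration only, not executed anywhere; variable names here are just for the example) of what
+//the LBACK holdout described above amounts to; the actual logic lives in the M4TS constructor further down:
+//    std::vector<float> vals = ...;                                  //full series
+//    std::vector<float> test(vals.end() - LBACK*OUTPUT_SIZE,         //held-out window the forecasts
+//                            vals.end() - (LBACK-1)*OUTPUT_SIZE);    //are scored against
+//    vals.resize(vals.size() - LBACK*OUTPUT_SIZE);                   //training sees only the earlier part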
+ + +//PARAMS-------------- + +string VARIABLE = "Hourly"; +const string run0 = "(1,4)(24,168) LR=0.01, {25,3e-3f} EPOCHS=37, LVP=10, CSP=0"; +const string runL = "alpha5L " + run0; +const string runH = "alpha5H " + run0; + +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +const int SEASONALITY_NUM = 2;//0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 24; +const int SEASONALITY2 = 168; +vector> dilations = { { 1,4 },{ 24, 168 } }; + +const float INITIAL_LEARNING_RATE = 0.01f; +const map LEARNING_RATES = { { 20,1e-3f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 37; + +float LEVEL_VARIABILITY_PENALTY = 10; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; + +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 24; +const unsigned int OUTPUT_SIZE = 48; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK +const int MAX_SERIES_LENGTH = 53 * SEASONALITY2 + MIN_SERIES_LENGTH; //==all +const int TOPN = 4; + + +/* +string VARIABLE = "Weekly"; +const string run0 = "Att 4/5 (1,52) LR=1e-3 {15,3e-4f} EPOCHS=31, LVP=100 6y"; +const string runL = "alpha5L " + run0; +const string runH = "alpha5H " + run0; + +//#define USE_RESIDUAL_LSTM +#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = false; + +const int SEASONALITY_NUM = 1; //0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 52; +const int SEASONALITY2 = 0; +vector> dilations = { { 1, 52 } }; + +const float INITIAL_LEARNING_RATE = 1e-3; +const map LEARNING_RATES = { { 15,3e-4f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 31; + +float LEVEL_VARIABILITY_PENALTY = 100; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; + +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 10; +const unsigned int OUTPUT_SIZE = 13; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK + //#Min. 1st Qu. Median Mean 3rd Qu. Max. 
+ //#81 380 935 1023 1604 2598 +const int MAX_SERIES_LENGTH = 6 * SEASONALITY + MIN_SERIES_LENGTH; +const int TOPN = 4; +*/ + +/* + +string VARIABLE = "Daily"; +const string run0 = "4/5 (1,3)(7,14) LR=3e-4 {13,1e-4f} EPOCHS=21, LVP=100 13w"; +const string runL = "alpha5L " + run0; +const string runH = "alpha5H " + run0; + +//#define USE_RESIDUAL_LSTM +//#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER=false; + +const int SEASONALITY_NUM = 1; //0 means no seasonality, for Yearly; 1 - single seasonality for Daily(7), Weekly(52); 2 - dual seaonality for Hourly (24,168) +const int SEASONALITY = 7; +const int SEASONALITY2 = 0; +vector> dilations = { { 1,3 },{ 7, 14 } }; + +const float INITIAL_LEARNING_RATE = 3e-4; +const map LEARNING_RATES = { { 13,1e-4f } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 21; + +float LEVEL_VARIABILITY_PENALTY = 100; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; + +const unsigned int STATE_HSIZE = 40; + +const unsigned int INPUT_SIZE = 7; +const unsigned int OUTPUT_SIZE = 14; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK + //#Min. 1st Qu. Median Mean 3rd Qu. Max. + //##93 323 2940 2357 4197 9919 +const int MAX_SERIES_LENGTH = 13 * SEASONALITY + MIN_SERIES_LENGTH; +const int TOPN = 4; +*/ + +/* +string VARIABLE = "Yearly"; +const string run0 = "Att NL 4/5 (1,6) LR=1e-4 {17,3e-5}{22,1e-5} EPOCHS=29, 60*"; +const string runL = "alpha5L " + run0; +const string runH = "alpha5H " + run0; + +//#define USE_RESIDUAL_LSTM +#define USE_ATTENTIVE_LSTM +const bool ADD_NL_LAYER = true; + +const int SEASONALITY_NUM = 0; //0 means no seasonality +const int SEASONALITY = 1; //for no seasonality, set it to 1, important +const int SEASONALITY2 = 0; +vector> dilations = { { 1,6 } }; + +const float INITIAL_LEARNING_RATE = 1e-4; +const map LEARNING_RATES = { { 17,3e-5 },{ 22,1e-5 } }; //at which epoch we manually set them up to what +const float PER_SERIES_LR_MULTIP = 1; +const int NUM_OF_TRAIN_EPOCHS = 29; + +float LEVEL_VARIABILITY_PENALTY = 0; //Multiplier for L" penalty against wigglines of level vector. +const float C_STATE_PENALTY = 0; + +const unsigned int STATE_HSIZE = 30; + +const unsigned int INPUT_SIZE = 4; +const unsigned int OUTPUT_SIZE = 6; + +const int MIN_INP_SEQ_LEN = 0; +const int MIN_SERIES_LENGTH = OUTPUT_SIZE + INPUT_SIZE + MIN_INP_SEQ_LEN + 2; //this is compared to n==(total length - OUTPUT_SIZE). Total length may be truncated by LBACK + //#Min. 1st Qu. Median Mean 3rd Qu. Max. 
+ //#13.00 20.00 29.00 31.32 40.00 835.00 +const int MAX_SERIES_LENGTH = 60 + MIN_SERIES_LENGTH; +const int TOPN = 4; +*/ + +const float ALPHA = 0.05; +const float TAUL = ALPHA / 2; +const float TAUH = 1 - TAUL; +const float ALPHA_MULTIP = 2 / ALPHA; + +const int BIG_LOOP = 3; +const int NUM_OF_NETS = 5; +const unsigned ATTENTION_HSIZE = STATE_HSIZE; + +#if defined _DEBUG + const int MAX_NUM_OF_SERIES = 20; +#else + const int MAX_NUM_OF_SERIES = -1; +#endif // _DEBUG + +const unsigned int NUM_OF_CATEGORIES = 6; +const int AVERAGING_LEVEL = 5; +const float EPS=1e-6; + +const float NOISE_STD=0.001; +const int FREQ_OF_TEST=1; +const float GRADIENT_CLIPPING=50; +const float BIG_FLOAT=1e38;//numeric_limits::max(); +const bool PRINT_DIAGN = false; + +string INPUT_PATH = DATA_DIR + VARIABLE + "-train.csv"; +string INFO_INPUT_PATH = DATA_DIR + "M4-info.csv"; + + +Expression squash(const Expression& x) { + return log(x); +} +float squash(float x) { + return log(x); +} + +Expression expand(const Expression& x) { + return exp(x); +} +float expand(float x) { + return exp(x); +} + + +#if defined USE_ODBC + void HandleDiagnosticRecord(SQLHANDLE hHandle, + SQLSMALLINT hType, + RETCODE RetCode); + + #if defined _WINDOWS + WCHAR* pwszConnStr = L"DSN=slawek"; + #else + SQLCHAR* pwszConnStr = (SQLCHAR*) "DSN=slawek"; + #endif + #define TRYODBC(h, ht, x) { RETCODE rc = x;\ + if (rc != SQL_SUCCESS) \ + { \ + HandleDiagnosticRecord (h, ht, rc); \ + } \ + if (rc == SQL_ERROR) \ + { \ + fprintf(stderr, "Error in " #x "\n"); \ + if (hStmt) { \ + SQLFreeHandle(SQL_HANDLE_STMT, hStmt); \ + } \ + if (hDbc) { \ + SQLDisconnect(hDbc); \ + SQLFreeHandle(SQL_HANDLE_DBC, hDbc); \ + } \ + if (hEnv) { \ + SQLFreeHandle(SQL_HANDLE_ENV, hEnv); \ + } \ + exit(-1); \ + } \ + } + +#endif + +struct M4TS {//storing series data + vector < float> categories_vect; + vector vals; + vector testVals;//empty, unless LBACK>0 + float meanAbsSeasDiff; + int n; + + M4TS(string category, stringstream &line_stream) { + array categories = { 0,0,0,0,0,0 }; + if (category == "Demographic") + categories[0] = 1; + else if (category == "Finance") + categories[1] = 1; + else if (category == "Industry") + categories[2] = 1; + else if (category == "Macro") + categories[3] = 1; + else if (category == "Micro") + categories[4] = 1; + else if (category == "Other") + categories[5] = 1; + else { + cerr << "unknown category?"; + exit(-1); + } + for (int i = 0; i < NUM_OF_CATEGORIES; i++) + categories_vect.push_back(categories[i]); + + string tmp_str; + while(getline(line_stream, tmp_str, ',' )) { + string val_str; + for (const auto c : tmp_str) { + if (c != '\"' && c != '\r') //remove quotes and very occasional double end of line + val_str.push_back(c); + } + if (val_str.size() == 0) + break; + float val=(atof(val_str.c_str())); + vals.push_back(val); + } + + meanAbsSeasDiff = 0; + float sumf = 0; + for (int ip = SEASONALITY; ip0) + meanAbsSeasDiff = sumf / (vals.size() - SEASONALITY); + + if (LBACK > 0) { //extract last OUTPUT_SIZE points as the test values + if (vals.size() > LBACK*OUTPUT_SIZE) { + auto first = vals.begin() + vals.size() - LBACK*OUTPUT_SIZE; + auto pastLast = vals.begin() + vals.size() - (LBACK-1)*OUTPUT_SIZE; + vector input_vect(first, pastLast); //[first,pastLast) + testVals= input_vect; + vals.resize(vals.size() - LBACK*OUTPUT_SIZE); //remove last LBACK*OUTPUT_SIZE elements + n = vals.size(); + } else + n = 0; + } else { + n = vals.size(); + } + if (n > MAX_SERIES_LENGTH) {//chop long series + vals.erase(vals.begin(), vals.begin() + 
(n-MAX_SERIES_LENGTH)); //remove some early data + n = vals.size(); + } + } + M4TS(){}; +}; + +#if defined USE_ODBC +void HandleDiagnosticRecord(SQLHANDLE hHandle, + SQLSMALLINT hType, + RETCODE RetCode); +#endif + +struct AdditionalParams {//Per series, important + Parameter levSm; + Parameter sSm; + array initSeasonality; + Parameter sSm2; + array initSeasonality2; +}; +struct AdditionalParamsF {//Used for storing diagnostics + float levSm; + float sSm; + array initSeasonality; + float sSm2; + array initSeasonality2; + vector levels; + vector seasons; + vector seasons2; +}; + + +array perfToRanking (array perf_arr) { + array index; + + for (int itop=0; itop losses; + for (unsigned int indx = 0; indx as_scalar(forecH.value())) + loss = loss + (actual - forecH)*ALPHA_MULTIP; + losses.push_back(loss); + } + return sum(losses) / OUTPUT_SIZE; +} + +// weighted quantile Loss +float wQuantLoss(vector& out_vect, vector& actuals_vect, float tau, int offset) {//used just for diagnostics, if if LBACK>0 and PERCENTILE!=50 + float sumf = 0; float suma = 0; + for (unsigned int indx = 0; indx forec) + sumf = sumf + (actual - forec)*tau; + else + sumf = sumf + (actual - forec)*(tau - 1); + } + return sumf / suma * 200; +} + +float errorFunc(vector& out_vect, vector& actuals_vect, float meanAbsSeasDiff) { + float sumf=0; + for (unsigned int indx = 0; indx forecH) + loss = loss + (actualf - forecH)*ALPHA_MULTIP; + sumf+=loss; + } + return sumf / (OUTPUT_SIZE*meanAbsSeasDiff); +} + + + +int main(int argc, char** argv) { + dynet::initialize(argc, argv); + + int ibigOffset = 0; + if (argc == 2) + ibigOffset = atoi(argv[1]); + + cout< 0) { + cout<<"Warning. LEVEL_VARIABILITY_PENALTY has to be equal zero if SEASONALITY_NUM==0"<tm_year+1900; + now_ts.month=now->tm_mon+1; + now_ts.day=now->tm_mday; + now_ts.hour=now->tm_hour; + now_ts.minute=now->tm_min; + now_ts.second=now->tm_sec; + now_ts.fraction=0; //reportedly needed + + const int OFFSET_TO_FIRST_ACTUAL=5; + string insertQuery_str = "insert into M72nn(run, LBack, ibig, series, epoch "; + for (int iq = 1; iq <= OUTPUT_SIZE; iq++) { + stringstream ss; + ss << iq; + string iq_str = ss.str(); + insertQuery_str = insertQuery_str +", actual"+iq_str+", forec" + iq_str; + } + insertQuery_str = insertQuery_str +", trainingError, variable, n, dateTimeOfPrediction) \ + values(? , ? , ? , ? , ? 
"; + for (int iq = 1; iq <= OUTPUT_SIZE; iq++) { + insertQuery_str = insertQuery_str + ",?,?"; + } + insertQuery_str = insertQuery_str + ",?,?,?,?)"; + #if defined _WINDOWS + wstring insertQuery(insertQuery_str.begin(), insertQuery_str.end()); + SQLWCHAR* sqlQuery = (SQLWCHAR*)insertQuery.c_str(); + #else + SQLCHAR* sqlQuery =(SQLCHAR*)insertQuery_str.c_str(); + #endif + + SQLHENV hEnv = NULL; + SQLHDBC hDbc = NULL; + SQLHSTMT hStmt = NULL, hInsertStmt = NULL; + + if (SQLAllocHandle(SQL_HANDLE_ENV, SQL_NULL_HANDLE, &hEnv) == SQL_ERROR) { + fprintf(stderr, "Unable to allocate an environment handle\n"); + exit(-1); + } + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLSetEnvAttr(hEnv, + SQL_ATTR_ODBC_VERSION, + (SQLPOINTER)SQL_OV_ODBC3, + 0)); + + // Allocate a connection + TRYODBC(hEnv, + SQL_HANDLE_ENV, + SQLAllocHandle(SQL_HANDLE_DBC, hEnv, &hDbc)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLDriverConnect(hDbc, + NULL, + pwszConnStr, + SQL_NTS, + NULL, + 0, + NULL, + SQL_DRIVER_COMPLETE)); + fprintf(stderr, "Connected!\n"); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLSetConnectAttr(hDbc, SQL_ATTR_AUTOCOMMIT, (SQLPOINTER)SQL_AUTOCOMMIT_OFF, SQL_IS_INTEGER)); + + TRYODBC(hDbc, + SQL_HANDLE_DBC, + SQLAllocHandle(SQL_HANDLE_STMT, hDbc, &hInsertStmt)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLPrepare(hInsertStmt, sqlQuery, SQL_NTS)); + + SQLLEN nullTerminatedStringOfRun = SQL_NTS; + SQLLEN nullTerminatedStringOfSeries = SQL_NTS; + SQLLEN nullTerminatedStringOfVariable = SQL_NTS; + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 2, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&LBACK, 0, NULL)); + + // variable, n, dateTimeOfPrediction + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL+2*OUTPUT_SIZE+2, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)VARIABLE.c_str(), 0, &nullTerminatedStringOfVariable)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE + 4, SQL_PARAM_INPUT, SQL_C_TYPE_TIMESTAMP, SQL_TYPE_TIMESTAMP, 0, 0, &now_ts, sizeof(TIMESTAMP_STRUCT), NULL)); +#endif + + random_device rd; // only used once to initialise (seed) engine + mt19937 rng(rd()); // random-number engine used (Mersenne-Twister in this case) + + vector series_vect; + unordered_map allSeries_map(30000);//max series in one chunk would be 24k for yearly series + unordered_map seriesCategories_map(120000);//100k series + + ifstream infoFile(INFO_INPUT_PATH); + string line; + getline(infoFile, line); //header + while (getline(infoFile, line)) { + //cout << string( line)<= MIN_SERIES_LENGTH) { + series_vect.push_back(series); + if (m4Obj.meanAbsSeasDiff==0) { + cout<<"Warning, flat series:"<0 && series_vect.size()>=MAX_NUM_OF_SERIES) + break; + } + cout << "num of series:" << series_vect.size() << endl; + + unsigned int series_len=(unsigned int)series_vect.size(); + uniform_int_distribution uniOnSeries(0,series_len-1); // closed interval [a, b] + uniform_int_distribution uniOnNets(0,NUM_OF_NETS-1); // closed interval [a, b] + + unordered_map, AVERAGING_LEVEL+1>, NUM_OF_NETS>> testResults_map((int)series_len*1.5);//per series, etc... 
+ unordered_map> finalResults_map((int)series_len*1.5);//per series + set diagSeries; + + unordered_map> netRanking_map; + for (int ibig=0; ibig perfValid_vect; + int epochOfLastChangeOfLRate = -1; + +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&ibigDb, 0, NULL)); +#endif + + //create nets + array paramsCollection_arr;//per net + array perSeriesParamsCollection_arr;//per net + array trainers_arr; + array perSeriesTrainers_arr; + + + #if defined USE_RESIDUAL_LSTM + array, NUM_OF_NETS> rnnStack_arr; + #elif defined USE_ATTENTIVE_LSTM + array, NUM_OF_NETS> rnnStack_arr; + #else + array, NUM_OF_NETS> rnnStack_arr; + #endif + + array MLPW_parArr; + array MLPB_parArr; + array adapterW_parArr; + array adapterB_parArr; + + //this is not a history, this is the real stuff + unordered_map* > additionalParams_mapOfArr((int)series_len*1.5); //per series, per net + for (auto iter = series_vect.begin() ; iter != series_vect.end(); ++iter) { + string series=*iter; + additionalParams_mapOfArr[series]=new array(); + } + + for (int inet=0; inetclip_threshold = GRADIENT_CLIPPING; + perSeriesTrainers_arr[inet]=new AdamTrainer (perSeriesPC, INITIAL_LEARNING_RATE*PER_SERIES_LR_MULTIP, 0.9, 0.999, EPS); + perSeriesTrainers_arr[inet]->clip_threshold = GRADIENT_CLIPPING; + + auto& rNNStack=rnnStack_arr[inet]; + #if defined USE_RESIDUAL_LSTM + rNNStack.emplace_back(ResidualDilatedLSTMBuilder(dilations[0], INPUT_SIZE + NUM_OF_CATEGORIES, STATE_HSIZE, pc)); + for (int il = 1; il* additionalParams_arr=additionalParams_mapOfArr[series]; + additionalParams_arr->at(inet).levSm=perSeriesPC.add_parameters({1}, 0.5);//per series, per net + if (SEASONALITY_NUM > 0) { + additionalParams_arr->at(inet).sSm = perSeriesPC.add_parameters({ 1 }, 0.5); + for (int isea = 0; iseaat(inet).initSeasonality[isea] = perSeriesPC.add_parameters({ 1 }, 0.5); + } + if (SEASONALITY_NUM > 1) { + additionalParams_arr->at(inet).sSm2 = perSeriesPC.add_parameters({ 1 }, 0.5); + for (int isea = 0; iseaat(inet).initSeasonality2[isea] = perSeriesPC.add_parameters({ 1 }, 0.5); + } + } + }//seting up, through nets + + //history of params. Series->[NUM_OF_NETS,NUM_OF_TRAIN_EPOCHS] + unordered_map, NUM_OF_NETS>*> historyOfAdditionalParams_map((int)series_len*1.5); + for (auto iter = series_vect.begin() ; iter != series_vect.end(); ++iter) { + string series=*iter; + historyOfAdditionalParams_map[series]=new array, NUM_OF_NETS>(); + } + + //first assignment. Yes, we are using vector , so the very first time the duplicates are possible. 
But a set can't be sorted + array, NUM_OF_NETS> seriesAssignment;//every net has an array + for (int j=0; j> netPerf_map; + for (int inet=0; inetlearning_rate = LEARNING_RATES.at(iEpoch); + if (inet==0) + cout << "changing LR to:" << trainer->learning_rate << endl; + perSeriesTrainer->learning_rate = LEARNING_RATES.at(iEpoch)*PER_SERIES_LR_MULTIP; + } + + auto& rNNStack=rnnStack_arr[inet]; + Parameter& MLPW_par = MLPW_parArr[inet]; + Parameter& MLPB_par = MLPB_parArr[inet]; + Parameter& adapterW_par=adapterW_parArr[inet]; + Parameter& adapterB_par=adapterB_parArr[inet]; + + vector oneNetAssignments=seriesAssignment[inet]; + random_shuffle (oneNetAssignments.begin(), oneNetAssignments.end()); + + vector epochLosses; + vector forecLosses; vector levVarLosses; vector stateLosses; + for (auto iter = oneNetAssignments.begin() ; iter != oneNetAssignments.end(); ++iter) { + string series=*iter; + auto m4Obj=allSeries_map[series]; + + ComputationGraph cg; + for (int il=0; ilat(inet); + array& historyOfAdditionalParams_arr=historyOfAdditionalParams_map[series]->at(inet); + + Expression MLPW_ex,MLPB_ex; + if (ADD_NL_LAYER) { + MLPW_ex = parameter(cg, MLPW_par); + MLPB_ex = parameter(cg, MLPB_par); + } + Expression adapterW_ex=parameter(cg, adapterW_par); + Expression adapterB_ex=parameter(cg, adapterB_par); + + Expression levSmSerNet0_ex= parameter(cg, additionalParams.levSm); + Expression levSm_ex = logistic(levSmSerNet0_ex); + + vector season_exVect;//vector, because we do not know how long the series is + Expression sSm_ex; + if (SEASONALITY_NUM > 0) { + Expression sSmSerNet0_ex= parameter(cg, additionalParams.sSm); + sSm_ex = logistic(sSmSerNet0_ex); + + for (int isea = 0; isea season2_exVect;//vector, because we do not know how long the series is + Expression sSm2_ex; + if (SEASONALITY_NUM > 1) { + Expression sSm2SerNet0_ex= parameter(cg, additionalParams.sSm2); + sSm2_ex = logistic(sSm2SerNet0_ex); + + for (int isea = 0; isea logDiffOfLevels_vect; + vector levels_exVect; + if (SEASONALITY_NUM == 0) { + levels_exVect.push_back(input(cg, m4Obj.vals[0])); + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + } + else if (SEASONALITY_NUM == 2) { + Expression lev = cdiv(input(cg, m4Obj.vals[0]), season_exVect[0] * season2_exVect[0]); + levels_exVect.push_back(lev); + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + //if prediction horizon is larger than seasonality, so we need to repeat some of the seasonality factors + if (OUTPUT_SIZE>SEASONALITY2) { + unsigned long startSeasonalityIndx = season2_exVect.size() - SEASONALITY2; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY2); i++) + season2_exVect.push_back(season2_exVect[startSeasonalityIndx + i]); + } + } + else { + cerr<<"SEASONALITY_NUM="<< SEASONALITY_NUM; + exit(-1); + } + + Expression levelVarLoss_ex; + if (LEVEL_VARIABILITY_PENALTY > 0) { + vector levelVarLoss_v; + for (int i = 1; i losses;//losses of steps through single time series + for (int i=INPUT_SIZE-1; i<(m4Obj.n- OUTPUT_SIZE); i++) { + vector::const_iterator first = m4Obj.vals.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1; //not including the last one + vector 
input_vect(first, pastLast); //[first,pastLast) + + first = m4Obj.vals.begin() + i + 1; + pastLast = m4Obj.vals.begin() + i + 1 + OUTPUT_SIZE; + vector labels_vect(first, pastLast); //[first,pastLast) + + Expression input1_ex = input(cg, { INPUT_SIZE }, input_vect); + Expression labels1_ex = input(cg, { OUTPUT_SIZE }, labels_vect); + + if (SEASONALITY_NUM > 0 ) { + vector::const_iterator firstE = season_exVect.begin() +i+1-INPUT_SIZE; + vector::const_iterator pastLastE = season_exVect.begin() +i+1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality_ex=concatenate(inputSeasonality_exVect); + + firstE = season_exVect.begin() + i + 1; + pastLastE = season_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + outputSeasonality_ex = concatenate(outputSeasonality_exVect); + + input1_ex = cdiv(input1_ex, inputSeasonality_ex); // input deseasonalization + labels1_ex = cdiv(labels1_ex, outputSeasonality_ex); //output deseasonalization + } + if (SEASONALITY_NUM > 1) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1; //not including the last one + vector inputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality2_ex = concatenate(inputSeasonality2_exVect); + + firstE = season2_exVect.begin() + i + 1; + pastLastE = season2_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality2_ex = concatenate(outputSeasonality2_exVect); + + input1_ex = cdiv(input1_ex, inputSeasonality2_ex); //input deseasonalization + labels1_ex = cdiv(labels1_ex, outputSeasonality2_ex); //output deseasonalization + } + + vector joinedInput_ex; + joinedInput_ex.emplace_back(noise(squash(cdiv(input1_ex, levels_exVect[i])), NOISE_STD)); //input normalization+noise + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression labels_ex = squash(cdiv(labels1_ex, levels_exVect[i]));//output normalization + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il=INPUT_SIZE+MIN_INP_SEQ_LEN) + losses.push_back(loss_ex); + }//through points of a series + + Expression forecLoss_ex= average(losses); + Expression loss_exp = forecLoss_ex; + + float levVarLoss=0; + if (LEVEL_VARIABILITY_PENALTY > 0) { + Expression levelVarLossP_ex = levelVarLoss_ex*LEVEL_VARIABILITY_PENALTY; + levVarLoss = as_scalar(levelVarLossP_ex.value()); + levVarLosses.push_back(levVarLoss); + loss_exp= loss_exp + levelVarLossP_ex; + } + + float cStateLoss=0; + if (C_STATE_PENALTY>0) { + vector cStateLosses_vEx; + for (int irnn = 0; irnn < rNNStack.size(); irnn++) + for (int it = 0; itupdate();//update shared weights + perSeriesTrainer->update();//update params of this series only + } catch (exception& e) {//it may happen occasionally. I believe it is due to not robust enough implementation of squashing functions in Dynet. When abs(x)>35 NAs appear. + //so the code below is trying to produce some diagnostics, hopefully useful when setting LEVEL_VARIABILITY_PENALTY and C_STATE_PENALTY. 
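+ //What the scan below does: it walks every RNN chunk, every layer and every stored time step, and records
+ //the largest absolute value seen in the hidden/cell states together with where it occurred (time step,
+ //layer, chunk). Very large values here are the usual precursor of the NaNs mentioned above, so the
+ //printout helps decide whether LEVEL_VARIABILITY_PENALTY or C_STATE_PENALTY needs to be lowered.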
+ cerr<<"cought exception while doing "< maxAbs) { + maxAbs = abs(state[iv]); + timeOfMax=it; + layerOfMax=il; + chunkOfMax= irnn; + } + } + } //through layers/states + } //through time + } //through chunks + + cout << "levSm:" << as_scalar(levSm_ex.value()) << endl; + if (SEASONALITY_NUM > 0) + cout << "sSm:" << as_scalar(sSm_ex.value()) << endl; + if (SEASONALITY_NUM > 1) + cout << "sSm2:" << as_scalar(sSm2_ex.value()) << endl; + cout << "max abs:" << maxAbs <<" at time:"<< timeOfMax<<" at layer:"<< layerOfMax<<" and chunk:"<< chunkOfMax< 0) { + histAdditionalParams.sSm=as_scalar(sSm_ex.value()); + for (int isea = 0; isea 1) { + histAdditionalParams.sSm2 = as_scalar(sSm2_ex.value()); + for (int isea=0; isea 0 || C_STATE_PENALTY > 0) { + float averageForecLoss = accumulate(forecLosses.begin(), forecLosses.end(), 0.0) / forecLosses.size(); + cout << " forec loss:" << averageForecLoss * 100; + } + if (LEVEL_VARIABILITY_PENALTY > 0) { + float averagelevVarLoss = accumulate(levVarLosses.begin(), levVarLosses.end(), 0.0) / levVarLosses.size(); + cout << " levVar loss:" << averagelevVarLoss * 100; + } + if (C_STATE_PENALTY > 0) { + float averageStateLoss = accumulate(stateLosses.begin(), stateLosses.end(), 0.0) / stateLosses.size(); + cout << " state loss:" << averageStateLoss * 100; + } + cout<at(inet); + Expression MLPW_ex, MLPB_ex; + if (ADD_NL_LAYER) { + MLPW_ex = parameter(cg, MLPW_par); + MLPB_ex = parameter(cg, MLPB_par); + } + Expression adapterW_ex=parameter(cg, adapterW_par); + Expression adapterB_ex=parameter(cg, adapterB_par); + + Expression levSmSerNet0_ex = parameter(cg, additionalParams.levSm); + Expression levSm_ex = logistic(levSmSerNet0_ex); + + vector season_exVect;//vector, because we do not know how long the series is + Expression sSm_ex; + if (SEASONALITY_NUM > 0) { + Expression sSmSerNet0_ex= parameter(cg, additionalParams.sSm); + sSm_ex = logistic(sSmSerNet0_ex); + + for (int isea = 0; isea season2_exVect;//vector, because we do not know how long the series is + Expression sSm2_ex; + if (SEASONALITY_NUM > 1) { + Expression sSm2SerNet0_ex= parameter(cg, additionalParams.sSm2); + sSm2_ex = logistic(sSm2SerNet0_ex); + + for (int isea = 0; isea levels_exVect; + if (SEASONALITY_NUM == 0) { + levels_exVect.push_back(input(cg, m4Obj.vals[0])); + for (int i = 1; i0 then this is shortened, so it always contains data awe have right to access + Expression newLevel_ex = m4Obj.vals[i] * cdiv(levSm_ex, season_exVect[i]) + (1 - levSm_ex)*levels_exVect[i - 1]; + levels_exVect.push_back(newLevel_ex); + + Expression newSeason_ex = m4Obj.vals[i] * cdiv(sSm_ex, newLevel_ex) + (1 - sSm_ex)*season_exVect[i]; + season_exVect.push_back(newSeason_ex); + } + + //if prediction horizon is larger than seasonality, so we need to repeat some of the seasonality factors + if (OUTPUT_SIZE>SEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + } + else if (SEASONALITY_NUM == 2) { + Expression lev = cdiv(input(cg, m4Obj.vals[0]), season_exVect[0] * season2_exVect[0]); + levels_exVect.push_back(lev); + for (int i = 1; iSEASONALITY) { + unsigned long startSeasonalityIndx = season_exVect.size() - SEASONALITY; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY); i++) + season_exVect.push_back(season_exVect[startSeasonalityIndx + i]); + } + //if prediction horizon is larger than seasonality, so we need to repeat some of the seasonality factors + if 
(OUTPUT_SIZE>SEASONALITY2) { + unsigned long startSeasonalityIndx = season2_exVect.size() - SEASONALITY2; + for (int i = 0; i<(OUTPUT_SIZE - SEASONALITY2); i++) + season2_exVect.push_back(season2_exVect[startSeasonalityIndx + i]); + } + } + else { + cerr<<"SEASONALITY_NUM="<< SEASONALITY_NUM; + exit(-1); + } + + + Expression inputSeasonality_ex; Expression inputSeasonality2_ex; + Expression outputSeasonality_ex; Expression outputSeasonality2_ex; + vector losses;//losses of steps through single time series + Expression out_ex;//we declare it here, bcause the last one will be the forecast + for (int i=INPUT_SIZE-1; i::const_iterator first = m4Obj.vals.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1; //not including the last one + vector input_vect(first, pastLast); //[first,pastLast) + Expression input1_ex = input(cg, { INPUT_SIZE }, input_vect); + + if (SEASONALITY_NUM > 0 ) { + vector::const_iterator firstE = season_exVect.begin() +i+1-INPUT_SIZE; + vector::const_iterator pastLastE = season_exVect.begin() +i+1; //not including the last one + vector inputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality_ex=concatenate(inputSeasonality_exVect); + input1_ex = cdiv(input1_ex, inputSeasonality_ex); // input deseasonalization + } + if (SEASONALITY_NUM > 1) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1 - INPUT_SIZE; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1; //not including the last one + vector inputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + inputSeasonality2_ex = concatenate(inputSeasonality2_exVect); + input1_ex = cdiv(input1_ex, inputSeasonality2_ex); //input deseasonalization + } + + vector joinedInput_ex; + joinedInput_ex.emplace_back(noise(squash(cdiv(input1_ex, levels_exVect[i])), NOISE_STD)); //input normalization+noise + joinedInput_ex.emplace_back(input(cg, { NUM_OF_CATEGORIES }, m4Obj.categories_vect)); + Expression input_ex = concatenate(joinedInput_ex); + + Expression rnn_ex; + try { + rnn_ex = rNNStack[0].add_input(input_ex); + for (int il=1; il::const_iterator first = m4Obj.vals.begin() + i + 1; + vector::const_iterator pastLast = m4Obj.vals.begin() + i + 1 + OUTPUT_SIZE; + vector labels_vect(first, pastLast); //[first,pastLast) + Expression labels1_ex = input(cg, { OUTPUT_SIZE }, labels_vect); + + if (SEASONALITY_NUM > 0) { + vector::const_iterator firstE = season_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + outputSeasonality_ex = concatenate(outputSeasonality_exVect); + labels1_ex = cdiv(labels1_ex, outputSeasonality_ex); //output deseasonalization + } + if (SEASONALITY_NUM > 1) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1 + OUTPUT_SIZE;//checking if enough elements is in the vecor was done a few pe + vector outputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + Expression outputSeasonality2_ex = concatenate(outputSeasonality2_exVect); + labels1_ex = cdiv(labels1_ex, outputSeasonality2_ex); //output deseasonalization + } + Expression labels_ex = squash(cdiv(labels1_ex, levels_exVect[i]));//output normalization + + //Expression loss_ex = pinBallLoss(out_ex, labels_ex); + Expression loss_ex = MSIS(out_ex, labels_ex); + if (i>=INPUT_SIZE+MIN_INP_SEQ_LEN) + losses.push_back(loss_ex); //training area losses + } + + 
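+ //Loss note (an interpretation, not part of the original comments): pinBallLoss/MSIS above are the
+ //quantile-style objectives used for the lower/upper prediction-interval bounds; the M4 MSIS metric this
+ //presumably follows charges the interval width plus 2/alpha times any amount by which the actual falls
+ //outside [low, high]. Both are computed here in the normalized, deseasonalized (squashed) space.
+ //The block below runs only at the last in-sample point, i.e. on the real out-of-sample forecast: the
+ //output is un-squashed with expand(), multiplied by the final level and, when seasonality is modelled,
+ //by the corresponding future seasonality factors, bringing it back to the original scale.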
if (i==(m4Obj.n-1)) {//validation loss + out_ex=expand(out_ex)*levels_exVect[i];//back to original scale + if (SEASONALITY_NUM > 0 ) { + vector::const_iterator firstE = season_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality_exVect(firstE, pastLastE); //[first,pastLast) + for (int ios=0; ios 1 ) { + vector::const_iterator firstE = season2_exVect.begin() + i + 1; + vector::const_iterator pastLastE = season2_exVect.begin() + i + 1 + OUTPUT_SIZE; + vector outputSeasonality2_exVect(firstE, pastLastE); //[first,pastLast) + for (int ios = 0; ios, AVERAGING_LEVEL+1>, NUM_OF_NETS>, BIG_LOOP>> testResults_map((int)series_len*1.5);//per series, big loop, etc... + //No epoch here, because this will just reflect the current (latest) situation - the last few epochs + vector out_vect=as_vector(out_ex.value()); + testResults_map[series][inet][iEpoch%AVERAGING_LEVEL]=out_vect; + if (iEpoch>=AVERAGING_LEVEL && iEpoch % FREQ_OF_TEST==0) { + vector firstForec=testResults_map[series][inet][0]; + testResults_map[series][inet][AVERAGING_LEVEL]=firstForec; + for (int ii=1; ii nextForec=testResults_map[series][inet][ii]; + for (int iii=0; iii<2*OUTPUT_SIZE; iii++) + testResults_map[series][inet][AVERAGING_LEVEL][iii]+=nextForec[iii]; + } + for (int iii=0; iii<2*OUTPUT_SIZE; iii++) + testResults_map[series][inet][AVERAGING_LEVEL][iii]/=AVERAGING_LEVEL; + } //time to average + }//through series + } //through nets + + if (iEpoch>0 && iEpoch % FREQ_OF_TEST==0) { + //now that we have saved outputs of all nets on all series, let's calc how best and topn combinations performed during current epoch. + vector bestEpochLosses; + vector bestEpochAvgLosses; + vector topnEpochLosses; + vector topnEpochAvgLosses; + vector bestEpochLossesL; + vector bestEpochAvgLossesL; + vector topnEpochLossesL; + vector topnEpochAvgLossesL; + vector bestEpochLossesH; + vector bestEpochAvgLossesH; + vector topnEpochLossesH; + vector topnEpochAvgLossesH; + + for (auto iter = series_vect.begin() ; iter != series_vect.end(); ++iter) { + string series=*iter; + auto m4Obj=allSeries_map[series]; + +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 4, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)series.c_str(), 0, &nullTerminatedStringOfSeries)); + + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE + 3, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, (SQLPOINTER)&m4Obj.n, 0, NULL)); +#endif + + float avgLoss; + vector avgLatest; + vector avgAvg; + + for (int itop=0; itop 0) { + float qLoss = errorFunc(testResults_map[series][inet][iEpoch%AVERAGING_LEVEL], m4Obj.testVals, m4Obj.meanAbsSeasDiff); + bestEpochLosses.push_back(qLoss); + + qLoss=wQuantLoss(testResults_map[series][inet][iEpoch%AVERAGING_LEVEL], m4Obj.testVals, TAUL, 0); + bestEpochLossesL.push_back(qLoss); + + qLoss = wQuantLoss(testResults_map[series][inet][iEpoch%AVERAGING_LEVEL], m4Obj.testVals, TAUH, OUTPUT_SIZE); + bestEpochLossesH.push_back(qLoss); + } + avgLatest=testResults_map[series][inet][iEpoch%AVERAGING_LEVEL]; //used later for calculating topn loss + + if (iEpoch>=AVERAGING_LEVEL) { + if (LBACK > 0) { + float qLoss = errorFunc(testResults_map[series][inet][AVERAGING_LEVEL], m4Obj.testVals, m4Obj.meanAbsSeasDiff); + bestEpochAvgLosses.push_back(qLoss); + + qLoss = wQuantLoss(testResults_map[series][inet][AVERAGING_LEVEL], m4Obj.testVals, TAUL, 0); + 
bestEpochAvgLossesL.push_back(qLoss); + + qLoss = wQuantLoss(testResults_map[series][inet][AVERAGING_LEVEL], m4Obj.testVals, TAUH, OUTPUT_SIZE); + bestEpochAvgLossesH.push_back(qLoss); + } + avgAvg=testResults_map[series][inet][AVERAGING_LEVEL]; + } + } else { + for (int iii=0; iii<2*OUTPUT_SIZE; iii++) { + avgLatest[iii]+=testResults_map[series][inet][iEpoch%AVERAGING_LEVEL][iii];//calculate current topn + if (iEpoch>=AVERAGING_LEVEL) + avgAvg[iii]+=testResults_map[series][inet][AVERAGING_LEVEL][iii]; + } + } + }//through topn + + for (int iii=0; iii<2*OUTPUT_SIZE; iii++) + avgLatest[iii]/=TOPN; + + if (LBACK > 0) { + float qLoss = errorFunc(avgLatest, m4Obj.testVals, m4Obj.meanAbsSeasDiff); + topnEpochLosses.push_back(qLoss); + + qLoss = wQuantLoss(avgLatest, m4Obj.testVals, TAUL, 0); + topnEpochLossesL.push_back(qLoss); + + qLoss = wQuantLoss(avgLatest, m4Obj.testVals, TAUH, OUTPUT_SIZE); + topnEpochLossesH.push_back(qLoss); + } + + if (iEpoch>=AVERAGING_LEVEL) { + for (int iii = 0; iii<2*OUTPUT_SIZE; iii++) + avgAvg[iii] /= TOPN; + + finalResults_map[series] = avgAvg; + + if (LBACK > 0) { +#if defined USE_ODBC + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, OFFSET_TO_FIRST_ACTUAL + 2 * OUTPUT_SIZE + 1, SQL_PARAM_INPUT, SQL_C_FLOAT, SQL_FLOAT, 0, 0, (SQLPOINTER)&avgLoss, 0, NULL)); + + for (int iv=0; iv<2; iv++) { + if (iv==0) + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 1, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)runL.c_str(), 0, &nullTerminatedStringOfRun)) + else + TRYODBC(hInsertStmt, + SQL_HANDLE_STMT, + SQLBindParameter(hInsertStmt, 1, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, 0, 0, (SQLCHAR*)runH.c_str(), 0, &nullTerminatedStringOfRun)); + + for (int iii=0; iii 0) { + float bestEpochLoss=accumulate( bestEpochLosses.begin(), bestEpochLosses.end(), 0.0)/bestEpochLosses.size(); + float topnEpochLoss=accumulate( topnEpochLosses.begin(), topnEpochLosses.end(), 0.0)/topnEpochLosses.size(); + float bestEpochLossL = accumulate(bestEpochLossesL.begin(), bestEpochLossesL.end(), 0.0) / bestEpochLossesL.size(); + float topnEpochLossL = accumulate(topnEpochLossesL.begin(), topnEpochLossesL.end(), 0.0) / topnEpochLossesL.size(); + float bestEpochLossH = accumulate(bestEpochLossesH.begin(), bestEpochLossesH.end(), 0.0) / bestEpochLossesH.size(); + float topnEpochLossH = accumulate(topnEpochLossesH.begin(), topnEpochLossesH.end(), 0.0) / topnEpochLossesH.size(); + cout<=AVERAGING_LEVEL) { + float bestEpochAvgLoss=accumulate( bestEpochAvgLosses.begin(), bestEpochAvgLosses.end(), 0.0)/bestEpochAvgLosses.size(); + float topnEpochAvgLoss=accumulate( topnEpochAvgLosses.begin(), topnEpochAvgLosses.end(), 0.0)/topnEpochAvgLosses.size(); + float bestEpochAvgLossL = accumulate(bestEpochAvgLossesL.begin(), bestEpochAvgLossesL.end(), 0.0) / bestEpochAvgLossesL.size(); + float topnEpochAvgLossL = accumulate(topnEpochAvgLossesL.begin(), topnEpochAvgLossesL.end(), 0.0) / topnEpochAvgLossesL.size(); + float bestEpochAvgLossH = accumulate(bestEpochAvgLossesH.begin(), bestEpochAvgLossesH.end(), 0.0) / bestEpochAvgLossesH.size(); + float topnEpochAvgLossH = accumulate(topnEpochAvgLossesH.begin(), topnEpochAvgLossesH.end(), 0.0) / topnEpochAvgLossesH.size(); + cout<<" bestAvg:"<> netRanking_map + netRanking_map[series]=perfToRanking(netPerf_map[series]); + + for (int itop=0; itop diagSeries; + for (int i=0; i<1; i++) {//add a few normal ones + int irand=uniOnSeries(rng); + diagSeries.insert(series_vect[irand]); + } + for(auto series : 
diagSeries) { + cout<at(inet); + for (int iEpoch=0; iEpoch 0 ) { + cout<<"sSm:"<at(inet); + for (int iEpoch=0; iEpoch 1 ) { + cout<<"sSm2:"<at(inet); + for (int iEpoch=0; iEpochat(inet); + for (int iEpoch = 0; iEpoch0) { + cout << "levels:" << iEpoch<<" "; + for (int iv = 0; iv 0 ) { + cout << "seasons:" << iEpoch<<" "; + for (int iv = 0; iv 1 ) { + cout << "seasons2:" << iEpoch<<" "; + for (int iv = 0; iv + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + + + + Source Files + + + \ No newline at end of file diff --git a/118 - slaweks17/c++/windows_VisualStudio/M44/M44.vcxproj b/118 - slaweks17/c++/windows_VisualStudio/M44/M44.vcxproj new file mode 100644 index 0000000..b56923a --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/M44/M44.vcxproj @@ -0,0 +1,227 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + RelWithDebug + Win32 + + + RelWithDebug + x64 + + + + + + + + + + + {7A192E0C-8F58-4D65-998E-3A7010AB5F87} + Win32Proj + M44 + 8.1 + + + + Application + true + v140 + Unicode + + + Application + true + v140 + Unicode + + + Application + false + v140 + true + Unicode + + + Application + true + v140 + Unicode + Sequential + + + Application + true + v140 + Unicode + Sequential + + + Application + false + v140 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + true + + + true + + + true + + + true + + + false + + + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + + + + + NotUsing + Level1 + Disabled + WIN32;_WINDOWS;EIGEN_USE_MKL_ALL;EIGEN_FAST_MATH;NOMINMAX;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + E:\progs2\dynet;E:\progs\Eigen; + + + Console + true + E:\progs2\dynet\buildMKL\dynet\Debug + dynet.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + NotUsing + Level1 + MaxSpeed + WIN32;_WINDOWS;EIGEN_FAST_MATH;EIGEN_USE_MKL_ALL;NOMINMAX;NDEBUG_;CONSOLE;%(PreprocessorDefinitions) + E:\progs2\dynet;E:\progs\Eigen; + AnySuitable + true + Speed + AdvancedVectorExtensions + Default + MultiThreadedDLL + ProgramDatabase + true + false + + + Console + true + E:\progs2\dynet\buildMKL\dynet\RelWithDebInfo + dynet.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + true + true + + + + + + \ No newline at end of file diff --git a/118 - slaweks17/c++/windows_VisualStudio/M44/slstm.h b/118 - slaweks17/c++/windows_VisualStudio/M44/slstm.h new file mode 100644 index 0000000..adb63a7 --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/M44/slstm.h @@ -0,0 +1,394 @@ +/** +* file slstm.h +* header for my implementation of dilated LSTMs, based on Dynet LSTM builders + - DilatedLSTMBuilder - standard Dilated LSTM 
(https://papers.nips.cc/paper/6613-dilated-recurrent-neural-networks.pdf) + - ResidualDilatedLSTMBuilder - Dilated LSTM with special Residual shortcuts, after https://arxiv.org/abs/1701.03360 + - AttentiveDilatedLSTMBuilder - Dilated LSTM with Attention mechanism, as in the second stage of https://arxiv.org/abs/1704.02971 +* +Slawek Smyl, Mar-May 2018 +*/ + +#ifndef DYNET_SLSTMS_H_ +#define DYNET_SLSTMS_H_ + +#include "dynet/dynet.h" +#include "dynet/rnn.h" +#include "dynet/expr.h" + +using namespace std; + +namespace dynet { + + //basd on VanillaLSTMBuilder + struct ResidualDilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + ResidualDilatedLSTMBuilder(); + /** + * \brief Constructor for the ResidualDilatedLSTMBuilder + * + * \param dilations Vector of dilations + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + * \param ln_lstm Whether to use layer normalization + * \param forget_bias value(float) to use as bias for the forget gate(default = 1.0) + */ + explicit ResidualDilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model, + bool ln_lstm = false, + float forget_bias = 1.f); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 
+ * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + * \param d_h Dropout rate \f$d_h\f$ for the output \f$h_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + /** + * \brief Get parameters in ResidualDilatedLSTMBuilder + * \return list of points to ParameterStorage objects + */ + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + // first index is layer, then ... + std::vector> ln_params; + + // first index is layer, then ... + std::vector> param_vars; + // first index is layer, then ... + std::vector> ln_param_vars; + + // first index is layer, then ... + std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + float dropout_rate_h; + bool ln_lstm; + float forget_bias; + bool dropout_masks_valid; + vector dilations; //one int per layer + + private: + ComputationGraph* _cg; // Pointer to current cg + + }; + + + struct DilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + DilatedLSTMBuilder(); + /** + * \brief Constructor for the DilatedLSTMBuilder + * + * \param dilations Vector of dilations + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + */ + explicit DilatedLSTMBuilder(vector dilations, + unsigned input_dim, + unsigned hidden_dim, + ParameterCollection& model); + + Expression back() const override { return (cur == -1 ? 
h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. + * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + + void set_weightnoise(float std); + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + + // first index is layer, then ... + std::vector> param_vars; + + // first index is layer, then ... 
+ std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + float dropout_rate_h; + float weightnoise_std; + vector dilations; //one int per layer + + bool dropout_masks_valid; + private: + ComputationGraph* _cg; // Pointer to current cg + + }; + + + struct AttentiveDilatedLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + AttentiveDilatedLSTMBuilder(); + /** + * \brief Constructor for the AttentiveDilatedLSTMBuilder + * + * \param max_dilations Vector, maximum dilations (per layer) + * \param input_dim Dimention of the input \f$x_t\f$ + * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model ParameterCollection holding the parameters + */ + explicit AttentiveDilatedLSTMBuilder(vector max_dilations, + unsigned input_dim, + unsigned hidden_dim, + unsigned attention_dim, + ParameterCollection& model); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 
+ * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); + + void set_weightnoise(float std); + ParameterCollection & get_parameter_collection() override; + protected: + void new_graph_impl(ComputationGraph& cg, bool update) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + ParameterCollection local_model; + // first index is layer, then ... + std::vector> params; + + // first index is layer, then ... + std::vector> param_vars; + + // first index is layer, then ... + std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + unsigned attention_dim; + float dropout_rate_h; + float weightnoise_std; + vector max_dilations; //one int per layer + + bool dropout_masks_valid; + private: + ComputationGraph* _cg; // Pointer to current cg + + }; +} // namespace dynet + +#endif diff --git a/118 - slaweks17/c++/windows_VisualStudio/readme.txt b/118 - slaweks17/c++/windows_VisualStudio/readme.txt new file mode 100644 index 0000000..de51078 --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/readme.txt @@ -0,0 +1,5 @@ +This is Visual Studio 15 solution, with 4 projects, one for each .cc file. +Two targets are defined: Debug and RelWitDebug, which is Release with debug info, that I used normally. +You will need to update include and link paths to point to your installation of Dynet. +In x64\RelWithDebug directory you will find two example scripts to run the executables +in conjunction with one program started interactively inside VS. 
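+As a rough pointer (the paths below are placeholders, not required locations), the settings to edit in each
+project are typically:
+  C/C++ -> General -> Additional Include Directories:  <path-to-dynet>;<path-to-Eigen>
+  Linker -> General -> Additional Library Directories:  <path-to-dynet>\buildMKL\dynet\RelWithDebInfo (or \Debug)
+  Linker -> Input -> Additional Dependencies:           dynet.lib (keep the Windows libraries already listed)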
\ No newline at end of file diff --git a/118 - slaweks17/c++/windows_VisualStudio/x64/RelWithDebug/readme.txt b/118 - slaweks17/c++/windows_VisualStudio/x64/RelWithDebug/readme.txt new file mode 100644 index 0000000..6568116 --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/x64/RelWithDebug/readme.txt @@ -0,0 +1,5 @@ +These example run scripts. They are meant to be run on 6-core computer and assume that the program, +M41.exe has been started interactively in Visual Studio, so they add 5 processes. +run61.cmd should be run for ES_RNN and ES_RNN_PI, so Monthly and Quarterly series, +although for Monthly you probably want to use computer with more cores, unless you are fine waiting a week or so :-) +run61_e.cmd is for ES_RNN_E and ES_RNN_E_PI, so all other cases. \ No newline at end of file diff --git a/118 - slaweks17/c++/windows_VisualStudio/x64/RelWithDebug/run61.cmd b/118 - slaweks17/c++/windows_VisualStudio/x64/RelWithDebug/run61.cmd new file mode 100644 index 0000000..5ff41dd --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/x64/RelWithDebug/run61.cmd @@ -0,0 +1,5 @@ +start M41 10 2 +start M41 11 1 5 +start M41 11 2 5 +start M41 12 1 10 +start M41 12 2 10 diff --git a/118 - slaweks17/c++/windows_VisualStudio/x64/RelWithDebug/run61_e.cmd b/118 - slaweks17/c++/windows_VisualStudio/x64/RelWithDebug/run61_e.cmd new file mode 100644 index 0000000..a862afa --- /dev/null +++ b/118 - slaweks17/c++/windows_VisualStudio/x64/RelWithDebug/run61_e.cmd @@ -0,0 +1,5 @@ +start M41 5 +start M41 10 +start M41 15 +start M41 20 +start M41 25 diff --git a/118 - slaweks17/readme.txt b/118 - slaweks17/readme.txt new file mode 100644 index 0000000..0c926cd --- /dev/null +++ b/118 - slaweks17/readme.txt @@ -0,0 +1,9 @@ +ES-RNN programs, related script, and docs. +M4 Forecasting Competition, 2018 +Slawek Smyl, Uber. 
+ +The programs are in C++ and use Dynet - a Dynamic Graph NN system (https://github.com/clab/dynet) + + + + diff --git a/118 - slaweks17/sql/createM72nn_SQLServer.sql b/118 - slaweks17/sql/createM72nn_SQLServer.sql new file mode 100644 index 0000000..e7ad329 --- /dev/null +++ b/118 - slaweks17/sql/createM72nn_SQLServer.sql @@ -0,0 +1,135 @@ +USE [slawek] +GO + +/****** Object: Table [dbo].[M72nn] Script Date: 6/2/2018 9:37:26 AM ******/ +SET ANSI_NULLS ON +GO + +SET QUOTED_IDENTIFIER ON +GO + +SET ANSI_PADDING ON +GO + +CREATE TABLE [dbo].[M72nn]( + [run] [varchar](164) NOT NULL, + [LBack] [smallint] NOT NULL, + [iBig] [smallint] NOT NULL, + [series] [varchar](20) NOT NULL, + [epoch] [smallint] NOT NULL, + [actual1] [real] NULL, + [forec1] [real] NULL, + [actual2] [real] NULL, + [forec2] [real] NULL, + [actual3] [real] NULL, + [forec3] [real] NULL, + [actual4] [real] NULL, + [forec4] [real] NULL, + [actual5] [real] NULL, + [forec5] [real] NULL, + [actual6] [real] NULL, + [forec6] [real] NULL, + [actual7] [real] NULL, + [forec7] [real] NULL, + [actual8] [real] NULL, + [forec8] [real] NULL, + [actual9] [real] NULL, + [forec9] [real] NULL, + [actual10] [real] NULL, + [forec10] [real] NULL, + [actual11] [real] NULL, + [forec11] [real] NULL, + [actual12] [real] NULL, + [forec12] [real] NULL, + [actual13] [real] NULL, + [forec13] [real] NULL, + [actual14] [real] NULL, + [forec14] [real] NULL, + [actual15] [real] NULL, + [forec15] [real] NULL, + [actual16] [real] NULL, + [forec16] [real] NULL, + [actual17] [real] NULL, + [forec17] [real] NULL, + [actual18] [real] NULL, + [forec18] [real] NULL, + [actual19] [real] NULL, + [forec19] [real] NULL, + [actual20] [real] NULL, + [forec20] [real] NULL, + [actual21] [real] NULL, + [forec21] [real] NULL, + [actual22] [real] NULL, + [forec22] [real] NULL, + [actual23] [real] NULL, + [forec23] [real] NULL, + [actual24] [real] NULL, + [forec24] [real] NULL, + [actual25] [real] NULL, + [forec25] [real] NULL, + [actual26] [real] NULL, + [forec26] [real] NULL, + [actual27] [real] NULL, + [forec27] [real] NULL, + [actual28] [real] NULL, + [forec28] [real] NULL, + [actual29] [real] NULL, + [forec29] [real] NULL, + [actual30] [real] NULL, + [forec30] [real] NULL, + [actual31] [real] NULL, + [forec31] [real] NULL, + [actual32] [real] NULL, + [forec32] [real] NULL, + [actual33] [real] NULL, + [forec33] [real] NULL, + [actual34] [real] NULL, + [forec34] [real] NULL, + [actual35] [real] NULL, + [forec35] [real] NULL, + [actual36] [real] NULL, + [forec36] [real] NULL, + [actual37] [real] NULL, + [forec37] [real] NULL, + [actual38] [real] NULL, + [forec38] [real] NULL, + [actual39] [real] NULL, + [forec39] [real] NULL, + [actual40] [real] NULL, + [forec40] [real] NULL, + [actual41] [real] NULL, + [forec41] [real] NULL, + [actual42] [real] NULL, + [forec42] [real] NULL, + [actual43] [real] NULL, + [forec43] [real] NULL, + [actual44] [real] NULL, + [forec44] [real] NULL, + [actual45] [real] NULL, + [forec45] [real] NULL, + [actual46] [real] NULL, + [forec46] [real] NULL, + [actual47] [real] NULL, + [forec47] [real] NULL, + [actual48] [real] NULL, + [forec48] [real] NULL, + [trainingError] [real] NULL, + [variable] [varchar](20) NOT NULL, + [n] [smallint] NOT NULL, + [dateTimeOfPrediction] [datetime] NOT NULL, + CONSTRAINT [M72nn_pk] PRIMARY KEY CLUSTERED +( + [run] ASC, + [LBack] ASC, + [iBig] ASC, + [series] ASC, + [epoch] ASC +)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] +) ON 
[PRIMARY] + +GO + +SET ANSI_PADDING OFF +GO + + diff --git a/118 - slaweks17/sql/createM72nn_mysql.txt b/118 - slaweks17/sql/createM72nn_mysql.txt new file mode 100644 index 0000000..f86d1c0 --- /dev/null +++ b/118 - slaweks17/sql/createM72nn_mysql.txt @@ -0,0 +1,54 @@ +CREATE TABLE M72nn( + run varchar(160) NOT NULL, + LBack smallint NOT NULL, + iBig smallint NOT NULL, + series varchar(20) NOT NULL, + epoch smallint NOT NULL, + actual1 float NULL, + forec1 float NULL, + actual2 float NULL, + forec2 float NULL, + actual3 float NULL, + forec3 float NULL, + actual4 float NULL, + forec4 float NULL, + actual5 float NULL, + forec5 float NULL, + actual6 float NULL, + forec6 float NULL, + actual7 float NULL, + forec7 float NULL, + actual8 float NULL, + forec8 float NULL, + actual9 float NULL, + forec9 float NULL, + actual10 float NULL, + forec10 float NULL, + actual11 float NULL, + forec11 float NULL, + actual12 float NULL, + forec12 float NULL, + actual13 float NULL, + forec13 float NULL, + actual14 float NULL, + forec14 float NULL, + actual15 float NULL, + forec15 float NULL, + actual16 float NULL, + forec16 float NULL, + actual17 float NULL, + forec17 float NULL, + actual18 float NULL, + forec18 float NULL, + trainingError float NULL, + variable varchar(20) NOT NULL, + n smallint NOT NULL, + dateTimeOfPrediction datetime NOT NULL, + CONSTRAINT M72nn_pk PRIMARY KEY CLUSTERED +( + run ASC, + LBack ASC, + iBig ASC, + series ASC, + epoch ASC)); + diff --git a/118 - slaweks17/sql/readme.txt b/118 - slaweks17/sql/readme.txt new file mode 100644 index 0000000..e8c186c --- /dev/null +++ b/118 - slaweks17/sql/readme.txt @@ -0,0 +1,4 @@ +I provide just two example table creation scrits, one for SQL Server and one for mysql. +The mysql table is limited to output vector 18, so would not be good for hourly runs. +Anyway, starting using the database is a large investment of time, apart from installationm, you also need to create auxiliary tables with MASE, and a lot of queries. +I do not have time to do all of it here and suspect there will be little interest in ODBC, so this is all what you get :-) diff --git a/4Theta method.R b/4Theta method.R deleted file mode 100644 index e5a8294..0000000 --- a/4Theta method.R +++ /dev/null @@ -1,209 +0,0 @@ -#This code can be used to reproduce the forecasts submitted to the M4 competition for the 4Theta method - -#Authors: E. Spiliotis and V. 
Assimakopoulos (2017) / Forecasting & Strategy Unit - NTUA - -#Method Description: Generalizing the Theta model for automatic forecasting -#Method Type: Statistical - Decomposition - -library(forecast) #requires version 8.2 - -SeasonalityTest <- function(input, ppy){ - #Used for determining whether the time series is seasonal - tcrit <- 1.645 - if (length(input)<3*ppy){ - test_seasonal <- FALSE - }else{ - xacf <- acf(input, plot = FALSE)$acf[-1, 1, 1] - clim <- tcrit/sqrt(length(input)) * sqrt(cumsum(c(1, 2 * xacf^2))) - test_seasonal <- ( abs(xacf[ppy]) > clim[ppy] ) - - if (is.na(test_seasonal)==TRUE){ test_seasonal <- FALSE } - } - - return(test_seasonal) -} - -Theta.fit <- function(input, fh, theta, curve, model, seasonality , plot=FALSE){ - #Used to fit a Theta model - - #Check if the inputs are valid - if (theta<0){ theta <- 2 } - if (fh<1){ fh <- 1 } - #Estimate theta line weights - outtest <- naive(input, h=fh)$mean - if (theta==0){ - wses <- 0 - }else{ - wses <- (1/theta) - } - wlrl <- (1-wses) - #Estimate seasonaly adjusted time series - ppy <- frequency(input) - if (seasonality=="N"){ - des_input <- input ; SIout <- rep(1, fh) ; SIin <- rep(1, length(input)) - }else if (seasonality=="A"){ - Dec <- decompose(input, type="additive") - des_input <- input-Dec$seasonal - SIin <- Dec$seasonal - SIout <- head(rep(Dec$seasonal[(length(Dec$seasonal)-ppy+1):length(Dec$seasonal)], fh), fh) - }else{ - Dec <- decompose(input, type="multiplicative") - des_input <- input/Dec$seasonal - SIin <- Dec$seasonal - SIout <- head(rep(Dec$seasonal[(length(Dec$seasonal)-ppy+1):length(Dec$seasonal)], fh), fh) - } - - #If negative values, force to linear model - if (min(des_input)<=0){ curve <- "Lrl" ; model <- "A" } - #Estimate theta line zero - observations <- length(des_input) - xs <- c(1:observations) - xf = xff <- c((observations+1):(observations+fh)) - dat=data.frame(des_input=des_input, xs=xs) - newdf <- data.frame(xs = xff) - - if (curve=="Exp"){ - estimate <- lm(log(des_input)~xs) - thetaline0In <- exp(predict(estimate))+input-input - thetaline0Out <- exp(predict(estimate, newdf))+outtest-outtest - }else{ - estimate <- lm(des_input ~ poly(xs, 1, raw=TRUE)) - thetaline0In <- predict(estimate)+des_input-des_input - thetaline0Out <- predict(estimate, newdf)+outtest-outtest - } - - #Estimete Theta line (theta) - if (model=="A"){ - thetalineT <- theta*des_input+(1-theta)*thetaline0In - }else if ((model=="M")&(all(thetaline0In>0)==T)&(all(thetaline0Out>0)==T)){ - thetalineT <- (des_input^theta)*(thetaline0In^(1-theta)) - }else{ - model<-"A" - thetalineT <- theta*des_input+(1-theta)*thetaline0In - } - - #forecasting TL2 - sesmodel <- ses(thetalineT, h=fh) - thetaline2In <- sesmodel$fitted - thetaline2Out <- sesmodel$mean - - #Theta forecasts - if (model=="A"){ - forecastsIn <- as.numeric(thetaline2In*wses)+as.numeric(thetaline0In*wlrl)+des_input-des_input - forecastsOut <- as.numeric(thetaline2Out*wses)+as.numeric(thetaline0Out*wlrl)+outtest-outtest - }else if ((model=="M")& - (all(thetaline2In>0)==T)&(all(thetaline2Out>0)==T)& - (all(thetaline0In>0)==T)&(all(thetaline0Out>0)==T)){ - forecastsIn <- ((as.numeric(thetaline2In)^(1/theta))*(as.numeric(thetaline0In)^(1-(1/theta))))+des_input-des_input - forecastsOut <- ((as.numeric(thetaline2Out)^(1/theta))*(as.numeric(thetaline0Out)^(1-(1/theta))))+outtest-outtest - }else{ - model<-"A" - thetalineT <- theta*des_input+(1-theta)*thetaline0In - sesmodel <- ses(thetalineT,h=fh) - thetaline2In <- sesmodel$fitted - thetaline2Out <- sesmodel$mean - forecastsIn <- 
as.numeric(thetaline2In*wses)+as.numeric(thetaline0In*wlrl)+des_input-des_input - forecastsOut <- as.numeric(thetaline2Out*wses)+as.numeric(thetaline0Out*wlrl)+outtest-outtest - } - - #Seasonal adjustments - if (seasonality=="A"){ - forecastsIn <- forecastsIn+SIin - forecastsOut <- forecastsOut+SIout - }else{ - forecastsIn <- forecastsIn*SIin - forecastsOut <- forecastsOut*SIout - } - - #Zero forecasts become positive - for (i in 1:length(forecastsOut)){ - if (forecastsOut[i]<0){ forecastsOut[i] <- 0 } - } - - if (plot==TRUE){ - united <- cbind(input,forecastsOut) - for (ik in 1:(observations+fh)){ united[ik,1] = sum(united[ik,2],united[ik,1], na.rm = TRUE) } - plot(united[,1],col="black",type="l",main=paste("Model:",model,",Curve:",curve,",Theta:",theta),xlab="Time",ylab="Values", - ylim=c(min(united[,1])*0.85,max(united[,1])*1.15)) - lines(forecastsIn, col="green") ; lines(forecastsOut, col="green") - lines(thetaline2In, col="blue") ; lines(thetaline2Out, col="blue") - lines(thetaline0In, col="red") ; lines(thetaline0Out, col="red") - } - - output=list(fitted=forecastsIn,mean=forecastsOut, - fitted0=thetaline0In,mean0=thetaline0Out, - fitted2=thetaline2In,mean2=thetaline2Out, - model=paste(seasonality,model,curve,c(round(theta,2)))) - - return(output) -} - -FourTheta<- function(input, fh){ - #Used to automatically select the best Theta model - - #Scale - base <- mean(input) ; input <- input/base - - molist <- c("M","A") ; trlist <- c("Lrl","Exp") - - #Check seasonality & Create list of models - ppy <- frequency(input) ; ST <- F - if (ppy>1){ ST <- SeasonalityTest(input, ppy) } - if (ST==T){ - - selist <- c("M","A") - listnames <- c() - for (i in 1:length(selist)){ - for (ii in 1:length(molist)){ - for (iii in 1:length(trlist)){ - listnames <- c(listnames,paste(selist[i], molist[ii], trlist[iii])) - } - } - } - - }else{ - - listnames <- c() - for (ii in 1:length(molist)){ - for (iii in 1:length(trlist)){ - listnames <- c(listnames, paste("N", molist[ii], trlist[iii])) - } - } - - } - - modellist <- NULL - for (i in 1:length(listnames)){ - modellist[length(modellist)+1] <- list(c(substr(listnames,1,1)[i], substr(listnames,3,3)[i], - substr(listnames,5,7)[i])) - } - - #Start validation - errorsin <- c() ; models <- NULL - - #With this function determine opt theta per case - optfun <- function(x, input, fh, curve, model, seasonality){ - mean(abs(Theta.fit(input=input, fh, theta=x, curve, model, seasonality , plot=FALSE)$fitted-input)) - } - - for (j in 1:length(listnames)){ - optTheta <- optimize(optfun, c(1:3), - input=input, fh=fh, curve=modellist[[j]][3], model=modellist[[j]][2], - seasonality=modellist[[j]][1])$minimum - - fortheta <- Theta.fit(input=input, fh=fh, theta=optTheta, curve=modellist[[j]][3], model=modellist[[j]][2], - seasonality=modellist[[j]][1], plot=F) - models[length(models)+1] <- list(fortheta) - errorsin <- c(errorsin, mean(abs(input-fortheta$fitted))) - } - - #Select model and export - selected.model <- models[[which.min(errorsin)]] - description <- selected.model$model - output <- list(fitted=selected.model$fitted*base,mean=selected.model$mean*base, - description=description) - #Returns the fitted and forecasted values, as well as the model used (Type of seasonality, Type of Model, Type of Trend, Theta coef.) 
- - return(output) - -} diff --git a/Benchmarks and Evaluation.R b/Benchmarks and Evaluation.R deleted file mode 100644 index 07469ad..0000000 --- a/Benchmarks and Evaluation.R +++ /dev/null @@ -1,162 +0,0 @@ -#This code can be used to reproduce the forecasts of the M4 Competition STATISTICAL Benchmarks and evaluate their accuracy - -library(forecast) #Requires v8.2 - -################################################################################# -#In this example let us produce forecasts for 100 randomly generated timeseries -fh <- 6 #The forecasting horizon examined -frq <- 1 #The frequency of the data -data_train = data_test <- NULL #Train and test sample -for (i in 1:100){ - data_all <- 2+ 0.15*(1:20) + rnorm(20) - data_train[length(data_train)+1] <- list(ts(head(data_all,length(data_all)-fh),frequency = frq)) - data_test[length(data_test)+1] <- list(tail(data_all,fh)) -} -################################################################################# - -smape_cal <- function(outsample, forecasts){ - #Used to estimate sMAPE - outsample <- as.numeric(outsample) ; forecasts<-as.numeric(forecasts) - smape <- (abs(outsample-forecasts)*200)/(abs(outsample)+abs(forecasts)) - return(smape) -} - -mase_cal <- function(insample, outsample, forecasts){ - #Used to estimate MASE - frq <- frequency(insample) - forecastsNaiveSD <- rep(NA,frq) - for (j in (frq+1):length(insample)){ - forecastsNaiveSD <- c(forecastsNaiveSD, insample[j-frq]) - } - masep<-mean(abs(insample-forecastsNaiveSD),na.rm = TRUE) - - outsample <- as.numeric(outsample) ; forecasts <- as.numeric(forecasts) - mase <- (abs(outsample-forecasts))/masep - return(mase) -} - -naive_seasonal <- function(input, fh){ - #Used to estimate Seasonal Naive - frcy <- frequency(input) - frcst <- naive(input, h=fh)$mean - if (frcy>1){ - frcst <- head(rep(as.numeric(tail(input,frcy)), fh), fh) + frcst - frcst - } - return(frcst) -} - -Theta.classic <- function(input, fh){ - #Used to estimate Theta classic - - #Set parameters - wses <- wlrl<-0.5 ; theta <- 2 - #Estimate theta line (0) - observations <- length(input) - xt <- c(1:observations) - xf <- c((observations+1):(observations+fh)) - train <- data.frame(input=input, xt=xt) - test <- data.frame(xt = xf) - - estimate <- lm(input ~ poly(xt, 1, raw=TRUE)) - thetaline0In <- as.numeric(predict(estimate)) - thetaline0Out <- as.numeric(predict(estimate,test)) - - #Estimate theta line (2) - thetalineT <- theta*input+(1-theta)*thetaline0In - sesmodel <- ses(thetalineT, h=fh) - thetaline2In <- sesmodel$fitted - thetaline2Out <- sesmodel$mean - - #Theta forecasts - forecastsIn <- (thetaline2In*wses)+(thetaline0In*wlrl) - forecastsOut <- (thetaline2Out*wses)+(thetaline0Out*wlrl) - - #Zero forecasts become positive - for (i in 1:length(forecastsOut)){ - if (forecastsOut[i]<0){ forecastsOut[i]<-0 } - } - - output=list(fitted = forecastsIn, mean = forecastsOut, - fitted0 = thetaline0In, mean0 = thetaline0Out, - fitted2 = thetaline2In, mean2 = thetaline2Out) - - return(output) -} - -SeasonalityTest <- function(input, ppy){ - #Used to determine whether a time series is seasonal - tcrit <- 1.645 - if (length(input)<3*ppy){ - test_seasonal <- FALSE - }else{ - xacf <- acf(input, plot = FALSE)$acf[-1, 1, 1] - clim <- tcrit/sqrt(length(input)) * sqrt(cumsum(c(1, 2 * xacf^2))) - test_seasonal <- ( abs(xacf[ppy]) > clim[ppy] ) - - if (is.na(test_seasonal)==TRUE){ test_seasonal <- FALSE } - } - - return(test_seasonal) -} - -Benchmarks <- function(input, fh){ - #Used to estimate the statistical benchmarks of the M4 
-
-  #Estimate seasonally adjusted time series
-  ppy <- frequency(input) ; ST <- F
-  if (ppy>1){ ST <- SeasonalityTest(input,ppy) }
-  if (ST==T){
-    Dec <- decompose(input,type="multiplicative")
-    des_input <- input/Dec$seasonal
-    SIout <- head(rep(Dec$seasonal[(length(Dec$seasonal)-ppy+1):length(Dec$seasonal)], fh), fh)
-  }else{
-    des_input <- input ; SIout <- rep(1, fh)
-  }
-
-  f1 <- naive(input, h=fh)$mean #Naive
-  f2 <- naive_seasonal(input, fh=fh) #Seasonal Naive
-  f3 <- naive(des_input, h=fh)$mean*SIout #Naive2
-  f4 <- ses(des_input, h=fh)$mean*SIout #Ses
-  f5 <- holt(des_input, h=fh, damped=F)$mean*SIout #Holt
-  f6 <- holt(des_input, h=fh, damped=T)$mean*SIout #Damped
-  f7 <- Theta.classic(input=des_input, fh=fh)$mean*SIout #Theta
-  f8 <- (f4+f5+f6)/3 #Comb
-
-  return(list(f1,f2,f3,f4,f5,f6,f7,f8))
-}
-
-Names_benchmarks <- c("Naive", "sNaive", "Naive2", "SES", "Holt", "Damped", "Theta", "Com")
-Total_smape=Total_mase <- array(NA,dim = c(length(Names_benchmarks), fh, length(data_train)))
-#Methods, Horizon, time-series
-for (i in 1:length(data_train)){
-
-  insample <- data_train[[i]]
-  outsample <- data_test[[i]]
-  forecasts <- Benchmarks(input=insample, fh=fh)
-
-  #sMAPE
-  for (j in 1:length(Names_benchmarks)){
-    Total_smape[j,,i] <- smape_cal(outsample, forecasts[[j]]) #j the # of the benchmark
-  }
-  #MASE
-  for (j in 1:length(Names_benchmarks)){
-    Total_mase[j,,i] <- mase_cal(insample, outsample, forecasts[[j]]) #j the # of the benchmark
-  }
-
-}
-
-print("########### sMAPE ###############")
-for (i in 1:length(Names_benchmarks)){
-  print(paste(Names_benchmarks[i], round(mean(Total_smape[i,,]), 3)))
-}
-print("########### MASE ################")
-for (i in 1:length(Names_benchmarks)){
-  print(paste(Names_benchmarks[i], round(mean(Total_mase[i,,]), 3)))
-}
-print("########### OWA ################")
-for (i in 1:length(Names_benchmarks)){
-  print(paste(Names_benchmarks[i],
-              round(((mean(Total_mase[i,,])/mean(Total_mase[3,,]))+(mean(Total_smape[i,,])/mean(Total_smape[3,,])))/2, 3)))
-}
-
-
diff --git a/ML_benchmarks.py b/ML_benchmarks.py
deleted file mode 100644
index 0bc61aa..0000000
--- a/ML_benchmarks.py
+++ /dev/null
@@ -1,341 +0,0 @@
-# This code can be used to reproduce the forecasts of M4 Competition NN benchmarks and evaluate their accuracy
-
-from numpy.random import seed
-seed(42)
-from tensorflow import set_random_seed
-set_random_seed(42)
-from sklearn.neural_network import MLPRegressor
-from keras.models import Sequential
-from keras.layers import Dense, SimpleRNN
-from keras.optimizers import rmsprop
-from keras import backend as ker
-from math import sqrt
-import numpy as np
-import tensorflow as tf
-import pandas as pd
-import gc
-
-
-def detrend(insample_data):
-    """
-    Calculates a & b parameters of LRL
-
-    :param insample_data:
-    :return:
-    """
-    x = np.arange(len(insample_data))
-    a, b = np.polyfit(x, insample_data, 1)
-    return a, b
-
-
-def deseasonalize(original_ts, ppy):
-    """
-    Calculates and returns seasonal indices
-
-    :param original_ts: original data
-    :param ppy: periods per year
-    :return:
-    """
-    """
-    # === get in-sample data
-    original_ts = original_ts[:-out_of_sample]
-    """
-    if seasonality_test(original_ts, ppy):
-        # print("seasonal")
-        # ==== get moving averages
-        ma_ts = moving_averages(original_ts, ppy)
-
-        # ==== get seasonality indices
-        le_ts = original_ts * 100 / ma_ts
-        le_ts = np.hstack((le_ts, np.full((ppy - (len(le_ts) % ppy)), np.nan)))
-        le_ts = np.reshape(le_ts, (-1, ppy))
-        si = np.nanmean(le_ts, 0)
-        norm = np.sum(si) / (ppy * 100)
-        si = si / norm
-    else:
-        # print("NOT seasonal")
-        si = np.full(ppy, 100)
-
-    return si
-
-
-def moving_averages(ts_init, window):
-    """
-    Calculates the moving averages for a given TS
-
-    :param ts_init: the original time series
-    :param window: window length
-    :return: moving averages ts
-    """
-    if len(ts_init) % 2 == 0:
-        ts_ma = pd.rolling_mean(ts_init, window, center=True)
-        ts_ma = pd.rolling_mean(ts_ma, 2, center=True)
-        ts_ma = np.roll(ts_ma, -1)
-    else:
-        ts_ma = pd.rolling_mean(ts_init, window, center=True)
-
-    return ts_ma
-
-
-def seasonality_test(original_ts, ppy):
-    """
-    Seasonality test
-
-    :param original_ts: time series
-    :param ppy: periods per year
-    :return: boolean value: whether the TS is seasonal
-    """
-    s = acf(original_ts, 1)
-    for i in range(2, ppy):
-        s = s + (acf(original_ts, i) ** 2)
-
-    limit = 1.645 * (sqrt((1 + 2 * s) / len(original_ts)))
-
-    return (abs(acf(original_ts, ppy))) > limit
-
-
-def acf(data, k):
-    """
-    Autocorrelation function
-
-    :param data: time series
-    :param k: lag
-    :return:
-    """
-    m = np.mean(data)
-    s1 = 0
-    for i in range(k, len(data)):
-        s1 = s1 + ((data[i] - m) * (data[i - k] - m))
-
-    s2 = 0
-    for i in range(0, len(data)):
-        s2 = s2 + ((data[i] - m) ** 2)
-
-    return float(s1 / s2)
-
-
-def split_into_train_test(data, in_num, fh):
-    """
-    Splits the series into train and test sets. Each step takes multiple points as inputs
-
-    :param data: an individual TS
-    :param fh: number of out of sample points
-    :param in_num: number of input points for the forecast
-    :return:
-    """
-    train, test = data[:-fh], data[-(fh + in_num):]
-    x_train, y_train = train[:-1], np.roll(train, -in_num)[:-in_num]
-    x_test, y_test = train[-in_num:], np.roll(test, -in_num)[:-in_num]
-
-    # reshape input to be [samples, time steps, features] (N-NF samples, 1 time step, 1 feature)
-    x_train = np.reshape(x_train, (-1, 1))
-    x_test = np.reshape(x_test, (-1, 1))
-    temp_test = np.roll(x_test, -1)
-    temp_train = np.roll(x_train, -1)
-    for x in range(1, in_num):
-        x_train = np.concatenate((x_train[:-1], temp_train[:-1]), 1)
-        x_test = np.concatenate((x_test[:-1], temp_test[:-1]), 1)
-        temp_test = np.roll(temp_test, -1)[:-1]
-        temp_train = np.roll(temp_train, -1)[:-1]
-
-    return x_train, y_train, x_test, y_test
-
-
-def rnn_bench(x_train, y_train, x_test, fh, input_size):
-    """
-    Forecasts using 6 SimpleRNN nodes in the hidden layer and a Dense output layer
-
-    :param x_train: train data
-    :param y_train: target values for training
-    :param x_test: test data
-    :param fh: forecasting horizon
-    :param input_size: number of points used as input
-    :return:
-    """
-    # reshape to match expected input
-    x_train = np.reshape(x_train, (-1, input_size, 1))
-    x_test = np.reshape(x_test, (-1, input_size, 1))
-
-    # create the model
-    model = Sequential([
-        SimpleRNN(6, input_shape=(input_size, 1), activation='linear',
-                  use_bias=False, kernel_initializer='glorot_uniform',
-                  recurrent_initializer='orthogonal', bias_initializer='zeros',
-                  dropout=0.0, recurrent_dropout=0.0),
-        Dense(1, use_bias=True, activation='linear')
-    ])
-    opt = rmsprop(lr=0.001)
-    model.compile(loss='mean_squared_error', optimizer=opt)
-
-    # fit the model to the training data
-    model.fit(x_train, y_train, epochs=100, batch_size=1, verbose=0)
-
-    # make predictions
-    y_hat_test = []
-    last_prediction = model.predict(x_test)[0]
-    for i in range(0, fh):
-        y_hat_test.append(last_prediction)
-        x_test[0] = np.roll(x_test[0], -1)
-        x_test[0, (len(x_test[0]) - 1)] = last_prediction
-        last_prediction = model.predict(x_test)[0]
-
-    return np.asarray(y_hat_test)
-
-
-def mlp_bench(x_train, y_train, x_test, fh):
-    """
-    Forecasts using a simple MLP with 6 nodes in the hidden layer
-
-    :param x_train: train input data
-    :param y_train: target values for training
-    :param x_test: test data
-    :param fh: forecasting horizon
-    :return:
-    """
-    y_hat_test = []
-
-    model = MLPRegressor(hidden_layer_sizes=6, activation='identity', solver='adam',
-                         max_iter=100, learning_rate='adaptive', learning_rate_init=0.001,
-                         random_state=42)
-    model.fit(x_train, y_train)
-
-    last_prediction = model.predict(x_test)[0]
-    for i in range(0, fh):
-        y_hat_test.append(last_prediction)
-        x_test[0] = np.roll(x_test[0], -1)
-        x_test[0, (len(x_test[0]) - 1)] = last_prediction
-        last_prediction = model.predict(x_test)[0]
-
-    return np.asarray(y_hat_test)
-
-
-def smape(a, b):
-    """
-    Calculates sMAPE
-
-    :param a: actual values
-    :param b: predicted values
-    :return: sMAPE
-    """
-    a = np.reshape(a, (-1,))
-    b = np.reshape(b, (-1,))
-    return np.mean(2.0 * np.abs(a - b) / (np.abs(a) + np.abs(b))).item()
-
-
-def mase(insample, y_test, y_hat_test, freq):
-    """
-    Calculates MASE
-
-    :param insample: insample data
-    :param y_test: out of sample target values
-    :param y_hat_test: predicted values
-    :param freq: data frequency
-    :return:
-    """
-    y_hat_naive = []
-    for i in range(freq, len(insample)):
-        y_hat_naive.append(insample[(i - freq)])
-
-    masep = np.mean(abs(insample[freq:] - y_hat_naive))
-
-    return np.mean(abs(y_test - y_hat_test)) / masep
-
-
-def main():
-    fh = 6  # forecasting horizon
-    freq = 1  # data frequency
-    in_size = 3  # number of points used as input for each forecast
-
-    err_MLP_sMAPE = []
-    err_MLP_MASE = []
-    err_RNN_sMAPE = []
-    err_RNN_MASE = []
-
-    # ===== In this example we produce forecasts for 100 randomly generated timeseries =====
-    data_all = np.array(np.random.random_integers(0, 100, (100, 20)), dtype=np.float32)
-    for i in range(0, 100):
-        for j in range(0, 20):
-            data_all[i, j] = j * 10 + data_all[i, j]
-
-    counter = 0
-    # ===== Main loop which goes through all timeseries =====
-    for j in range(len(data_all)):
-        ts = data_all[j, :]
-
-        # remove seasonality
-        seasonality_in = deseasonalize(ts, freq)
-
-        for i in range(0, len(ts)):
-            ts[i] = ts[i] * 100 / seasonality_in[i % freq]
-
-        # detrending
-        a, b = detrend(ts)
-
-        for i in range(0, len(ts)):
-            ts[i] = ts[i] - ((a * i) + b)
-
-        x_train, y_train, x_test, y_test = split_into_train_test(ts, in_size, fh)
-
-        # RNN benchmark - Produce forecasts
-        y_hat_test_RNN = np.reshape(rnn_bench(x_train, y_train, x_test, fh, in_size), (-1))
-
-        # MLP benchmark - Produce forecasts
-        y_hat_test_MLP = mlp_bench(x_train, y_train, x_test, fh)
-        for i in range(0, 29):
-            y_hat_test_MLP = np.vstack((y_hat_test_MLP, mlp_bench(x_train, y_train, x_test, fh)))
-        y_hat_test_MLP = np.median(y_hat_test_MLP, axis=0)
-
-        # add trend
-        for i in range(0, len(ts)):
-            ts[i] = ts[i] + ((a * i) + b)
-
-        for i in range(0, fh):
-            y_hat_test_MLP[i] = y_hat_test_MLP[i] + ((a * (len(ts) + i + 1)) + b)
-            y_hat_test_RNN[i] = y_hat_test_RNN[i] + ((a * (len(ts) + i + 1)) + b)
-
-        # add seasonality
-        for i in range(0, len(ts)):
-            ts[i] = ts[i] * seasonality_in[i % freq] / 100
-
-        for i in range(len(ts), len(ts) + fh):
-            y_hat_test_MLP[i - len(ts)] = y_hat_test_MLP[i - len(ts)] * seasonality_in[i % freq] / 100
-            y_hat_test_RNN[i - len(ts)] = y_hat_test_RNN[i - len(ts)] * seasonality_in[i % freq] / 100
-
-        # check if negative or extreme
-        for i in range(len(y_hat_test_MLP)):
-            if y_hat_test_MLP[i] < 0:
-                y_hat_test_MLP[i] = 0
-            if y_hat_test_RNN[i] < 0:
-                y_hat_test_RNN[i] = 0
-
-            if y_hat_test_MLP[i] > (1000 * max(ts)):
-                y_hat_test_MLP[i] = max(ts)
-            if y_hat_test_RNN[i] > (1000 * max(ts)):
-                y_hat_test_RNN[i] = max(ts)
-
-        x_train, y_train, x_test, y_test = split_into_train_test(ts, in_size, fh)
-
-        # Calculate errors
-        err_MLP_sMAPE.append(smape(y_test, y_hat_test_MLP))
-        err_RNN_sMAPE.append(smape(y_test, y_hat_test_RNN))
-        err_MLP_MASE.append(mase(ts[:-fh], y_test, y_hat_test_MLP, freq))
-        err_RNN_MASE.append(mase(ts[:-fh], y_test, y_hat_test_RNN, freq))
-
-        # memory handling
-        ker.clear_session()
-        tf.reset_default_graph()
-        gc.collect()
-
-        counter = counter + 1
-        print("-------------TS ID: ", counter, "-------------")
-
-    print("\n\n---------FINAL RESULTS---------")
-    print("=============sMAPE=============\n")
-    print("#### MLP ####\n", np.mean(err_MLP_sMAPE), "\n")
-    print("#### RNN ####\n", np.mean(err_RNN_sMAPE), "\n")
-    print("==============MASE=============")
-    print("#### MLP ####\n", np.mean(err_MLP_MASE), "\n")
-    print("#### RNN ####\n", np.mean(err_RNN_MASE), "\n")
-
-
-main()
diff --git a/README.md b/README.md
deleted file mode 100644
index 9b7ceb1..0000000
--- a/README.md
+++ /dev/null
@@ -1,2 +0,0 @@
-# M4-methods
-Includes the source code to reproduce the forecasts of the methods which participated in the M4 Competition