110 changes: 71 additions & 39 deletions cnn.py
@@ -7,101 +7,133 @@
import tensorflow as tf
import pdb


def variable_summaries(var):
"""Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
with tf.name_scope('summaries'):
mean = tf.reduce_mean(var)
tf.summary.scalar('mean', mean)
with tf.name_scope('stddev'):
stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
tf.summary.scalar('stddev', stddev)
tf.summary.scalar('max', tf.reduce_max(var))
tf.summary.scalar('min', tf.reduce_min(var))
tf.summary.histogram('histogram', var)


def weight_variable(shape):
"""Create a weight variable with appropriate initialization."""
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)

def bias_variable(shape):
"""Create a bias variable with appropriate initialization."""
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
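
A minimal sketch (not part of the diff) of how these helpers feed TensorBoard, assuming the TF1 Session API used throughout this file; the './logs' path is illustrative:

# Hypothetical wiring: attach summaries to one variable, merge, run, and write a single step.
w = weight_variable([3, 3])
variable_summaries(w)
merged = tf.summary.merge_all()
writer = tf.summary.FileWriter('./logs', tf.get_default_graph())
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    writer.add_summary(sess.run(merged), global_step=0)
writer.close()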

def weights_and_biases(W_shape):
""" """
with tf.name_scope('weights'):
W = weight_variable(W_shape)
variable_summaries(W)
with tf.name_scope('biases'):
b = weight_variable(W_shape[-1:])
variable_summaries(b)
return W, b

# TODO: allow the activation function to be skipped (identity)
def fc_layer(input_tensor, W_fc_shape, layer_name, act=tf.nn.relu):
    """Fully connected layer: matmul plus bias, followed by the activation."""
    with tf.name_scope(layer_name):
        W_fc, b_fc = weights_and_biases(W_fc_shape)
        return act(tf.matmul(input_tensor, W_fc) + b_fc)

def normed_fc_layer(input_tensor, W_fc_shape, layer_name, act=tf.nn.relu):
""" """
return tf.contrib.layers.layer_norm(fc_layer(input_tensor, W_fc_shape, layer_name, act))
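
Shape note (illustrative, not part of the diff): with W_fc_shape = [n_in, n_out], fc_layer maps a [batch, n_in] tensor to [batch, n_out]; layer_norm then normalizes each example to zero mean and unit variance before applying its own learned scale and shift.

x = tf.placeholder(tf.float32, [None, 8])
h = normed_fc_layer(x, [8, 4], 'fc_demo')  # h has shape [None, 4]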

def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='VALID')

def separable_conv2d(x, W, P):
return tf.nn.separable_conv2d(x, W, P, strides=[1, 1, 1, 1], padding='VALID')
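
In tf.nn.separable_conv2d the depthwise filter has shape [h, w, in_channels, channel_multiplier] and the pointwise filter [1, 1, channel_multiplier * in_channels, out_channels]; in this file the last dimension of W_conv_shape doubles as the channel multiplier and the output feature count. An illustrative shape check (the numbers are hypothetical, not from the diff):

x = tf.placeholder(tf.float32, [None, 11, 50, 3])  # [batch, coins, window, channels]
W = weight_variable([1, 3, 3, 8])                  # depthwise filter, multiplier 8
P = weight_variable([1, 1, 8 * 3, 8])              # pointwise filter, 24 -> 8 features
y = separable_conv2d(x, W, P)                      # VALID padding -> [None, 11, 48, 8]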

# TODO: allow the activation function to be skipped (identity)
def conv_layer(input_tensor, W_conv_shape, layer_name, sep=False, act=tf.nn.relu):
    """Convolution layer (standard or separable): conv plus bias, then activation."""
with tf.name_scope(layer_name):
with tf.name_scope('convs'):
W_conv, b_conv = weights_and_biases(W_conv_shape)
if sep:
with tf.name_scope('sep_convs'):
with tf.name_scope('weights'):
P_conv = weight_variable([1, 1, W_conv_shape[3]*W_conv_shape[2], W_conv_shape[3]])
variable_summaries(P_conv)
h_conv = act(separable_conv2d(input_tensor, W_conv, P_conv) + b_conv)
tf.summary.histogram('separable_conv_results', h_conv)
else:
with tf.name_scope('standard_convs'):
h_conv = act(conv2d(input_tensor, W_conv) + b_conv)
tf.summary.histogram('conv_results', h_conv)
return h_conv

def normed_conv_layer(input_tensor, W_conv_shape, layer_name, sep=False, act=tf.nn.relu):
""" """
return tf.contrib.layers.layer_norm(conv_layer(input_tensor, W_conv_shape, layer_name, sep, act))

def cnn_model(x, init_weights, hparams, params):
"""Low level model for a CNN."""

# Reshape the input to use as our first feature layer
with tf.name_scope('input_tensor'):
with tf.name_scope('price_window'):
input_price = tf.reshape(x, [-1, params.num_coins, hparams.window_size, params.num_input_channels])

# First convolution layer
W_conv_shape = [1, hparams.len_conv1_filters, params.num_input_channels, hparams.num_conv1_features]
with tf.name_scope('conv1'):
W_conv1 = weight_variable([1, hparams.len_conv1_filters, params.num_input_channels, hparams.num_conv1_features])
b_conv1 = bias_variable([hparams.num_conv1_features])
if hparams.conv_layers_separable:
P_conv1 = weight_variable([1, 1, hparams.num_conv1_features * params.num_input_channels, hparams.num_conv1_features])
h_conv1 = tf.nn.relu(separable_conv2d(input_price, W_conv1, P_conv1) + b_conv1)
else: # use standard convolution layer
h_conv1 = tf.nn.relu(conv2d(input_price, W_conv1) + b_conv1)

    conv_layer_1 = normed_conv_layer(input_price, W_conv_shape, 'conv1', sep=hparams.conv_layers_separable)

# Second convolution layer
W_conv_shape = [1, hparams.window_size-hparams.len_conv1_filters+1, hparams.num_conv1_features, hparams.num_conv2_features]
with tf.name_scope('conv2'):
W_conv2 = weight_variable([1, hparams.window_size-hparams.len_conv1_filters+1, hparams.num_conv1_features, hparams.num_conv2_features])
b_conv2 = bias_variable([hparams.num_conv2_features])
if hparams.conv_layers_separable:
P_conv2 = weight_variable([1, 1, hparams.num_conv2_features*hparams.num_conv1_features, hparams.num_conv2_features])
h_conv2 = tf.nn.relu(separable_conv2d(h_conv1, W_conv2, P_conv2) + b_conv2)
else: # use standard convolution layer
h_conv2 = tf.nn.relu(conv2d(h_conv1, W_conv2) + b_conv2)
    conv_layer_2 = normed_conv_layer(conv_layer_1, W_conv_shape, 'conv2', sep=hparams.conv_layers_separable)

# Add in previous weights as a feature
with tf.name_scope('previous_weights'):
past_weights = tf.reshape(init_weights[:,1:], [-1, params.num_coins, 1, 1])
h_conv2_weights = tf.concat([h_conv2, past_weights], axis=3)
h_conv2_weights = tf.concat([conv_layer_2, past_weights], axis=3)

# Dropout on 2nd convolution layer during training
with tf.name_scope('dropout'):
keep_prob = tf.placeholder(tf.float32)
tf.summary.scalar('dropout_keep_prob', keep_prob)
h_conv2_weights_dropout = tf.nn.dropout(h_conv2_weights, keep_prob)

# Three possible endings for this cnn model: third_conv_layer, one_fc_layer, two_fc_layers
if hparams.model_ending == 'third_conv_layer':
# Third and final convolution layer
with tf.name_scope('conv3'):
W_conv3 = weight_variable([1, params.len_conv3_filters, hparams.num_conv2_features+1, 1])
b_conv3 = bias_variable([1])
if hparams.conv_layers_separable:
P_conv3 = weight_variable([1, 1, hparams.num_conv2_features+1, 1])
h_conv3 = tf.nn.relu(separable_conv2d(h_conv2_weights_dropout, W_conv3, P_conv3) + b_conv3)
else:
h_conv3 = tf.nn.relu(conv2d(h_conv2_weights_dropout, W_conv3) + b_conv3)
final_layer = tf.reshape(h_conv3, [-1, params.num_coins])
W_conv_shape = [1, params.len_conv3_filters, hparams.num_conv2_features+1, 1]
conv_layer_3 = normed_conv_layer(h_conv2_weights_dropout, W_conv_shape, 'conv3', sep=hparams.conv_layers_separable)
final_layer = tf.reshape(conv_layer_3, [-1, params.num_coins])

else:
# Flatten the 2nd convolution layer prior to the fully connected layers
h_conv2_weights_dropout_flat = tf.reshape(h_conv2_weights_dropout, [-1, params.num_coins*(hparams.num_conv2_features+1)])

if hparams.model_ending == 'one_fc_layer':
# First and only fully connected layer
W_fc_shape = [params.num_coins*(hparams.num_conv2_features+1), params.num_coins]
with tf.name_scope('fc1'):
W_fc1 = weight_variable([params.num_coins*(hparams.num_conv2_features+1), params.num_coins])
b_fc1 = weight_variable([params.num_coins])
final_layer = tf.nn.relu(tf.matmul(h_conv2_weights_dropout_flat, W_fc1) + b_fc1)
final_layer = normed_fc_layer(h_conv2_weights_dropout_flat, W_fc_shape, 'fc1')

elif hparams.model_ending == 'two_fc_layers':
# First fully connected layer
W_fc_shape = [params.num_coins*(hparams.num_conv2_features+1), hparams.num_fc1_neurons]
with tf.name_scope('fc1'):
W_fc1 = weight_variable([params.num_coins*(hparams.num_conv2_features+1), hparams.num_fc1_neurons])
b_fc1 = weight_variable([hparams.num_fc1_neurons])
h_fc1 = tf.nn.relu(tf.matmul(h_conv2_weights_dropout_flat, W_fc1) + b_fc1)
# Second and last fully connected layer
fc1_layer = normed_fc_layer(h_conv2_weights_dropout_flat, W_fc_shape, 'fc1')
# Second and last fully connected layer
W_fc_shape = [hparams.num_fc1_neurons, params.num_coins]
with tf.name_scope('fc2'):
W_fc2 = weight_variable([hparams.num_fc1_neurons, params.num_coins])
b_fc2 = bias_variable([params.num_coins])
final_layer = tf.nn.relu(tf.matmul(h_fc1, W_fc2) + b_fc2)
final_layer = normed_fc_layer(fc1_layer, W_fc_shape, 'fc2')

# Add in a bias for cash to the final layer before taking softmax to get portfolio weights
with tf.name_scope('cash_bias'):
@@ -111,7 +143,7 @@ def cnn_model(x, init_weights, hparams, params):

# Final portfolio weight tensor
with tf.name_scope('weights'):
weights = tf.nn.softmax(final_layer_cash, name="output_tensor")
weights = tf.nn.softmax(final_layer_cash, name='output_tensor')

return weights, keep_prob
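
A call sketch mirroring crypto_bot.py (illustrative; train_op and the feed values are placeholders for the real training code):

x = tf.placeholder(tf.float32, [None, params.num_coins, hparams.window_size, params.num_input_channels])
w0 = tf.placeholder(tf.float32, [None, params.num_coins + 1])
weights, keep_prob = cnn_model(x, w0, hparams, params)
# Dropout is on during training and off at evaluation:
# sess.run(train_op, feed_dict={x: batch_x, w0: batch_w0, keep_prob: hparams.dropout_keep_prob})
# sess.run(weights, feed_dict={x: test_x, w0: test_w0, keep_prob: 1.0})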

8 changes: 4 additions & 4 deletions crypto_bot.py
@@ -97,10 +97,10 @@ def __init__(self, hparams, params, test=False, tuning=False):
opt_test_portfolio, opt_test_port_return = pdata.calc_optimal_portfolio(test_labels, test_path)

# Create the model
input_prices = tf.placeholder(tf.float32, [None, params.num_coins, hparams.window_size, params.num_input_channels])
labels = tf.placeholder(tf.float32, [None, params.num_coins+1])
init_weights = tf.placeholder(tf.float32, [None, params.num_coins+1])
batch_size = tf.placeholder(tf.int32)
input_prices = tf.placeholder(tf.float32, [None, params.num_coins, hparams.window_size, params.num_input_channels], name='nn_input')
labels = tf.placeholder(tf.float32, [None, params.num_coins+1], name='labels')
init_weights = tf.placeholder(tf.float32, [None, params.num_coins+1], name='initial_weights')
batch_size = tf.placeholder(tf.int32, name='batch_size')

# Build the graph
weights, keep_prob = cnn.cnn_model(input_prices, init_weights, hparams, params)
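
Naming the placeholders makes them recoverable when the graph is reloaded from a checkpoint. A hedged sketch (the checkpoint path is hypothetical, and name scopes may be uniquified, so the exact output-tensor name depends on the final graph):

saver = tf.train.import_meta_graph('model.ckpt.meta')
graph = tf.get_default_graph()
input_prices = graph.get_tensor_by_name('nn_input:0')
init_weights = graph.get_tensor_by_name('initial_weights:0')
portfolio = graph.get_tensor_by_name('weights/output_tensor:0')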
12 changes: 6 additions & 6 deletions hparams.py
@@ -34,22 +34,22 @@ def set_hparams(test=False) -> HParams:
window_size=50,
stride=1,
batch_size=100,
num_training_steps=200000,
num_training_steps=50000,
learning_rate=2e-4,
        geometric_decay=0.5,  # the larger geometric_decay is, the more heavily recent periods are sampled in training
conv_layers_separable=True,
len_conv1_filters=3,
num_conv1_features=8,
num_conv1_features=4,
# TODO: possibly have len_conv2_filters not just be the entire length as it currently is
# len_conv2_filters = xxx
num_conv2_features=32,
num_conv2_features=16,
num_fc1_neurons=12, # only for option two_fc_layers; it is set to num_coins for one_fc_layer in cnn.py
model_ending='one_fc_layer', # options: two_fc_layers, one_fc_layer, third_conv_layer
dropout_keep_prob=0.5,
)
if test:
hparams.set_hparam('batch_size', 30)
hparams.set_hparam('num_training_steps', 4)
hparams.set_hparam('num_training_steps', 10)
return hparams


@@ -84,8 +84,8 @@ def init_search_space_dict(test=False) -> dict:
dim_dropout_keep_prob=space.Real(low=.1, high=.9, name='dropout_keep_prob'),
)
if test:
search_space_dict.update({'dim_batch_size': space.Integer(low=10, high=30, name='batch_size'),
'dim_num_training_steps': space.Integer(low=2, high=4, name='num_training_steps'),
search_space_dict.update({'dim_batch_size': space.Integer(low=20, high=60, name='batch_size'),
'dim_num_training_steps': space.Integer(low=10, high=20, name='num_training_steps'),
'dim_window_size': space.Integer(low=10, high=50, name='window_size'),
'dim_stride': space.Integer(low=1, high=2, name='stride')})
return search_space_dict
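
These space.Real/space.Integer dimensions are scikit-optimize objects; a hedged sketch of how they could drive a search (gp_minimize and use_named_args are skopt APIs; train_and_eval is a hypothetical objective returning a loss to minimize):

from skopt import gp_minimize
from skopt.utils import use_named_args

dims = list(init_search_space_dict(test=True).values())

@use_named_args(dims)
def objective(**sampled):
    hp = set_hparams(test=True)
    for name, value in sampled.items():
        hp.set_hparam(name, value)
    return train_and_eval(hp)  # hypothetical: train the bot and return a loss

result = gp_minimize(objective, dims, n_calls=10)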