diff --git a/cnn.py b/cnn.py
index 3cf9fce..c2c5b98 100644
--- a/cnn.py
+++ b/cnn.py
@@ -7,8 +7,8 @@
 import tensorflow as tf
 import pdb
 
-
 def variable_summaries(var):
+    """Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
     with tf.name_scope('summaries'):
         mean = tf.reduce_mean(var)
         tf.summary.scalar('mean', mean)
@@ -16,92 +16,124 @@ def variable_summaries(var):
         stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
         tf.summary.scalar('stddev', stddev)
         tf.summary.scalar('max', tf.reduce_max(var))
+        tf.summary.scalar('min', tf.reduce_min(var))
         tf.summary.histogram('histogram', var)
 
-
 def weight_variable(shape):
+    """Create a weight variable with appropriate initialization."""
     initial = tf.truncated_normal(shape, stddev=0.1)
     return tf.Variable(initial)
 
 def bias_variable(shape):
+    """Create a bias variable with appropriate initialization."""
     initial = tf.constant(0.1, shape=shape)
     return tf.Variable(initial)
 
+def weights_and_biases(W_shape):
+    """Create a weight/bias pair for a layer and attach summaries to both."""
+    with tf.name_scope('weights'):
+        W = weight_variable(W_shape)
+        variable_summaries(W)
+    with tf.name_scope('biases'):
+        b = bias_variable(W_shape[-1:])
+        variable_summaries(b)
+    return W, b
+
+# TODO: make it possible to use no activation function
+def fc_layer(input_tensor, W_fc_shape, layer_name, act=tf.nn.relu):
+    """Fully connected layer: matmul plus bias, then an activation."""
+    with tf.name_scope(layer_name):
+        W_fc, b_fc = weights_and_biases(W_fc_shape)
+        activations = act(tf.matmul(input_tensor, W_fc) + b_fc)
+    return activations
+
+def normed_fc_layer(input_tensor, W_fc_shape, layer_name, act=tf.nn.relu):
+    """Fully connected layer followed by layer normalization."""
+    return tf.contrib.layers.layer_norm(fc_layer(input_tensor, W_fc_shape, layer_name, act))
+
 def conv2d(x, W):
     return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='VALID')
 
 def separable_conv2d(x, W, P):
     return tf.nn.separable_conv2d(x, W, P, strides=[1, 1, 1, 1], padding='VALID')
 
+# TODO: make it possible to use no activation function
+def conv_layer(input_tensor, W_conv_shape, layer_name, sep=False, act=tf.nn.relu):
+    """Convolution layer (standard or separable) with summaries, then an activation."""
+    with tf.name_scope(layer_name):
+        with tf.name_scope('convs'):
+            W_conv, b_conv = weights_and_biases(W_conv_shape)
+        if sep:
+            with tf.name_scope('sep_convs'):
+                with tf.name_scope('weights'):
+                    P_conv = weight_variable([1, 1, W_conv_shape[3]*W_conv_shape[2], W_conv_shape[3]])
+                    variable_summaries(P_conv)
+                h_conv = act(separable_conv2d(input_tensor, W_conv, P_conv) + b_conv)
+                tf.summary.histogram('separable_conv_results', h_conv)
+        else:
+            with tf.name_scope('standard_convs'):
+                h_conv = act(conv2d(input_tensor, W_conv) + b_conv)
+                tf.summary.histogram('conv_results', h_conv)
+    return h_conv
+
+def normed_conv_layer(input_tensor, W_conv_shape, layer_name, sep=False, act=tf.nn.relu):
+    """Convolution layer followed by layer normalization."""
+    return tf.contrib.layers.layer_norm(conv_layer(input_tensor, W_conv_shape, layer_name, sep, act))
+
 def cnn_model(x, init_weights, hparams, params):
     """Low level model for a CNN."""
     # Reshape the input to use as our first feature layer
-    with tf.name_scope('input_tensor'):
+    with tf.name_scope('price_window'):
         input_price = tf.reshape(x, [-1, params.num_coins, hparams.window_size, params.num_input_channels])
 
     # First convolution layer
+    W_conv_shape = [1, hparams.len_conv1_filters, params.num_input_channels, hparams.num_conv1_features]
     with tf.name_scope('conv1'):
-        W_conv1 = weight_variable([1, hparams.len_conv1_filters, params.num_input_channels, hparams.num_conv1_features])
-        b_conv1 = bias_variable([hparams.num_conv1_features])
-        if hparams.conv_layers_separable:
-            P_conv1 = weight_variable([1, 1, hparams.num_conv1_features * params.num_input_channels, hparams.num_conv1_features])
-            h_conv1 = tf.nn.relu(separable_conv2d(input_price, W_conv1, P_conv1) + b_conv1)
-        else: # use standard convolution layer
-            h_conv1 = tf.nn.relu(conv2d(input_price, W_conv1) + b_conv1)
-
+        conv_layer_1 = normed_conv_layer(input_price, W_conv_shape, 'conv1', sep=hparams.conv_layers_separable)
+
     # Second convolution layer
+    W_conv_shape = [1, hparams.window_size-hparams.len_conv1_filters+1, hparams.num_conv1_features, hparams.num_conv2_features]
     with tf.name_scope('conv2'):
-        W_conv2 = weight_variable([1, hparams.window_size-hparams.len_conv1_filters+1, hparams.num_conv1_features, hparams.num_conv2_features])
-        b_conv2 = bias_variable([hparams.num_conv2_features])
-        if hparams.conv_layers_separable:
-            P_conv2 = weight_variable([1, 1, hparams.num_conv2_features*hparams.num_conv1_features, hparams.num_conv2_features])
-            h_conv2 = tf.nn.relu(separable_conv2d(h_conv1, W_conv2, P_conv2) + b_conv2)
-        else: # use standard convolution layer
-            h_conv2 = tf.nn.relu(conv2d(h_conv1, W_conv2) + b_conv2)
+        conv_layer_2 = normed_conv_layer(conv_layer_1, W_conv_shape, 'conv2', sep=hparams.conv_layers_separable)
 
     # Add in previous weights as a feature
     with tf.name_scope('previous_weights'):
         past_weights = tf.reshape(init_weights[:,1:], [-1, params.num_coins, 1, 1])
-        h_conv2_weights = tf.concat([h_conv2, past_weights], axis=3)
+        h_conv2_weights = tf.concat([conv_layer_2, past_weights], axis=3)
 
     # Dropout on 2nd convolution layer during training
     with tf.name_scope('dropout'):
         keep_prob = tf.placeholder(tf.float32)
+        tf.summary.scalar('dropout_keep_prob', keep_prob)
         h_conv2_weights_dropout = tf.nn.dropout(h_conv2_weights, keep_prob)
 
     # Three possible endings for this cnn model: third_conv_layer, one_fc_layer, two_fc_layers
     if hparams.model_ending == 'third_conv_layer':
         # Third and final convolution layer
-        with tf.name_scope('conv3'):
-            W_conv3 = weight_variable([1, params.len_conv3_filters, hparams.num_conv2_features+1, 1])
-            b_conv3 = bias_variable([1])
-            if hparams.conv_layers_separable:
-                P_conv3 = weight_variable([1, 1, hparams.num_conv2_features+1, 1])
-                h_conv3 = tf.nn.relu(separable_conv2d(h_conv2_weights_dropout, W_conv3, P_conv3) + b_conv3)
-            else:
-                h_conv3 = tf.nn.relu(conv2d(h_conv2_weights_dropout, W_conv3) + b_conv3)
-        final_layer = tf.reshape(h_conv3, [-1, params.num_coins])
+        W_conv_shape = [1, params.len_conv3_filters, hparams.num_conv2_features+1, 1]
+        conv_layer_3 = normed_conv_layer(h_conv2_weights_dropout, W_conv_shape, 'conv3', sep=hparams.conv_layers_separable)
+        final_layer = tf.reshape(conv_layer_3, [-1, params.num_coins])
+
     else:
         # Flatten the 2nd convolution layer prior to the fully connected layers
         h_conv2_weights_dropout_flat = tf.reshape(h_conv2_weights_dropout, [-1, params.num_coins*(hparams.num_conv2_features+1)])
+
         if hparams.model_ending == 'one_fc_layer':
             # First and only fully connected layer
+            W_fc_shape = [params.num_coins*(hparams.num_conv2_features+1), params.num_coins]
             with tf.name_scope('fc1'):
-                W_fc1 = weight_variable([params.num_coins*(hparams.num_conv2_features+1), params.num_coins])
-                b_fc1 = weight_variable([params.num_coins])
-                final_layer = tf.nn.relu(tf.matmul(h_conv2_weights_dropout_flat, W_fc1) + b_fc1)
+                final_layer = normed_fc_layer(h_conv2_weights_dropout_flat, W_fc_shape, 'fc1')
+
         elif hparams.model_ending == 'two_fc_layers':
             # First fully connected layer
+            W_fc_shape = [params.num_coins*(hparams.num_conv2_features+1), hparams.num_fc1_neurons]
             with tf.name_scope('fc1'):
-                W_fc1 = weight_variable([params.num_coins*(hparams.num_conv2_features+1), hparams.num_fc1_neurons])
-                b_fc1 = weight_variable([hparams.num_fc1_neurons])
-                h_fc1 = tf.nn.relu(tf.matmul(h_conv2_weights_dropout_flat, W_fc1) + b_fc1)
-            # Second and last fully connected layer
+                fc1_layer = normed_fc_layer(h_conv2_weights_dropout_flat, W_fc_shape, 'fc1')
+            # Second and last fully connected layer
+            W_fc_shape = [hparams.num_fc1_neurons, params.num_coins]
             with tf.name_scope('fc2'):
-                W_fc2 = weight_variable([hparams.num_fc1_neurons, params.num_coins])
-                b_fc2 = bias_variable([params.num_coins])
-                final_layer = tf.nn.relu(tf.matmul(h_fc1, W_fc2) + b_fc2)
+                final_layer = normed_fc_layer(fc1_layer, W_fc_shape, 'fc2')
 
     # Add in a bias for cash to the final layer before taking softmax to get portfolio weights
     with tf.name_scope('cash_bias'):
@@ -111,7 +143,7 @@ def cnn_model(x, init_weights, hparams, params):
 
     # Final portfolio weight tensor
     with tf.name_scope('weights'):
-        weights = tf.nn.softmax(final_layer_cash, name="output_tensor")
+        weights = tf.nn.softmax(final_layer_cash, name='output_tensor')
 
     return weights, keep_prob
 
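For reference, the separable path in conv_layer follows tf.nn.separable_conv2d's shape contract: the depthwise
filter W_conv has shape [1, k, in_channels, out_features] (so the channel multiplier is out_features), and the
derived pointwise filter P_conv has shape [1, 1, in_channels * out_features, out_features]. A minimal sketch of
the new helpers (TF 1.x); the 11-coin portfolio and 3 input channels are illustrative sizes, not values read
from params:

    import tensorflow as tf
    from cnn import normed_conv_layer, normed_fc_layer

    x = tf.placeholder(tf.float32, [None, 11, 50, 3])           # [batch, coins, window, channels]
    h1 = normed_conv_layer(x, [1, 3, 3, 4], 'conv1', sep=True)  # depthwise [1,3,3,4] -> pointwise [1,1,12,4]
    print(h1.shape)                                             # (?, 11, 48, 4): VALID padding trims k-1 = 2 steps
    flat = tf.reshape(h1, [-1, 11 * 48 * 4])
    scores = normed_fc_layer(flat, [11 * 48 * 4, 11], 'fc_demo')  # relu then layer_norm, one score per coin
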
diff --git a/crypto_bot.py b/crypto_bot.py
index 4983ad5..2eb24a2 100644
--- a/crypto_bot.py
+++ b/crypto_bot.py
@@ -97,10 +97,10 @@ def __init__(self, hparams, params, test=False, tuning=False):
         opt_test_portfolio, opt_test_port_return = pdata.calc_optimal_portfolio(test_labels, test_path)
 
         # Create the model
-        input_prices = tf.placeholder(tf.float32, [None, params.num_coins, hparams.window_size, params.num_input_channels])
-        labels = tf.placeholder(tf.float32, [None, params.num_coins+1])
-        init_weights = tf.placeholder(tf.float32, [None, params.num_coins+1])
-        batch_size = tf.placeholder(tf.int32)
+        input_prices = tf.placeholder(tf.float32, [None, params.num_coins, hparams.window_size, params.num_input_channels], name='nn_input')
+        labels = tf.placeholder(tf.float32, [None, params.num_coins+1], name='labels')
+        init_weights = tf.placeholder(tf.float32, [None, params.num_coins+1], name='initial_weights')
+        batch_size = tf.placeholder(tf.int32, name='batch_size')
 
         # Build the graph
         weights, keep_prob = cnn.cnn_model(input_prices, init_weights, hparams, params)
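Naming the placeholders pays off once the graph is saved and reloaded: tensors can be fetched by name instead
of by Python reference. A minimal inference sketch under stated assumptions: the checkpoint path is
hypothetical, 11 coins / 50-step windows / 3 channels are illustrative sizes, and the 'weights/output_tensor'
and 'dropout/Placeholder' names assume cnn_model is built at the top level of the graph:

    import numpy as np
    import tensorflow as tf

    saver = tf.train.import_meta_graph('model.ckpt.meta')          # hypothetical checkpoint
    graph = tf.get_default_graph()
    prices = graph.get_tensor_by_name('nn_input:0')
    init_w = graph.get_tensor_by_name('initial_weights:0')
    keep_prob = graph.get_tensor_by_name('dropout/Placeholder:0')  # keep_prob itself is unnamed
    out = graph.get_tensor_by_name('weights/output_tensor:0')
    with tf.Session() as sess:
        saver.restore(sess, 'model.ckpt')
        portfolio = sess.run(out, feed_dict={
            prices: np.zeros([1, 11, 50, 3], np.float32),  # [batch, coins, window, channels]
            init_w: np.full([1, 12], 1/12, np.float32),    # uniform over cash + 11 coins
            keep_prob: 1.0,                                # no dropout at inference
        })
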
diff --git a/hparams.py b/hparams.py
index 093aa20..a9ecc2e 100644
--- a/hparams.py
+++ b/hparams.py
@@ -34,22 +34,22 @@ def set_hparams(test=False) -> HParams:
         window_size=50,
         stride=1,
         batch_size=100,
-        num_training_steps=200000,
+        num_training_steps=50000,
         learning_rate=2e-4,
         geometric_decay=0.5, # the larger geometric_decay is, the more recent times will be selected in training
         conv_layers_separable=True,
         len_conv1_filters=3,
-        num_conv1_features=8,
+        num_conv1_features=4,
         # TODO: possibly have len_conv2_filters not just be the entire length as it currently is
         # len_conv2_filters = xxx
-        num_conv2_features=32,
+        num_conv2_features=16,
         num_fc1_neurons=12, # only for option two_fc_layers; it is set to num_coins for one_fc_layer in cnn.py
         model_ending='one_fc_layer', # options: two_fc_layers, one_fc_layer, third_conv_layer
         dropout_keep_prob=0.5,
     )
     if test:
         hparams.set_hparam('batch_size', 30)
-        hparams.set_hparam('num_training_steps', 4)
+        hparams.set_hparam('num_training_steps', 10)
     return hparams
 
@@ -84,8 +84,8 @@ def init_search_space_dict(test=False) -> dict:
         dim_dropout_keep_prob=space.Real(low=.1, high=.9, name='dropout_keep_prob'),
     )
     if test:
-        search_space_dict.update({'dim_batch_size': space.Integer(low=10, high=30, name='batch_size'),
-                                  'dim_num_training_steps': space.Integer(low=2, high=4, name='num_training_steps'),
+        search_space_dict.update({'dim_batch_size': space.Integer(low=20, high=60, name='batch_size'),
+                                  'dim_num_training_steps': space.Integer(low=10, high=20, name='num_training_steps'),
                                   'dim_window_size': space.Integer(low=10, high=50, name='window_size'),
                                   'dim_stride': space.Integer(low=1, high=2, name='stride')})
     return search_space_dict
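The values of this dict are scikit-optimize dimension objects, so a tuner can hand them to gp_minimize
directly. A minimal sketch with a stand-in objective; real tuning code would build an HParams from the sampled
values, train the bot, and return a negated performance score:

    from skopt import gp_minimize
    from skopt.utils import use_named_args
    from hparams import init_search_space_dict

    dims = list(init_search_space_dict(test=True).values())

    @use_named_args(dims)
    def objective(**sampled):
        # Stand-in score only; sampled maps each dimension's name to a sampled value
        return -float(sampled['num_training_steps'])

    result = gp_minimize(objective, dims, n_calls=10, random_state=0)
    print(result.x)  # best sampled point, ordered like dims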