From 904b7721ac5341e50645aab3e7745a04328245d0 Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Tue, 21 Apr 2015 10:53:53 +0200 Subject: [PATCH 01/11] Added verbose load_weights_from that also matches weights by shape, so that weights can be loaded even if architecture is not completely identical. --- nolearn/lasagne/base.py | 74 +++++++++++++++++++++++++++++------------ 1 file changed, 52 insertions(+), 22 deletions(-) diff --git a/nolearn/lasagne/base.py b/nolearn/lasagne/base.py index fd54fe1..04a8bdb 100644 --- a/nolearn/lasagne/base.py +++ b/nolearn/lasagne/base.py @@ -2,6 +2,7 @@ from .._compat import pickle from collections import OrderedDict +from difflib import SequenceMatcher import functools import itertools import operator @@ -395,28 +396,6 @@ def get_all_params(self): params = sum([l.get_params() for l in layers], []) return unique(params) - def load_weights_from(self, source): - self.initialize() - - if isinstance(source, str): - source = np.load(source) - - if isinstance(source, NeuralNet): - source = source.get_all_params() - - source_weights = [ - w.get_value() if hasattr(w, 'get_value') else w for w in source] - - for w1, w2 in zip(source_weights, self.get_all_params()): - if w1.shape != w2.get_value().shape: - continue - w2.set_value(w1) - - def save_weights_to(self, fname): - weights = [w.get_value() for w in self.get_all_params()] - with open(fname, 'wb') as f: - pickle.dump(weights, f, -1) - def __getstate__(self): state = dict(self.__dict__) for attr in ( @@ -456,3 +435,54 @@ def _get_param_names(self): # This allows us to have **kwargs in __init__ (woot!): param_names = super(NeuralNet, self)._get_param_names() return param_names + self._kwarg_keys + + def save_weights_to(self, fname): + weights = [w.get_value() for w in self.get_all_params()] + with open(fname, 'wb') as f: + pickle.dump(weights, f, -1) + + @staticmethod + def _param_alignment(shapes0, shapes1): + shapes0 = list(map(str, shapes0)) + shapes1 = list(map(str, shapes1)) + matcher = SequenceMatcher(a=shapes0, b=shapes1) + matches = [] + for block in matcher.get_matching_blocks(): + if block.size == 0: + continue + matches.append((list(range(block.a, block.a + block.size)), + list(range(block.b, block.b + block.size)))) + result = [line for match in matches for line in zip(*match)] + return result + + def load_weights_from(self, src): + if not hasattr(self, '_initialized'): + raise AttributeError( + "Please initialize the net before loading weights.") + + if isinstance(src, str): + src = np.load(src) + if isinstance(src, NeuralNet): + src = src.get_all_params() + + target = self.get_all_params() + src_params = [p.get_value() if hasattr(p, 'get_value') else p + for p in src] + target_params = [p.get_value() for p in target] + + src_shapes = [p.shape for p in src_params] + target_shapes = [p.shape for p in target_params] + matches = self._param_alignment(src_shapes, target_shapes) + + for i, j in matches: + # ii, jj are the indices of the layers, assuming 2 + # parameters per layer + ii, jj = int(0.5 * i) + 1, int(0.5 * j) + 1 + target[j].set_value(src_params[i]) + + if not self.verbose: + continue + target_layer_name = list(self.layers_)[jj] + param_shape = 'x'.join(map(str, src_params[i].shape)) + print("* Loaded parameter from layer {} to layer {} ({}) " + "(shape: {})".format(ii, jj, target_layer_name, param_shape)) From 0f7e5022095e2c6e0c1afad281e9178a7aaa6f71 Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Tue, 21 Apr 2015 11:06:17 +0200 Subject: [PATCH 02/11] Nets have to be initialized 
manually before loading weights. --- nolearn/tests/test_lasagne.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nolearn/tests/test_lasagne.py b/nolearn/tests/test_lasagne.py index ca1283b..8e954de 100644 --- a/nolearn/tests/test_lasagne.py +++ b/nolearn/tests/test_lasagne.py @@ -115,6 +115,7 @@ def on_epoch_finished(nn, train_history): # Use load_weights_from to initialize an untrained model: nn3 = clone(nn_def) + nn3.initialize() nn3.load_weights_from(nn2) assert np.array_equal(nn3.predict(X_test), y_pred) From 4f60d9e30f8051614e6c176ad36275cc60709f06 Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Tue, 21 Apr 2015 13:02:27 +0200 Subject: [PATCH 03/11] More detailed architecture information is now printed for convolutional nets (see Xudong Cao); layer infos are saved in layer_infos_ attribute for potential later use. New dependency: tabulate. --- nolearn/lasagne/base.py | 221 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 209 insertions(+), 12 deletions(-) diff --git a/nolearn/lasagne/base.py b/nolearn/lasagne/base.py index 04a8bdb..793021c 100644 --- a/nolearn/lasagne/base.py +++ b/nolearn/lasagne/base.py @@ -3,12 +3,13 @@ from .._compat import pickle from collections import OrderedDict from difflib import SequenceMatcher -import functools import itertools -import operator +import operator as op from time import time import pdb +from lasagne.layers import Conv2DLayer +from lasagne.layers import MaxPool2DLayer from lasagne.objectives import categorical_crossentropy from lasagne.objectives import mse from lasagne.objectives import Objective @@ -21,8 +22,15 @@ from sklearn.metrics import accuracy_score from sklearn.metrics import mean_squared_error from sklearn.preprocessing import LabelEncoder +from tabulate import tabulate import theano from theano import tensor as T +try: + from lasagne.layers.cuda_convnet import Conv2DCCLayer + from lasagne.layers.cuda_convnet import MaxPool2DCCLayer +except ImportError: + Conv2DCCLayer = Conv2DLayer + MaxPool2DCCLayer = MaxPool2DLayer class _list(list): @@ -36,7 +44,10 @@ def __contains__(self, key): class ansi: BLUE = '\033[94m' + CYAN = '\033[36m' GREEN = '\033[32m' + MAGENTA = '\033[35m' + RED = '\033[31m' ENDC = '\033[0m' @@ -64,6 +75,146 @@ def transform(self, Xb, yb): return Xb, yb +def get_real_filter(layers, img_size): + """Get the real filter sizes of each layer involved in + convoluation. See Xudong Cao: + https://www.kaggle.com/c/datasciencebowl/forums/t/13166/happy-lantern-festival-report-and-code + + This does not yet take into consideration feature pooling, + padding, striding and similar gimmicks. + + """ + # imports here to prevent circular dependencies + real_filter = np.zeros((len(layers), 2)) + conv_mode = True + first_conv_layer = True + expon = np.ones((1, 2)) + + for i, layer in enumerate(layers[1:]): + j = i + 1 + if not conv_mode: + real_filter[j] = img_size + continue + + if isinstance(layer, Conv2DLayer): + if not first_conv_layer: + new_filter = np.array(layer.filter_size) * expon + real_filter[j] = new_filter + else: + new_filter = np.array(layer.filter_size) * expon + real_filter[j] = new_filter + first_conv_layer = False + elif (isinstance(layer, MaxPool2DLayer) or + isinstance(layer, MaxPool2DCCLayer)): + real_filter[j] = real_filter[i] + expon *= np.array(layer.ds) + else: + conv_mode = False + real_filter[j] = img_size + + real_filter[0] = img_size + return real_filter + + +def get_receptive_field(layers, img_size): + """Get the real filter sizes of each layer involved in + convoluation. 
See Xudong Cao: + https://www.kaggle.com/c/datasciencebowl/forums/t/13166/happy-lantern-festival-report-and-code + + This does not yet take into consideration feature pooling, + padding, striding and similar gimmicks. + + """ + receptive_field = np.zeros((len(layers), 2)) + conv_mode = True + first_conv_layer = True + expon = np.ones((1, 2)) + + for i, layer in enumerate(layers[1:]): + j = i + 1 + if not conv_mode: + receptive_field[j] = img_size + continue + + if isinstance(layer, Conv2DLayer): + if not first_conv_layer: + last_field = receptive_field[i] + new_field = (last_field + expon * + (np.array(layer.filter_size) - 1)) + receptive_field[j] = new_field + else: + receptive_field[j] = layer.filter_size + first_conv_layer = False + elif (isinstance(layer, MaxPool2DLayer) or + isinstance(layer, MaxPool2DCCLayer)): + receptive_field[j] = receptive_field[i] + expon *= np.array(layer.ds) + else: + conv_mode = False + receptive_field[j] = img_size + + receptive_field[0] = img_size + return receptive_field + + +def get_conv_infos(net, min_capacity=100. / 6, tablefmt='pipe', + detailed=False): + CYA = ansi.CYAN + END = ansi.ENDC + MAG = ansi.MAGENTA + RED = ansi.RED + + if not hasattr(net, '_initialized'): + raise AttributeError("Please initialize the net before callings this " + "function, for instance by calling " + "net.initialize()") + + layers = net.layers_.values() + img_size = net.layers_['input'].get_output_shape()[2:] + + header = ['name', 'size', 'total', 'cap. Y [%]', 'cap. X [%]', + 'cov. Y [%]', 'cov. X [%]'] + if detailed: + header += ['filter Y', 'filter X', 'field Y', 'field X'] + + shapes = [layer.get_output_shape()[1:] for layer in layers] + totals = [str(reduce(op.mul, shape)) for shape in shapes] + shapes = ['x'.join(map(str, shape)) for shape in shapes] + shapes = np.array(shapes).reshape(-1, 1) + totals = np.array(totals).reshape(-1, 1) + + real_filters = get_real_filter(layers, img_size) + receptive_fields = get_receptive_field(layers, img_size) + capacity = 100. * real_filters / receptive_fields + capacity[np.negative(np.isfinite(capacity))] = 1 + img_coverage = 100. * receptive_fields / img_size + layer_names = [layer.name if layer.name + else str(layer).rsplit('.')[-1].split(' ')[0] + for layer in layers] + + colored_names = [] + for name, (covy, covx), (capy, capx) in zip( + layer_names, img_coverage, capacity): + if ( + ((covy > 100) or (covx > 100)) and + ((capy < min_capacity) or (capx < min_capacity)) + ): + name = "{}{}{}".format(RED, name, END) + elif (covy > 100) or (covx > 100): + name = "{}{}{}".format(CYA, name, END) + elif (capy < min_capacity) or (capx < min_capacity): + name = "{}{}{}".format(MAG, name, END) + colored_names.append(name) + colored_names = np.array(colored_names).reshape(-1, 1) + + table = np.hstack((colored_names, shapes, totals, capacity, img_coverage)) + if detailed: + table = np.hstack((table, real_filters.astype(int), + receptive_fields.astype(int))) + + return tabulate(table, header, tablefmt=tablefmt, floatfmt='.2f') + + class NeuralNet(BaseEstimator): """A scikit-learn estimator based on Lasagne. 
""" @@ -154,8 +305,6 @@ def initialize(self): out = getattr(self, '_output_layer', None) if out is None: out = self._output_layer = self.initialize_layers() - if self.verbose: - self._print_layer_info(self.layers_.values()) iter_funcs = self._create_iter_funcs( self.layers_, self.objective, self.update, @@ -165,6 +314,9 @@ def initialize(self): self.train_iter_, self.eval_iter_, self.predict_iter_ = iter_funcs self._initialized = True + if self.verbose: + self._print_layer_info() + def _get_params_for(self, name): collected = {} prefix = '{}_'.format(name) @@ -412,14 +564,59 @@ def __setstate__(self, state): self.__dict__.update(state) self.initialize() - def _print_layer_info(self, layers): - for layer in layers: - output_shape = layer.get_output_shape() - print(" {:<18}\t{:<20}\tproduces {:>7} outputs".format( - layer.name, - str(output_shape), - str(functools.reduce(operator.mul, output_shape[1:])), - )) + def _print_layer_info(self): + shapes = [param.get_value().shape for param in + self.get_all_params() if param] + nparams = reduce(op.add, [reduce(op.mul, shape) for + shape in shapes]) + print("# Neural Network with {} learnable parameters" + "\n".format(nparams)) + print("## Layer information") + + layers = self.layers_.values() + has_conv2d = any([isinstance(layer, Conv2DLayer) or + isinstance(layer, Conv2DCCLayer) + for layer in layers]) + if has_conv2d: + self._print_layer_info_conv() + else: + self._print_layer_info_plain() + + def _print_layer_info_plain(self): + nums = range(len(self.layers)) + names = list(zip(*self.layers))[0] + output_shapes = ['x'.join(map(str, layer.get_output_shape()[1:])) + for layer in self.layers_.values()] + table = OrderedDict([ + ('#', nums), + ('name', names), + ('size', output_shapes), + ]) + self.layer_infos_ = tabulate(table, 'keys', tablefmt='pipe') + print(self.layer_infos_) + print("") + + def _print_layer_info_conv(self): + if self.verbose > 1: + detailed = True + tablefmt = 'simple' + else: + detailed = False + tablefmt = 'pipe' + + self.layer_infos_ = get_conv_infos(self, detailed=detailed, + tablefmt=tablefmt) + print(self.layer_infos_) + print("\nExplanation") + print(" X, Y: image dimensions") + print(" cap.: learning capacity") + print(" cov.: coverage of image") + print(" {}: capacity too low (<1/6)" + "".format("{}{}{}".format(ansi.MAGENTA, "magenta", ansi.ENDC))) + print(" {}: image coverage too high (>100%)" + "".format("{}{}{}".format(ansi.CYAN, "cyan", ansi.ENDC))) + print(" {}: capacity too low and coverage too high\n" + "".format("{}{}{}".format(ansi.RED, "red", ansi.ENDC))) def get_params(self, deep=True): params = super(NeuralNet, self).get_params(deep=deep) From edc4f322c5b04d477a25675188ed23cb9a34f504 Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Tue, 21 Apr 2015 13:05:20 +0200 Subject: [PATCH 04/11] Updated requirements: tabulate. --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 39e6185..f9b18b1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ joblib==0.8.4 scikit-learn==0.15.2 Theano==0.7 +tabulate==0.7.5 git+https://github.com/benanne/Lasagne.git@cd5e396f87#egg=Lasagne From c48720e1d5d06f05be0f15970ad1dda6c940b37a Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Tue, 21 Apr 2015 13:48:34 +0200 Subject: [PATCH 05/11] Training info now in a slightly different format, is logged using tabulate in the log_ attribute. 
--- nolearn/lasagne/base.py | 65 +++++++++++++++++------------------ nolearn/tests/test_lasagne.py | 10 +++--- 2 files changed, 37 insertions(+), 38 deletions(-) diff --git a/nolearn/lasagne/base.py b/nolearn/lasagne/base.py index 793021c..9fbbeb3 100644 --- a/nolearn/lasagne/base.py +++ b/nolearn/lasagne/base.py @@ -434,15 +434,16 @@ def train_loop(self, X, y): on_training_finished = [on_training_finished] epoch = 0 - info = None - best_valid_loss = np.inf - best_train_loss = np.inf - - if self.verbose: - print(""" - Epoch | Train loss | Valid loss | Train / Val | Valid acc | Dur ---------|--------------|--------------|---------------|-------------|-------\ -""") + best_valid_loss = ( + min([row['valid loss'] for row in self.train_history_]) if + self.train_history_ else np.inf + ) + best_train_loss = ( + min([row['train loss'] for row in self.train_history_]) if + self.train_history_ else np.inf + ) + first_iteration = True + num_epochs_past = len(self.train_history_) while epoch < self.max_epochs: epoch += 1 @@ -470,32 +471,28 @@ def train_loop(self, X, y): best_train_loss = avg_train_loss if avg_valid_loss < best_valid_loss: best_valid_loss = avg_valid_loss - - if self.verbose: - best_train = best_train_loss == avg_train_loss - best_valid = best_valid_loss == avg_valid_loss - print(" {:>5} | {}{:>10.6f}{} | {}{:>10.6f}{} " - "| {:>11.6f} | {:>9} | {:>3.1f}s".format( - epoch, - ansi.BLUE if best_train else "", - avg_train_loss, - ansi.ENDC if best_train else "", - ansi.GREEN if best_valid else "", - avg_valid_loss, - ansi.ENDC if best_valid else "", - avg_train_loss / avg_valid_loss, - "{:.2f}%".format(avg_valid_accuracy * 100) - if not self.regression else "", - time() - t0, - )) - - info = dict( - epoch=epoch, - train_loss=avg_train_loss, - valid_loss=avg_valid_loss, - valid_accuracy=avg_valid_accuracy, - ) + best_train_loss == avg_train_loss + best_valid = best_valid_loss == avg_valid_loss + + info = OrderedDict([ + ('epoch', num_epochs_past + epoch), + ('train loss', avg_train_loss), + ('valid loss', avg_valid_loss), + ('valid best', avg_valid_loss if best_valid else None), + ('train/val', avg_train_loss / avg_valid_loss), + ('valid acc', avg_valid_accuracy), + ('dur', time() - t0), + ]) self.train_history_.append(info) + self.log_ = tabulate(self.train_history_, headers='keys', + tablefmt='pipe', floatfmt='.4f') + if self.verbose: + if first_iteration: + print(self.log_.split('\n', 2)[0]) + print(self.log_.split('\n', 2)[1]) + first_iteration = False + print(self.log_.rsplit('\n', 1)[-1]) + try: for func in on_epoch_finished: func(self, self.train_history_) diff --git a/nolearn/tests/test_lasagne.py b/nolearn/tests/test_lasagne.py index 8e954de..70fd982 100644 --- a/nolearn/tests/test_lasagne.py +++ b/nolearn/tests/test_lasagne.py @@ -97,11 +97,13 @@ def on_epoch_finished(nn, train_history): nn = clone(nn_def) nn.fit(X_train, y_train) assert len(epochs) == 2 - assert epochs[0]['valid_accuracy'] > 0.8 - assert epochs[1]['valid_accuracy'] > epochs[0]['valid_accuracy'] - assert sorted(epochs[0].keys()) == [ - 'epoch', 'train_loss', 'valid_accuracy', 'valid_loss', + assert epochs[0]['valid acc'] > 0.8 + assert epochs[1]['valid acc'] > epochs[0]['valid acc'] + expected_keys = [ + 'epoch', 'train loss', 'valid loss', 'valid best', + 'train/val', 'valid acc', 'dur', ] + assert set(epochs[0].keys()) == set(expected_keys) y_pred = nn.predict(X_test) assert accuracy_score(y_pred, y_test) > 0.85 From 92c21c720684d8230c194302105b353b41779c4d Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: 
Tue, 21 Apr 2015 14:16:56 +0200 Subject: [PATCH 06/11] Added possibility to add custom score to the train info (e.g. use parameter custom_score=('mean abs error', mean_abs_error)). --- nolearn/lasagne/base.py | 13 ++++++++++++- nolearn/tests/test_lasagne.py | 2 ++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/nolearn/lasagne/base.py b/nolearn/lasagne/base.py index 9fbbeb3..e5ab85b 100644 --- a/nolearn/lasagne/base.py +++ b/nolearn/lasagne/base.py @@ -230,6 +230,7 @@ def __init__( regression=False, max_epochs=100, eval_size=0.2, + custom_score=None, X_tensor_type=None, y_tensor_type=None, use_label_encoder=False, @@ -266,6 +267,7 @@ def __init__( self.regression = regression self.max_epochs = max_epochs self.eval_size = eval_size + self.custom_score = custom_score self.X_tensor_type = X_tensor_type self.y_tensor_type = y_tensor_type self.use_label_encoder = use_label_encoder @@ -451,6 +453,7 @@ def train_loop(self, X, y): train_losses = [] valid_losses = [] valid_accuracies = [] + custom_score = [] t0 = time() @@ -462,10 +465,15 @@ def train_loop(self, X, y): batch_valid_loss, accuracy = self.eval_iter_(Xb, yb) valid_losses.append(batch_valid_loss) valid_accuracies.append(accuracy) + if self.custom_score: + y_prob = self.predict_iter_(Xb) + custom_score.append(self.custom_score[1](yb, y_prob)) avg_train_loss = np.mean(train_losses) avg_valid_loss = np.mean(valid_losses) avg_valid_accuracy = np.mean(valid_accuracies) + if custom_score: + avg_custom_score = np.mean(custom_score) if avg_train_loss < best_train_loss: best_train_loss = avg_train_loss @@ -481,8 +489,11 @@ def train_loop(self, X, y): ('valid best', avg_valid_loss if best_valid else None), ('train/val', avg_train_loss / avg_valid_loss), ('valid acc', avg_valid_accuracy), - ('dur', time() - t0), ]) + if self.custom_score: + info.update({self.custom_score[0]: avg_custom_score}) + info.update({'dur': time() - t0}) + self.train_history_.append(info) self.log_ = tabulate(self.train_history_, headers='keys', tablefmt='pipe', floatfmt='.4f') diff --git a/nolearn/tests/test_lasagne.py b/nolearn/tests/test_lasagne.py index 70fd982..54fd921 100644 --- a/nolearn/tests/test_lasagne.py +++ b/nolearn/tests/test_lasagne.py @@ -190,6 +190,7 @@ def test_clone(): on_training_finished=None, max_epochs=100, eval_size=0.1, + custom_score=None, verbose=0, ) nn = NeuralNet(**params) @@ -204,6 +205,7 @@ def test_clone(): 'output_nonlinearity', 'loss', 'objective' + 'custom_score', ): for par in (params, params1, params2): par.pop(ignore, None) From 3288f96e58ffec0d69b5007326428bd3233db73e Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Tue, 21 Apr 2015 16:06:28 +0200 Subject: [PATCH 07/11] Unnecessary check. --- nolearn/lasagne/base.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/nolearn/lasagne/base.py b/nolearn/lasagne/base.py index e5ab85b..f24e556 100644 --- a/nolearn/lasagne/base.py +++ b/nolearn/lasagne/base.py @@ -164,11 +164,6 @@ def get_conv_infos(net, min_capacity=100. / 6, tablefmt='pipe', MAG = ansi.MAGENTA RED = ansi.RED - if not hasattr(net, '_initialized'): - raise AttributeError("Please initialize the net before callings this " - "function, for instance by calling " - "net.initialize()") - layers = net.layers_.values() img_size = net.layers_['input'].get_output_shape()[2:] From af1084643fd70ed9aaeb357b1c146567ee10c988 Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Tue, 21 Apr 2015 16:15:58 +0200 Subject: [PATCH 08/11] Increased test coverage for lasagne, mainly covering the verbose outputs. 
--- nolearn/tests/test_lasagne.py | 156 ++++++++++++++++++++++++++++++++++ 1 file changed, 156 insertions(+) diff --git a/nolearn/tests/test_lasagne.py b/nolearn/tests/test_lasagne.py index 54fd921..b78c498 100644 --- a/nolearn/tests/test_lasagne.py +++ b/nolearn/tests/test_lasagne.py @@ -2,9 +2,11 @@ from mock import patch from mock import Mock +from lasagne.layers import Conv2DLayer from lasagne.layers import DenseLayer from lasagne.layers import DropoutLayer from lasagne.layers import InputLayer +from lasagne.layers import MaxPool2DLayer from lasagne.nonlinearities import identity from lasagne.nonlinearities import softmax from lasagne.objectives import categorical_crossentropy @@ -91,6 +93,7 @@ def on_epoch_finished(nn, train_history): update_momentum=0.9, max_epochs=5, + verbose=1, on_epoch_finished=on_epoch_finished, ) @@ -344,3 +347,156 @@ def test_diamond(self, NeuralNet): concat.assert_called_with([hidden1.return_value, hidden2.return_value], name='concat') output.assert_called_with(concat.return_value, name='output') + + +def test_verbose_nn(mnist): + # Just check that no exception is thrown + from nolearn.lasagne import NeuralNet + + X, y = mnist + X_train, y_train = X[:1000], y[:1000] + num_epochs = 7 + + nn = NeuralNet( + layers=[ + ('input', InputLayer), + ('hidden1', DenseLayer), + ('dropout1', DropoutLayer), + ('hidden2', DenseLayer), + ('dropout2', DropoutLayer), + ('output', DenseLayer), + ], + input_shape=(None, 784), + output_num_units=10, + output_nonlinearity=softmax, + + more_params=dict( + hidden1_num_units=512, + hidden2_num_units=512, + ), + + update=nesterov_momentum, + update_learning_rate=0.01, + update_momentum=0.9, + + max_epochs=num_epochs, + verbose=True, + ) + + nn.fit(X_train, y_train) + nn.predict_proba(X_train) + nn.predict(X_train) + nn.score(X_train, y_train) + + assert nn.layer_infos_.replace(' ', '').startswith(u'|#|name|size|') + assert nn.log_.replace(' ', '').startswith( + u'|epoch|trainloss|val''idloss|validbest|train/val|validacc|dur|') + assert nn.log_.count('\n') == num_epochs + 1 + + +def test_verbose_nn_with_custom_score(mnist): + # Just check that no exception is thrown + from nolearn.lasagne import NeuralNet + + def my_score(y_true, y_prob): + return 1.2345 + + X, y = mnist + X_train, y_train = X[:1000], y[:1000] + num_epochs = 4 + + nn = NeuralNet( + layers=[ + ('input', InputLayer), + ('hidden1', DenseLayer), + ('dropout1', DropoutLayer), + ('hidden2', DenseLayer), + ('dropout2', DropoutLayer), + ('output', DenseLayer), + ], + input_shape=(None, 784), + output_num_units=10, + output_nonlinearity=softmax, + + more_params=dict( + hidden1_num_units=512, + hidden2_num_units=512, + ), + + update=nesterov_momentum, + update_learning_rate=0.01, + update_momentum=0.9, + + custom_score=('score_name', my_score), + max_epochs=num_epochs, + verbose=True, + ) + + nn.fit(X_train, y_train) + nn.predict_proba(X_train) + nn.predict(X_train) + nn.score(X_train, y_train) + + assert nn.layer_infos_.replace(' ', '').startswith(u'|#|name|size|') + assert nn.log_.replace(' ', '').startswith( + u'|epoch|trainloss|val''idloss|validbest|train/val|validacc|' + 'score_name|dur|') + assert nn.log_.count('\n') == num_epochs + 1 + log_my_score = nn.log_.replace(' ', '').rsplit('\n')[-1].split('|')[-3] + assert log_my_score == '1.2345' + + +def test_verbose_cnn(mnist): + # Just check that no exception is thrown + from nolearn.lasagne import NeuralNet + + X, y = mnist + X_train, y_train = X[:100].reshape(-1, 1, 28, 28), y[:100] + X_train = X_train.reshape(-1, 1, 28, 
28) + num_epochs = 3 + + nn = NeuralNet( + layers=[ + ('input', InputLayer), + ('conv1', Conv2DLayer), + ('conv2', Conv2DLayer), + ('pool2', MaxPool2DLayer), + ('conv3', Conv2DLayer), + ('conv4', Conv2DLayer), + ('pool4', MaxPool2DLayer), + ('hidden1', DenseLayer), + ('output', DenseLayer), + ], + input_shape=(None, 1, 28, 28), + output_num_units=10, + output_nonlinearity=softmax, + + more_params=dict( + conv1_filter_size=(5, 5), conv1_num_filters=16, + conv2_filter_size=(3, 3), conv2_num_filters=16, + pool2_ds=(3, 3), + conv3_filter_size=(3, 3), conv3_num_filters=16, + conv4_filter_size=(3, 3), conv4_num_filters=16, + pool4_ds=(2, 2), + hidden1_num_units=512, + ), + + update=nesterov_momentum, + update_learning_rate=0.01, + update_momentum=0.9, + + max_epochs=num_epochs, + verbose=2, + ) + + nn.fit(X_train, y_train) + nn.predict_proba(X_train) + nn.predict(X_train) + nn.score(X_train, y_train) + + assert nn.layer_infos_.replace(' ', '').startswith( + u'namesizetotalcap.Y[%]cap.X[%]cov.Y[%]cov.X[%]filterYfilterXfieldY' + 'fieldX') + assert nn.log_.replace(' ', '').startswith( + u'|epoch|trainloss|val''idloss|validbest|train/val|validacc|dur|') + assert nn.log_.count('\n') == num_epochs + 1 From f40eea276c6146c3dfd04cfce7700b493ef607a0 Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Fri, 24 Apr 2015 10:43:41 +0200 Subject: [PATCH 09/11] Restore master branch as in origin nolearn. --- nolearn/lasagne/base.py | 364 +++++++--------------------------- nolearn/tests/test_lasagne.py | 169 +--------------- 2 files changed, 71 insertions(+), 462 deletions(-) diff --git a/nolearn/lasagne/base.py b/nolearn/lasagne/base.py index f24e556..fd54fe1 100644 --- a/nolearn/lasagne/base.py +++ b/nolearn/lasagne/base.py @@ -2,14 +2,12 @@ from .._compat import pickle from collections import OrderedDict -from difflib import SequenceMatcher +import functools import itertools -import operator as op +import operator from time import time import pdb -from lasagne.layers import Conv2DLayer -from lasagne.layers import MaxPool2DLayer from lasagne.objectives import categorical_crossentropy from lasagne.objectives import mse from lasagne.objectives import Objective @@ -22,15 +20,8 @@ from sklearn.metrics import accuracy_score from sklearn.metrics import mean_squared_error from sklearn.preprocessing import LabelEncoder -from tabulate import tabulate import theano from theano import tensor as T -try: - from lasagne.layers.cuda_convnet import Conv2DCCLayer - from lasagne.layers.cuda_convnet import MaxPool2DCCLayer -except ImportError: - Conv2DCCLayer = Conv2DLayer - MaxPool2DCCLayer = MaxPool2DLayer class _list(list): @@ -44,10 +35,7 @@ def __contains__(self, key): class ansi: BLUE = '\033[94m' - CYAN = '\033[36m' GREEN = '\033[32m' - MAGENTA = '\033[35m' - RED = '\033[31m' ENDC = '\033[0m' @@ -75,141 +63,6 @@ def transform(self, Xb, yb): return Xb, yb -def get_real_filter(layers, img_size): - """Get the real filter sizes of each layer involved in - convoluation. See Xudong Cao: - https://www.kaggle.com/c/datasciencebowl/forums/t/13166/happy-lantern-festival-report-and-code - - This does not yet take into consideration feature pooling, - padding, striding and similar gimmicks. 
- - """ - # imports here to prevent circular dependencies - real_filter = np.zeros((len(layers), 2)) - conv_mode = True - first_conv_layer = True - expon = np.ones((1, 2)) - - for i, layer in enumerate(layers[1:]): - j = i + 1 - if not conv_mode: - real_filter[j] = img_size - continue - - if isinstance(layer, Conv2DLayer): - if not first_conv_layer: - new_filter = np.array(layer.filter_size) * expon - real_filter[j] = new_filter - else: - new_filter = np.array(layer.filter_size) * expon - real_filter[j] = new_filter - first_conv_layer = False - elif (isinstance(layer, MaxPool2DLayer) or - isinstance(layer, MaxPool2DCCLayer)): - real_filter[j] = real_filter[i] - expon *= np.array(layer.ds) - else: - conv_mode = False - real_filter[j] = img_size - - real_filter[0] = img_size - return real_filter - - -def get_receptive_field(layers, img_size): - """Get the real filter sizes of each layer involved in - convoluation. See Xudong Cao: - https://www.kaggle.com/c/datasciencebowl/forums/t/13166/happy-lantern-festival-report-and-code - - This does not yet take into consideration feature pooling, - padding, striding and similar gimmicks. - - """ - receptive_field = np.zeros((len(layers), 2)) - conv_mode = True - first_conv_layer = True - expon = np.ones((1, 2)) - - for i, layer in enumerate(layers[1:]): - j = i + 1 - if not conv_mode: - receptive_field[j] = img_size - continue - - if isinstance(layer, Conv2DLayer): - if not first_conv_layer: - last_field = receptive_field[i] - new_field = (last_field + expon * - (np.array(layer.filter_size) - 1)) - receptive_field[j] = new_field - else: - receptive_field[j] = layer.filter_size - first_conv_layer = False - elif (isinstance(layer, MaxPool2DLayer) or - isinstance(layer, MaxPool2DCCLayer)): - receptive_field[j] = receptive_field[i] - expon *= np.array(layer.ds) - else: - conv_mode = False - receptive_field[j] = img_size - - receptive_field[0] = img_size - return receptive_field - - -def get_conv_infos(net, min_capacity=100. / 6, tablefmt='pipe', - detailed=False): - CYA = ansi.CYAN - END = ansi.ENDC - MAG = ansi.MAGENTA - RED = ansi.RED - - layers = net.layers_.values() - img_size = net.layers_['input'].get_output_shape()[2:] - - header = ['name', 'size', 'total', 'cap. Y [%]', 'cap. X [%]', - 'cov. Y [%]', 'cov. X [%]'] - if detailed: - header += ['filter Y', 'filter X', 'field Y', 'field X'] - - shapes = [layer.get_output_shape()[1:] for layer in layers] - totals = [str(reduce(op.mul, shape)) for shape in shapes] - shapes = ['x'.join(map(str, shape)) for shape in shapes] - shapes = np.array(shapes).reshape(-1, 1) - totals = np.array(totals).reshape(-1, 1) - - real_filters = get_real_filter(layers, img_size) - receptive_fields = get_receptive_field(layers, img_size) - capacity = 100. * real_filters / receptive_fields - capacity[np.negative(np.isfinite(capacity))] = 1 - img_coverage = 100. 
* receptive_fields / img_size - layer_names = [layer.name if layer.name - else str(layer).rsplit('.')[-1].split(' ')[0] - for layer in layers] - - colored_names = [] - for name, (covy, covx), (capy, capx) in zip( - layer_names, img_coverage, capacity): - if ( - ((covy > 100) or (covx > 100)) and - ((capy < min_capacity) or (capx < min_capacity)) - ): - name = "{}{}{}".format(RED, name, END) - elif (covy > 100) or (covx > 100): - name = "{}{}{}".format(CYA, name, END) - elif (capy < min_capacity) or (capx < min_capacity): - name = "{}{}{}".format(MAG, name, END) - colored_names.append(name) - colored_names = np.array(colored_names).reshape(-1, 1) - - table = np.hstack((colored_names, shapes, totals, capacity, img_coverage)) - if detailed: - table = np.hstack((table, real_filters.astype(int), - receptive_fields.astype(int))) - - return tabulate(table, header, tablefmt=tablefmt, floatfmt='.2f') - - class NeuralNet(BaseEstimator): """A scikit-learn estimator based on Lasagne. """ @@ -225,7 +78,6 @@ def __init__( regression=False, max_epochs=100, eval_size=0.2, - custom_score=None, X_tensor_type=None, y_tensor_type=None, use_label_encoder=False, @@ -262,7 +114,6 @@ def __init__( self.regression = regression self.max_epochs = max_epochs self.eval_size = eval_size - self.custom_score = custom_score self.X_tensor_type = X_tensor_type self.y_tensor_type = y_tensor_type self.use_label_encoder = use_label_encoder @@ -302,6 +153,8 @@ def initialize(self): out = getattr(self, '_output_layer', None) if out is None: out = self._output_layer = self.initialize_layers() + if self.verbose: + self._print_layer_info(self.layers_.values()) iter_funcs = self._create_iter_funcs( self.layers_, self.objective, self.update, @@ -311,9 +164,6 @@ def initialize(self): self.train_iter_, self.eval_iter_, self.predict_iter_ = iter_funcs self._initialized = True - if self.verbose: - self._print_layer_info() - def _get_params_for(self, name): collected = {} prefix = '{}_'.format(name) @@ -431,16 +281,15 @@ def train_loop(self, X, y): on_training_finished = [on_training_finished] epoch = 0 - best_valid_loss = ( - min([row['valid loss'] for row in self.train_history_]) if - self.train_history_ else np.inf - ) - best_train_loss = ( - min([row['train loss'] for row in self.train_history_]) if - self.train_history_ else np.inf - ) - first_iteration = True - num_epochs_past = len(self.train_history_) + info = None + best_valid_loss = np.inf + best_train_loss = np.inf + + if self.verbose: + print(""" + Epoch | Train loss | Valid loss | Train / Val | Valid acc | Dur +--------|--------------|--------------|---------------|-------------|-------\ +""") while epoch < self.max_epochs: epoch += 1 @@ -448,7 +297,6 @@ def train_loop(self, X, y): train_losses = [] valid_losses = [] valid_accuracies = [] - custom_score = [] t0 = time() @@ -460,45 +308,41 @@ def train_loop(self, X, y): batch_valid_loss, accuracy = self.eval_iter_(Xb, yb) valid_losses.append(batch_valid_loss) valid_accuracies.append(accuracy) - if self.custom_score: - y_prob = self.predict_iter_(Xb) - custom_score.append(self.custom_score[1](yb, y_prob)) avg_train_loss = np.mean(train_losses) avg_valid_loss = np.mean(valid_losses) avg_valid_accuracy = np.mean(valid_accuracies) - if custom_score: - avg_custom_score = np.mean(custom_score) if avg_train_loss < best_train_loss: best_train_loss = avg_train_loss if avg_valid_loss < best_valid_loss: best_valid_loss = avg_valid_loss - best_train_loss == avg_train_loss - best_valid = best_valid_loss == avg_valid_loss - - info = 
OrderedDict([ - ('epoch', num_epochs_past + epoch), - ('train loss', avg_train_loss), - ('valid loss', avg_valid_loss), - ('valid best', avg_valid_loss if best_valid else None), - ('train/val', avg_train_loss / avg_valid_loss), - ('valid acc', avg_valid_accuracy), - ]) - if self.custom_score: - info.update({self.custom_score[0]: avg_custom_score}) - info.update({'dur': time() - t0}) - self.train_history_.append(info) - self.log_ = tabulate(self.train_history_, headers='keys', - tablefmt='pipe', floatfmt='.4f') if self.verbose: - if first_iteration: - print(self.log_.split('\n', 2)[0]) - print(self.log_.split('\n', 2)[1]) - first_iteration = False - print(self.log_.rsplit('\n', 1)[-1]) - + best_train = best_train_loss == avg_train_loss + best_valid = best_valid_loss == avg_valid_loss + print(" {:>5} | {}{:>10.6f}{} | {}{:>10.6f}{} " + "| {:>11.6f} | {:>9} | {:>3.1f}s".format( + epoch, + ansi.BLUE if best_train else "", + avg_train_loss, + ansi.ENDC if best_train else "", + ansi.GREEN if best_valid else "", + avg_valid_loss, + ansi.ENDC if best_valid else "", + avg_train_loss / avg_valid_loss, + "{:.2f}%".format(avg_valid_accuracy * 100) + if not self.regression else "", + time() - t0, + )) + + info = dict( + epoch=epoch, + train_loss=avg_train_loss, + valid_loss=avg_valid_loss, + valid_accuracy=avg_valid_accuracy, + ) + self.train_history_.append(info) try: for func in on_epoch_finished: func(self, self.train_history_) @@ -551,6 +395,28 @@ def get_all_params(self): params = sum([l.get_params() for l in layers], []) return unique(params) + def load_weights_from(self, source): + self.initialize() + + if isinstance(source, str): + source = np.load(source) + + if isinstance(source, NeuralNet): + source = source.get_all_params() + + source_weights = [ + w.get_value() if hasattr(w, 'get_value') else w for w in source] + + for w1, w2 in zip(source_weights, self.get_all_params()): + if w1.shape != w2.get_value().shape: + continue + w2.set_value(w1) + + def save_weights_to(self, fname): + weights = [w.get_value() for w in self.get_all_params()] + with open(fname, 'wb') as f: + pickle.dump(weights, f, -1) + def __getstate__(self): state = dict(self.__dict__) for attr in ( @@ -567,59 +433,14 @@ def __setstate__(self, state): self.__dict__.update(state) self.initialize() - def _print_layer_info(self): - shapes = [param.get_value().shape for param in - self.get_all_params() if param] - nparams = reduce(op.add, [reduce(op.mul, shape) for - shape in shapes]) - print("# Neural Network with {} learnable parameters" - "\n".format(nparams)) - print("## Layer information") - - layers = self.layers_.values() - has_conv2d = any([isinstance(layer, Conv2DLayer) or - isinstance(layer, Conv2DCCLayer) - for layer in layers]) - if has_conv2d: - self._print_layer_info_conv() - else: - self._print_layer_info_plain() - - def _print_layer_info_plain(self): - nums = range(len(self.layers)) - names = list(zip(*self.layers))[0] - output_shapes = ['x'.join(map(str, layer.get_output_shape()[1:])) - for layer in self.layers_.values()] - table = OrderedDict([ - ('#', nums), - ('name', names), - ('size', output_shapes), - ]) - self.layer_infos_ = tabulate(table, 'keys', tablefmt='pipe') - print(self.layer_infos_) - print("") - - def _print_layer_info_conv(self): - if self.verbose > 1: - detailed = True - tablefmt = 'simple' - else: - detailed = False - tablefmt = 'pipe' - - self.layer_infos_ = get_conv_infos(self, detailed=detailed, - tablefmt=tablefmt) - print(self.layer_infos_) - print("\nExplanation") - print(" X, Y: image 
dimensions") - print(" cap.: learning capacity") - print(" cov.: coverage of image") - print(" {}: capacity too low (<1/6)" - "".format("{}{}{}".format(ansi.MAGENTA, "magenta", ansi.ENDC))) - print(" {}: image coverage too high (>100%)" - "".format("{}{}{}".format(ansi.CYAN, "cyan", ansi.ENDC))) - print(" {}: capacity too low and coverage too high\n" - "".format("{}{}{}".format(ansi.RED, "red", ansi.ENDC))) + def _print_layer_info(self, layers): + for layer in layers: + output_shape = layer.get_output_shape() + print(" {:<18}\t{:<20}\tproduces {:>7} outputs".format( + layer.name, + str(output_shape), + str(functools.reduce(operator.mul, output_shape[1:])), + )) def get_params(self, deep=True): params = super(NeuralNet, self).get_params(deep=deep) @@ -635,54 +456,3 @@ def _get_param_names(self): # This allows us to have **kwargs in __init__ (woot!): param_names = super(NeuralNet, self)._get_param_names() return param_names + self._kwarg_keys - - def save_weights_to(self, fname): - weights = [w.get_value() for w in self.get_all_params()] - with open(fname, 'wb') as f: - pickle.dump(weights, f, -1) - - @staticmethod - def _param_alignment(shapes0, shapes1): - shapes0 = list(map(str, shapes0)) - shapes1 = list(map(str, shapes1)) - matcher = SequenceMatcher(a=shapes0, b=shapes1) - matches = [] - for block in matcher.get_matching_blocks(): - if block.size == 0: - continue - matches.append((list(range(block.a, block.a + block.size)), - list(range(block.b, block.b + block.size)))) - result = [line for match in matches for line in zip(*match)] - return result - - def load_weights_from(self, src): - if not hasattr(self, '_initialized'): - raise AttributeError( - "Please initialize the net before loading weights.") - - if isinstance(src, str): - src = np.load(src) - if isinstance(src, NeuralNet): - src = src.get_all_params() - - target = self.get_all_params() - src_params = [p.get_value() if hasattr(p, 'get_value') else p - for p in src] - target_params = [p.get_value() for p in target] - - src_shapes = [p.shape for p in src_params] - target_shapes = [p.shape for p in target_params] - matches = self._param_alignment(src_shapes, target_shapes) - - for i, j in matches: - # ii, jj are the indices of the layers, assuming 2 - # parameters per layer - ii, jj = int(0.5 * i) + 1, int(0.5 * j) + 1 - target[j].set_value(src_params[i]) - - if not self.verbose: - continue - target_layer_name = list(self.layers_)[jj] - param_shape = 'x'.join(map(str, src_params[i].shape)) - print("* Loaded parameter from layer {} to layer {} ({}) " - "(shape: {})".format(ii, jj, target_layer_name, param_shape)) diff --git a/nolearn/tests/test_lasagne.py b/nolearn/tests/test_lasagne.py index b78c498..ca1283b 100644 --- a/nolearn/tests/test_lasagne.py +++ b/nolearn/tests/test_lasagne.py @@ -2,11 +2,9 @@ from mock import patch from mock import Mock -from lasagne.layers import Conv2DLayer from lasagne.layers import DenseLayer from lasagne.layers import DropoutLayer from lasagne.layers import InputLayer -from lasagne.layers import MaxPool2DLayer from lasagne.nonlinearities import identity from lasagne.nonlinearities import softmax from lasagne.objectives import categorical_crossentropy @@ -93,20 +91,17 @@ def on_epoch_finished(nn, train_history): update_momentum=0.9, max_epochs=5, - verbose=1, on_epoch_finished=on_epoch_finished, ) nn = clone(nn_def) nn.fit(X_train, y_train) assert len(epochs) == 2 - assert epochs[0]['valid acc'] > 0.8 - assert epochs[1]['valid acc'] > epochs[0]['valid acc'] - expected_keys = [ - 'epoch', 'train 
loss', 'valid loss', 'valid best', - 'train/val', 'valid acc', 'dur', + assert epochs[0]['valid_accuracy'] > 0.8 + assert epochs[1]['valid_accuracy'] > epochs[0]['valid_accuracy'] + assert sorted(epochs[0].keys()) == [ + 'epoch', 'train_loss', 'valid_accuracy', 'valid_loss', ] - assert set(epochs[0].keys()) == set(expected_keys) y_pred = nn.predict(X_test) assert accuracy_score(y_pred, y_test) > 0.85 @@ -120,7 +115,6 @@ def on_epoch_finished(nn, train_history): # Use load_weights_from to initialize an untrained model: nn3 = clone(nn_def) - nn3.initialize() nn3.load_weights_from(nn2) assert np.array_equal(nn3.predict(X_test), y_pred) @@ -193,7 +187,6 @@ def test_clone(): on_training_finished=None, max_epochs=100, eval_size=0.1, - custom_score=None, verbose=0, ) nn = NeuralNet(**params) @@ -208,7 +201,6 @@ def test_clone(): 'output_nonlinearity', 'loss', 'objective' - 'custom_score', ): for par in (params, params1, params2): par.pop(ignore, None) @@ -347,156 +339,3 @@ def test_diamond(self, NeuralNet): concat.assert_called_with([hidden1.return_value, hidden2.return_value], name='concat') output.assert_called_with(concat.return_value, name='output') - - -def test_verbose_nn(mnist): - # Just check that no exception is thrown - from nolearn.lasagne import NeuralNet - - X, y = mnist - X_train, y_train = X[:1000], y[:1000] - num_epochs = 7 - - nn = NeuralNet( - layers=[ - ('input', InputLayer), - ('hidden1', DenseLayer), - ('dropout1', DropoutLayer), - ('hidden2', DenseLayer), - ('dropout2', DropoutLayer), - ('output', DenseLayer), - ], - input_shape=(None, 784), - output_num_units=10, - output_nonlinearity=softmax, - - more_params=dict( - hidden1_num_units=512, - hidden2_num_units=512, - ), - - update=nesterov_momentum, - update_learning_rate=0.01, - update_momentum=0.9, - - max_epochs=num_epochs, - verbose=True, - ) - - nn.fit(X_train, y_train) - nn.predict_proba(X_train) - nn.predict(X_train) - nn.score(X_train, y_train) - - assert nn.layer_infos_.replace(' ', '').startswith(u'|#|name|size|') - assert nn.log_.replace(' ', '').startswith( - u'|epoch|trainloss|val''idloss|validbest|train/val|validacc|dur|') - assert nn.log_.count('\n') == num_epochs + 1 - - -def test_verbose_nn_with_custom_score(mnist): - # Just check that no exception is thrown - from nolearn.lasagne import NeuralNet - - def my_score(y_true, y_prob): - return 1.2345 - - X, y = mnist - X_train, y_train = X[:1000], y[:1000] - num_epochs = 4 - - nn = NeuralNet( - layers=[ - ('input', InputLayer), - ('hidden1', DenseLayer), - ('dropout1', DropoutLayer), - ('hidden2', DenseLayer), - ('dropout2', DropoutLayer), - ('output', DenseLayer), - ], - input_shape=(None, 784), - output_num_units=10, - output_nonlinearity=softmax, - - more_params=dict( - hidden1_num_units=512, - hidden2_num_units=512, - ), - - update=nesterov_momentum, - update_learning_rate=0.01, - update_momentum=0.9, - - custom_score=('score_name', my_score), - max_epochs=num_epochs, - verbose=True, - ) - - nn.fit(X_train, y_train) - nn.predict_proba(X_train) - nn.predict(X_train) - nn.score(X_train, y_train) - - assert nn.layer_infos_.replace(' ', '').startswith(u'|#|name|size|') - assert nn.log_.replace(' ', '').startswith( - u'|epoch|trainloss|val''idloss|validbest|train/val|validacc|' - 'score_name|dur|') - assert nn.log_.count('\n') == num_epochs + 1 - log_my_score = nn.log_.replace(' ', '').rsplit('\n')[-1].split('|')[-3] - assert log_my_score == '1.2345' - - -def test_verbose_cnn(mnist): - # Just check that no exception is thrown - from nolearn.lasagne import 
NeuralNet - - X, y = mnist - X_train, y_train = X[:100].reshape(-1, 1, 28, 28), y[:100] - X_train = X_train.reshape(-1, 1, 28, 28) - num_epochs = 3 - - nn = NeuralNet( - layers=[ - ('input', InputLayer), - ('conv1', Conv2DLayer), - ('conv2', Conv2DLayer), - ('pool2', MaxPool2DLayer), - ('conv3', Conv2DLayer), - ('conv4', Conv2DLayer), - ('pool4', MaxPool2DLayer), - ('hidden1', DenseLayer), - ('output', DenseLayer), - ], - input_shape=(None, 1, 28, 28), - output_num_units=10, - output_nonlinearity=softmax, - - more_params=dict( - conv1_filter_size=(5, 5), conv1_num_filters=16, - conv2_filter_size=(3, 3), conv2_num_filters=16, - pool2_ds=(3, 3), - conv3_filter_size=(3, 3), conv3_num_filters=16, - conv4_filter_size=(3, 3), conv4_num_filters=16, - pool4_ds=(2, 2), - hidden1_num_units=512, - ), - - update=nesterov_momentum, - update_learning_rate=0.01, - update_momentum=0.9, - - max_epochs=num_epochs, - verbose=2, - ) - - nn.fit(X_train, y_train) - nn.predict_proba(X_train) - nn.predict(X_train) - nn.score(X_train, y_train) - - assert nn.layer_infos_.replace(' ', '').startswith( - u'namesizetotalcap.Y[%]cap.X[%]cov.Y[%]cov.X[%]filterYfilterXfieldY' - 'fieldX') - assert nn.log_.replace(' ', '').startswith( - u'|epoch|trainloss|val''idloss|validbest|train/val|validacc|dur|') - assert nn.log_.count('\n') == num_epochs + 1 From b9a7eec083e0ede97ba16139cd634903eb24b0ce Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Fri, 24 Apr 2015 10:59:46 +0200 Subject: [PATCH 10/11] Rewind commit to state of master. --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index f9b18b1..39e6185 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ joblib==0.8.4 scikit-learn==0.15.2 Theano==0.7 -tabulate==0.7.5 git+https://github.com/benanne/Lasagne.git@cd5e396f87#egg=Lasagne From 017f59980bfa4fd05a8b3fd094a1011e85225c1d Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Fri, 24 Apr 2015 12:52:13 +0200 Subject: [PATCH 11/11] Added dynamic matching of parameter shapes when loading stored weights, which allows to load weights into a different architecture, which would fail otherwise once the first layer does not match. This now requires the net to be initialized before loading. 
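A minimal sketch of the new loading workflow (net0 and net1 stand in for two NeuralNet instances whose architectures only partly overlap; the file name is arbitrary):

    # source net: initialize (or fit) and store its parameters
    net0.initialize()
    net0.save_weights_to('weights.np')

    # target net: must be initialized explicitly before loading;
    # parameters are matched by shape, so layers that do not line up are skipped
    net1.initialize()
    net1.load_weights_from('weights.np')  # also accepts another NeuralNet
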
--- nolearn/lasagne/base.py | 62 +++++++++++++++++++++++++---------- nolearn/tests/test_lasagne.py | 51 ++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+), 17 deletions(-) diff --git a/nolearn/lasagne/base.py b/nolearn/lasagne/base.py index fd54fe1..f794b1c 100644 --- a/nolearn/lasagne/base.py +++ b/nolearn/lasagne/base.py @@ -2,6 +2,7 @@ from .._compat import pickle from collections import OrderedDict +from difflib import SequenceMatcher import functools import itertools import operator @@ -395,28 +396,55 @@ def get_all_params(self): params = sum([l.get_params() for l in layers], []) return unique(params) - def load_weights_from(self, source): - self.initialize() - - if isinstance(source, str): - source = np.load(source) - - if isinstance(source, NeuralNet): - source = source.get_all_params() - - source_weights = [ - w.get_value() if hasattr(w, 'get_value') else w for w in source] - - for w1, w2 in zip(source_weights, self.get_all_params()): - if w1.shape != w2.get_value().shape: - continue - w2.set_value(w1) - def save_weights_to(self, fname): weights = [w.get_value() for w in self.get_all_params()] with open(fname, 'wb') as f: pickle.dump(weights, f, -1) + @staticmethod + def _param_alignment(shapes0, shapes1): + shapes0 = list(map(str, shapes0)) + shapes1 = list(map(str, shapes1)) + matcher = SequenceMatcher(a=shapes0, b=shapes1) + matches = [] + for block in matcher.get_matching_blocks(): + if block.size == 0: + continue + matches.append((list(range(block.a, block.a + block.size)), + list(range(block.b, block.b + block.size)))) + result = [line for match in matches for line in zip(*match)] + return result + + def load_weights_from(self, src): + if not hasattr(self, '_initialized'): + raise AttributeError( + "Please initialize the net before loading weights using " + "the '.initialize()' method.") + + if isinstance(src, str): + src = np.load(src) + if isinstance(src, NeuralNet): + src = src.get_all_params() + + target = self.get_all_params() + src_params = [p.get_value() if hasattr(p, 'get_value') else p + for p in src] + target_params = [p.get_value() for p in target] + + src_shapes = [p.shape for p in src_params] + target_shapes = [p.shape for p in target_params] + matches = self._param_alignment(src_shapes, target_shapes) + + for i, j in matches: + target[j].set_value(src_params[i]) + + if not self.verbose: + continue + param_shape = 'x'.join(map(str, src_params[i].shape)) + param_name = target[j].name + ' ' if target[j].name else None + print("* Loaded parameter {}(shape: {})".format( + param_name, param_shape)) + def __getstate__(self): state = dict(self.__dict__) for attr in ( diff --git a/nolearn/tests/test_lasagne.py b/nolearn/tests/test_lasagne.py index ca1283b..f42660d 100644 --- a/nolearn/tests/test_lasagne.py +++ b/nolearn/tests/test_lasagne.py @@ -115,10 +115,61 @@ def on_epoch_finished(nn, train_history): # Use load_weights_from to initialize an untrained model: nn3 = clone(nn_def) + nn3.initialize() nn3.load_weights_from(nn2) assert np.array_equal(nn3.predict(X_test), y_pred) +def test_lasagne_loading_params_matches(): + # Loading mechanism should find layers with matching parameter + # shapes, even if they are not perfectly aligned. 
+ from nolearn.lasagne import NeuralNet + + layers0 = [('input', InputLayer), + ('dense0', DenseLayer), + ('dense1', DenseLayer), + ('output', DenseLayer)] + net0 = NeuralNet( + layers=layers0, + input_shape=(None, 784), + dense0_num_units=100, + dense1_num_units=200, + output_nonlinearity=softmax, output_num_units=10, + update=nesterov_momentum, + update_learning_rate=0.01, + max_epochs=5, + ) + net0.initialize() + net0.save_weights_to('tmp_params.np') + + layers1 = [('input', InputLayer), + ('dense0', DenseLayer), + ('dense1', DenseLayer), + ('dense2', DenseLayer), + ('output', DenseLayer)] + net1 = NeuralNet( + layers=layers1, + input_shape=(None, 784), + dense0_num_units=100, + dense1_num_units=20, + dense2_num_units=200, + output_nonlinearity=softmax, output_num_units=10, + update=nesterov_momentum, + update_learning_rate=0.01, + max_epochs=5, + ) + net1.initialize() + + # output weights have the same shape but should differ + assert not (net0.layers_['output'].W.get_value() == + net1.layers_['output'].W.get_value()).all() + # after loading, these weights should be equal, despite the + # additional dense layer + net1.load_weights_from('tmp_params.np') + assert (net0.layers_['output'].W.get_value() == + net1.layers_['output'].W.get_value()).all() + + def test_lasagne_functional_grid_search(mnist, monkeypatch): # Make sure that we can satisfy the grid search interface. from nolearn.lasagne import NeuralNet