diff --git a/lxmls/deep_learning/numpy_models/rnn.py b/lxmls/deep_learning/numpy_models/rnn.py
index c3cc405d..f8323c58 100644
--- a/lxmls/deep_learning/numpy_models/rnn.py
+++ b/lxmls/deep_learning/numpy_models/rnn.py
@@ -11,18 +11,18 @@ def __init__(self, **config):
         # self.parameters
         RNN.__init__(self, **config)
 
-    def predict(self, input=None):
+    def predict(self, model_input=None):
         """
         Predict model outputs given input
         """
-        p_y = np.exp(self.log_forward(input)[0])
+        p_y = np.exp(self.log_forward(model_input)[0])
         return np.argmax(p_y, axis=1)
 
-    def update(self, input=None, output=None):
+    def update(self, model_input=None, output=None):
         """
         Update model parameters given batch of data
         """
-        gradients = self.backpropagation(input, output)
+        gradients = self.backpropagation(model_input, output)
         learning_rate = self.config['learning_rate']
         # Update each parameter with SGD rule
         num_parameters = len(self.parameters)
@@ -30,15 +30,17 @@ def update(self, input=None, output=None):
             # Update weight
             self.parameters[m] -= learning_rate * gradients[m]
 
-    def log_forward(self, input):
+    def log_forward(self, model_input):
 
         # Get parameters and sizes
         W_e, W_x, W_h, W_y = self.parameters
         hidden_size = W_h.shape[0]
-        nr_steps = input.shape[0]
+        nr_steps = model_input.shape[0]
+        nr_tokens = W_e.shape[1]
 
         # Embedding layer
-        z_e = W_e[input, :]
+        input_ohe = index2onehot(model_input, nr_tokens)
+        z_e = input_ohe @ W_e.T
 
         # Recurrent layer
         h = np.zeros((nr_steps + 1, hidden_size))
@@ -56,19 +58,21 @@ def log_forward(self, input):
 
         # Softmax
         log_p_y = y - logsumexp(y, axis=1, keepdims=True)
 
-        return log_p_y, y, h, z_e, input
+        return log_p_y, y, h, z_e, model_input
 
-    def backpropagation(self, input, output):
+    def backpropagation(self, model_input, output) -> list[np.ndarray]:
+        """
+        Compute gradients for the RNN with the back-propagation method.
-        '''
-        Compute gradientes, with the back-propagation method
-        inputs:
-            x: vector with the (embedding) indicies of the words of a
+        Inputs:
+            model_input: vector with the (embedding) indices of the words of a
             sentence
-        outputs: vector with the indicies of the tags for each word of
-            the sentence outputs:
-            gradient_parameters: vector with parameters gradientes
-        '''
+            output: vector with the indices of the tags for each word of
+                the sentence
+        Outputs:
+            gradient_parameters (list[np.ndarray]): gradients of the W_e, W_x, W_h, W_y parameters
+        """
+        # print(f"Model input shape: {model_input.shape}, Output shape: {output.shape}")
 
         # Get parameters and sizes
         W_e, W_x, W_h, W_y = self.parameters
diff --git a/lxmls/deep_learning/rnn.py b/lxmls/deep_learning/rnn.py
index 2d505dc4..1094c82f 100644
--- a/lxmls/deep_learning/rnn.py
+++ b/lxmls/deep_learning/rnn.py
@@ -51,7 +51,7 @@ def initialize_rnn_parameters(input_size, embedding_size, hidden_size,
         W_e, W_x, W_h, W_y = loaded_parameters
 
         # Note: Pytorch requires this shape order fro nn.Embedding()
-        assert W_e.shape == (input_size, embedding_size), \
+        assert W_e.shape == (embedding_size, input_size), \
            "Embedding layer ze not matching saved model"
         assert W_x.shape == (hidden_size, embedding_size), \
            "Input layer ze not matching saved model"
@@ -65,7 +65,7 @@ def initialize_rnn_parameters(input_size, embedding_size, hidden_size,
 
     # INITIALIZE
     # Input layer
-    W_e = 0.01*random_seed.uniform(size=(input_size, embedding_size))
+    W_e = 0.01*random_seed.uniform(size=(embedding_size, input_size))
     # Input layer
     W_x = random_seed.uniform(size=(hidden_size, embedding_size))
     # Recurrent layer
diff --git a/tests/test_sequence_models_deep_learning.py b/tests/test_sequence_models_deep_learning.py
index 6236e0f1..6999b053 100644
--- a/tests/test_sequence_models_deep_learning.py
+++ b/tests/test_sequence_models_deep_learning.py
@@ -14,7 +14,7 @@
 
 
 @pytest.fixture(scope='module')
-def data(): 
+def data():
     return PostagCorpusData()
 
 
@@ -33,7 +33,7 @@ def test_numpy_rnn(data):
     # Get functions to get and set values of a particular weight of the model
     get_parameter, set_parameter = get_rnn_parameter_handlers(
         layer_index=-1,
-        row=0, 
+        row=0,
         column=0
     )
@@ -68,21 +68,21 @@ def test_numpy_rnn(data):
 
         # Batch loop
         for batch in train_batches:
-            model.update(input=batch['input'], output=batch['output'])
+            model.update(model_input=batch['input'], output=batch['output'])
 
     # Evaluation dev
     is_hit = []
     for batch in dev_set:
-        is_hit.extend(model.predict(input=batch['input']) == batch['output'])
+        is_hit.extend(model.predict(model_input=batch['input']) == batch['output'])
     accuracy = 100*np.mean(is_hit)
 
     # tested for 2 epochs only
     assert np.allclose(accuracy, 31.325, tolerance)
-    
+
     # Evaluation test
     is_hit = []
     for batch in test_set:
-        is_hit.extend(model.predict(input=batch['input']) == batch['output'])
+        is_hit.extend(model.predict(model_input=batch['input']) == batch['output'])
     accuracy = 100*np.mean(is_hit)
 
     assert np.allclose(accuracy, 30.105, tolerance)
@@ -122,7 +122,7 @@ def test_pytorch_rnn(data):
 
     # tested for 2 epochs only
     assert np.allclose(accuracy, 31.325, tolerance)
-    
+
     # Evaluation test
     is_hit = []
     for batch in test_set:
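
Note (not part of the patch): a minimal sketch of why the one-hot matmul introduced in log_forward reproduces the previous embedding lookup under the new (embedding_size, input_size) layout of W_e. The index2onehot stand-in below is hypothetical; the patch assumes a helper of that name returning a (nr_steps, nr_tokens) one-hot matrix.

    import numpy as np

    def index2onehot(index, num_classes):
        # Hypothetical stand-in for the helper used in log_forward:
        # builds a (nr_steps, num_classes) one-hot matrix from word indices.
        onehot = np.zeros((index.shape[0], num_classes))
        onehot[np.arange(index.shape[0]), index] = 1.0
        return onehot

    embedding_size, input_size = 3, 5
    W_e = 0.01 * np.random.uniform(size=(embedding_size, input_size))  # new layout
    model_input = np.array([0, 2, 4, 2])  # toy word indices for one sentence

    z_e_matmul = index2onehot(model_input, input_size) @ W_e.T  # as in the patched log_forward
    z_e_lookup = W_e[:, model_input].T                          # equivalent column lookup
    assert np.allclose(z_e_matmul, z_e_lookup)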