9 changes: 7 additions & 2 deletions pilco/models/mgpr.py
@@ -1,11 +1,13 @@
+from typing import Tuple
+
 import tensorflow as tf
 from tensorflow_probability import distributions as tfd
 import gpflow
 from gpflow.utilities import to_default_float
 import numpy as np
 float_type = gpflow.config.default_float()
 
-def randomize(model, mean=1, sigma=0.01):
+def randomize(model: gpflow.models.GPR, mean=1, sigma=0.01):
     model.kernel.lengthscales.assign(
         mean + sigma*np.random.normal(size=model.kernel.lengthscales.shape))
     model.kernel.variance.assign(
@@ -15,6 +17,7 @@ def randomize(model, mean=1, sigma=0.01):
             mean + sigma*np.random.normal())
 
 class MGPR(gpflow.Module):
+    """Multivariate Gaussian Process Regression"""
     def __init__(self, data, name=None):
         super(MGPR, self).__init__(name)
 
@@ -35,7 +38,7 @@ def create_models(self, data):
             self.models.append(gpflow.models.GPR((data[0], data[1][:, i:i+1]), kernel=kern))
             self.models[-1].likelihood.prior = tfd.Gamma(to_default_float(1.2), to_default_float(1/0.05))
 
-    def set_data(self, data):
+    def set_data(self, data: Tuple):
         for i in range(len(self.models)):
             if isinstance(self.models[i].data[0], gpflow.Parameter):
                 self.models[i].X.assign(data[0])
@@ -75,10 +78,12 @@ def optimize(self, restarts=1):
             model.likelihood.variance.assign(best_params["l_variance"])
 
     def predict_on_noisy_inputs(self, m, s):
+        """Apply the learned model of the environment to predict the change of the state."""
         iK, beta = self.calculate_factorizations()
         return self.predict_given_factorizations(m, s, iK, beta)
 
     def calculate_factorizations(self):
+        """TODO document me"""
         K = self.K(self.X)
         batched_eye = tf.eye(tf.shape(self.X)[0], batch_shape=[self.num_outputs], dtype=float_type)
         L = tf.linalg.cholesky(K + self.noise[:, None, None]*batched_eye)
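For context, here is a minimal usage sketch (not part of the diff) of the MGPR pieces touched above. The data shapes, the dynamics-model naming, and the three-value return of predict_on_noisy_inputs are assumptions drawn from the surrounding PILCO codebase, not something introduced by this PR.

import numpy as np
from pilco.models.mgpr import MGPR, randomize

state_dim, control_dim, N = 3, 1, 50
X = np.random.randn(N, state_dim + control_dim)    # states concatenated with actions
Y = np.random.randn(N, state_dim)                   # observed state deltas
dynamics = MGPR((X, Y))                             # one GPR per output dimension

for gp in dynamics.models:
    randomize(gp)                                   # perturb hyperparameters before fitting
dynamics.optimize(restarts=1)

m = np.zeros((1, state_dim + control_dim))          # mean of an uncertain input
s = 0.1 * np.eye(state_dim + control_dim)           # covariance of an uncertain input
M, S, V = dynamics.predict_on_noisy_inputs(m, s)    # assumed return: mean, covariance, input-output covariance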
15 changes: 14 additions & 1 deletion pilco/models/pilco.py
@@ -1,3 +1,5 @@
+from typing import Tuple
+
 import numpy as np
 import tensorflow as tf
 import gpflow
@@ -13,7 +15,7 @@
 from gpflow import set_trainable
 
 class PILCO(gpflow.models.BayesianModel):
-    def __init__(self, data, num_induced_points=None, horizon=30, controller=None,
+    def __init__(self, data: Tuple, num_induced_points=None, horizon=30, controller=None,
                  reward=None, m_init=None, S_init=None, name=None):
         super(PILCO, self).__init__(name)
         if num_induced_points is None:
@@ -113,9 +115,19 @@ def optimize_policy(self, maxiter=50, restarts=1):
                 set_trainable(param, True)
 
     def compute_action(self, x_m):
+        """Computes action for a real interaction with environment.
+
+        Unlike in approximate inference, we don't have any uncertainty about the state,
+        therefore covariance is set to zero."""
+
         return self.controller.compute_action(x_m, tf.zeros([self.state_dim, self.state_dim], float_type))[0]
 
     def predict(self, m_x, s_x, n):
+        """Do approximate inference for n time steps into the future.
+
+        Returns the distribution over the state after n steps (mean and sigma),
+        and the total reward."""
+
         loop_vars = [
             tf.constant(0, tf.int32),
             m_x,
@@ -138,6 +150,7 @@ def predict(self, m_x, s_x, n):
     def propagate(self, m_x, s_x):
         m_u, s_u, c_xu = self.controller.compute_action(m_x, s_x)
 
+        # find mean, sigma for the concatenated vector of state and action (x~ in the paper)
         m = tf.concat([m_x, m_u], axis=1)
         s1 = tf.concat([s_x, s_x@c_xu], axis=1)
         s2 = tf.concat([tf.transpose(s_x@c_xu), s_u], axis=1)
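Similarly, a minimal sketch (not part of the diff) of how the newly documented PILCO methods are typically called. The controller/reward defaults, the optimize_models step, and the exact return values are assumptions taken from the surrounding codebase rather than from this PR.

import numpy as np
from pilco.models.pilco import PILCO

state_dim, control_dim, N = 3, 1, 50
X = np.random.randn(N, state_dim + control_dim)     # (state, action) training inputs
Y = np.random.randn(N, state_dim)                    # observed state deltas
pilco = PILCO((X, Y), horizon=30)                    # assumed: default controller and reward are created when None

pilco.optimize_models()                              # assumed helper from the codebase: fit the GP dynamics
pilco.optimize_policy(maxiter=50, restarts=1)        # improve the controller on imagined rollouts

# Approximate inference n steps ahead: mean and covariance of the state plus the expected reward
m_x = np.zeros((1, state_dim))
s_x = 0.01 * np.eye(state_dim)
m_n, s_n, reward = pilco.predict(m_x, s_x, 10)

# Acting in a real environment: the state is known exactly, so compute_action passes zero covariance
u = pilco.compute_action(np.zeros((1, state_dim)))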