diff --git a/.gitignore b/.gitignore
index 5d34afb..6fa4e72 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,44 @@ sensepy/__pycache__
*.txt
*.png
*.pdf
+*.pyo
+*.pyd
+*.pdb
+*.egg
+*.egg-info
+*.whl
+*.manifest
+*.spec
+*.log
+*.pot
+*.mo
+*.so
+*.dll
+*.dylib
+*.a
+*.lib
+*.swp
+*.swo
+*.tmp
+*.bak
+*.old
+*.orig
+*.rej
+*.sublime-project
+*.sublime-workspace
+*.project
+*.pydevproject
+*.idea/
+.vscode/
+__pycache__/
+*.coverage
+.coverage.*
+.cache
+.tox/
+.nox/
+.pytest_cache/
+htmlcov/
+dist/
+build/
+site/
+docs/_build/
diff --git a/setup.py b/setup.py
index 07cd061..8e0e9fe 100755
--- a/setup.py
+++ b/setup.py
@@ -1,29 +1,35 @@
from setuptools import setup
packages = [
- 'numpy',
- 'scipy',
- 'matplotlib',
- 'sklearn',
- 'tensorflow',
- 'cvxpy',
- 'torch',
- 'pymanopt',
- 'pandas',
- 'mosek',
- 'quadprog',
- 'cvxpylayers',
- 'functorch',
- 'autograd_minimize'
+ "numpy",
+ "scipy",
+ "matplotlib",
+ "scikit-learn",
+ "tensorflow",
+ "cvxpy",
+ "torch",
+ "pymanopt",
+ "pandas",
+ "mosek",
+ "quadprog",
+ "cvxpylayers",
+ "autograd_minimize",
+ "torch-cluster",
+ "nmf-torch",
+ "fast-pytorch-kmeans",
+ "tqdm",
]
#
-setup(name='stpy',
- version='0.0.2',
- description='Stochastic Process Library for Python',
- url='',
- author='Mojmir Mutny',
- author_email='mojmir.mutny@inf.ethz.ch',
- license='custom ',
- packages=['stpy'],
- zip_safe=False,
- install_requires=packages)
+setup(
+ name="stpy",
+ version="0.0.2",
+ description="Stochastic Process Library for Python",
+ url="",
+ author="Mojmir Mutny",
+ author_email="mojmir.mutny@inf.ethz.ch",
+ license="custom ",
+ packages=["stpy"],
+ zip_safe=False,
+ install_requires=packages,
+ setup_requires=["torch", "Cython"],
+)
diff --git a/stpy.egg-info/PKG-INFO b/stpy.egg-info/PKG-INFO
deleted file mode 100644
index b9cb176..0000000
--- a/stpy.egg-info/PKG-INFO
+++ /dev/null
@@ -1,8 +0,0 @@
-Metadata-Version: 2.1
-Name: stpy
-Version: 0.0.2
-Summary: Stochastic Process Library for Python
-Home-page:
-Author: Mojmir Mutny
-Author-email: mojmir.mutny@inf.ethz.ch
-License: custom
diff --git a/stpy.egg-info/not-zip-safe b/stpy.egg-info/not-zip-safe
deleted file mode 100644
index 8b13789..0000000
--- a/stpy.egg-info/not-zip-safe
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/stpy/approx_inference/expected-propagation.py b/stpy/approx_inference/expected-propagation.py
index 44b6b0e..fbb1132 100644
--- a/stpy/approx_inference/expected-propagation.py
+++ b/stpy/approx_inference/expected-propagation.py
@@ -3,63 +3,67 @@
from scipy.stats import multivariate_normal
-class ExpectedPropagationQuadratic():
+class ExpectedPropagationQuadratic:
- def __init__(self, mu_prior, Sigma_prior, likelihood_single, data):
+ def __init__(self, mu_prior, Sigma_prior, likelihood_single, data):
- # takes two arguments param, theta
- self.likelihood_single = likelihood_single
+ # takes two arguments param, theta
+ self.likelihood_single = likelihood_single
- # prior information
- self.mu_prior = mu_prior
- self.Sigma_prior = Sigma_prior
+ # prior information
+ self.mu_prior = mu_prior
+ self.Sigma_prior = Sigma_prior
- self.d = mu_prior.size()[1]
+ self.d = mu_prior.size()[1]
- self.n = len(self.data)
- self.data = data
+ self.n = len(self.data)
+ self.data = data
- self.approx = []
- for i in range(self.n):
- mu = torch.zeros(size=(1, self.d)).double()
- Sigma = torch.eye(size=(self.d, self.d)).double()
- self.approx.append((mu, Sigma))
+ self.approx = []
+ for i in range(self.n):
+ mu = torch.zeros(size=(1, self.d)).double()
+ Sigma = torch.eye(size=(self.d, self.d)).double()
+ self.approx.append((mu, Sigma))
- def marginalized_version(self, j):
- mu = torch.zeros(size=(1, self.d)).double()
- Sigma = torch.zeros(size=(self.d, self.d)).double()
+ def marginalized_version(self, j):
+ mu = torch.zeros(size=(1, self.d)).double()
+ Sigma = torch.zeros(size=(self.d, self.d)).double()
- for i in range(self.n):
- if i != j:
- Sigma_elem = self.approx[j][0]
- mu_elem = self.approx[j][1]
- Sigma_elem_inv = torch.inverse(Sigma_elem)
- mu += Sigma_elem_inv @ mu_elem
- Sigma += Sigma_elem_inv
- Sigma = torch.inverse(Sigma)
- mu = Sigma @ mu
- return (mu, Sigma)
+ for i in range(self.n):
+ if i != j:
+ Sigma_elem = self.approx[j][0]
+ mu_elem = self.approx[j][1]
+ Sigma_elem_inv = torch.inverse(Sigma_elem)
+ mu += Sigma_elem_inv @ mu_elem
+ Sigma += Sigma_elem_inv
+ Sigma = torch.inverse(Sigma)
+ mu = Sigma @ mu
+ return (mu, Sigma)
- def match_likelihood(self, j):
- mu, Sigma = self.marginalized_version(j)
- lik = lambda x: self.likelihood_single(torch.from_numpy(x), self.data[j]).numpy()
- prob = lambda x: multivariate_normal.pdf(x, mean=mu.view(-1).reshape.numpy(), cov=Sigma.numpy())
- first_moment = integrate.quad(lambda x: x * lik(x) * prob(x), 0.0, 10e10)
- second_moment = integrate.quad(lambda x: x * x * lik(x) * prob(x), 0.0, 10e10)
+ def match_likelihood(self, j):
+ mu, Sigma = self.marginalized_version(j)
+ lik = lambda x: self.likelihood_single(
+ torch.from_numpy(x), self.data[j]
+ ).numpy()
+ prob = lambda x: multivariate_normal.pdf(
+ x, mean=mu.view(-1).reshape.numpy(), cov=Sigma.numpy()
+ )
+ first_moment = integrate.quad(lambda x: x * lik(x) * prob(x), 0.0, 10e10)
+ second_moment = integrate.quad(lambda x: x * x * lik(x) * prob(x), 0.0, 10e10)
- self.approx[j][0] = first_moment
- self.approx[j][1] = second_moment
+ self.approx[j][0] = first_moment
+ self.approx[j][1] = second_moment
- return (first_moment, second_moment - first_moment ** 2)
+ return (first_moment, second_moment - first_moment**2)
- def finalize(self):
- pass
+ def finalize(self):
+ pass
- def fit_gp(self, iterations='auto'):
- if iterations == 'auto':
- T = 100
- for i in range(T):
- for j in range(self.n):
- self.match_likelihood(j)
- mu, Sigma = self.finalize()
- return mu, Sigma
+ def fit_gp(self, iterations="auto"):
+ if iterations == "auto":
+ T = 100
+ for i in range(T):
+ for j in range(self.n):
+ self.match_likelihood(j)
+ mu, Sigma = self.finalize()
+ return mu, Sigma
diff --git a/stpy/approx_inference/hmc.py b/stpy/approx_inference/hmc.py
index 879fd17..1e6ce13 100644
--- a/stpy/approx_inference/hmc.py
+++ b/stpy/approx_inference/hmc.py
@@ -1,5 +1,7 @@
-params_hmc = hamiltorch.sample(log_prob_func=log_prob_func,
- params_init=params_init,
- num_samples=num_samples,
- step_size=step_size,
- num_steps_per_sample=num_steps_per_sample)
+params_hmc = hamiltorch.sample(
+ log_prob_func=log_prob_func,
+ params_init=params_init,
+ num_samples=num_samples,
+ step_size=step_size,
+ num_steps_per_sample=num_steps_per_sample,
+)
diff --git a/stpy/approx_inference/langevin.py b/stpy/approx_inference/langevin.py
index 3ed2dc2..21e430a 100644
--- a/stpy/approx_inference/langevin.py
+++ b/stpy/approx_inference/langevin.py
@@ -3,25 +3,29 @@
import torch
-class LangevinSampler():
+class LangevinSampler:
- def __init__(self, verbose=False):
- self.verbose = verbose
- pass
+ def __init__(self, verbose=False):
+ self.verbose = verbose
+ pass
- def calculate(self, HessianF, theta0):
- W = HessianF(theta0)
- L = float(scipy.sparse.linalg.eigsh(W.numpy(), k=1, which='LM', return_eigenvectors=False, tol=1e-3))
- return L
+ def calculate(self, HessianF, theta0):
+ W = HessianF(theta0)
+ L = float(
+ scipy.sparse.linalg.eigsh(
+ W.numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-3
+ )
+ )
+ return L
- def sample(self, F, nablaF, HessianF, theta0, steps=100):
- L = self.calculate(HessianF, theta0)
- eta = 0.5 / (L + 1)
- m = theta0.size()[0]
- theta = theta0
- for k in range(steps):
- w = torch.randn(size=(m, 1)).double()
- theta = theta - eta * nablaF(theta) + np.sqrt(2 * eta) * w
- if self.verbose == True:
- print("Iter:", k, theta.T)
- return theta
+ def sample(self, F, nablaF, HessianF, theta0, steps=100):
+ L = self.calculate(HessianF, theta0)
+ eta = 0.5 / (L + 1)
+ m = theta0.size()[0]
+ theta = theta0
+ for k in range(steps):
+ w = torch.randn(size=(m, 1)).double()
+ theta = theta - eta * nablaF(theta) + np.sqrt(2 * eta) * w
+ if self.verbose == True:
+ print("Iter:", k, theta.T)
+ return theta
diff --git a/stpy/approx_inference/proximal_langevin.py b/stpy/approx_inference/proximal_langevin.py
index f1da7b6..fec409d 100644
--- a/stpy/approx_inference/proximal_langevin.py
+++ b/stpy/approx_inference/proximal_langevin.py
@@ -3,18 +3,23 @@
def ProximalLangevin(LangevinSampler):
- def sample(self, F, nablaF, HessianF, theta0, prox, steps=100):
- L = self.calculate(HessianF, theta0)
- eta = 0.5 / (L + 1)
- m = theta0.size()[0]
- theta = theta0
- for k in range(steps):
- w = torch.randn(size=(m, 1)).double()
- theta = (1 - eta) * theta - eta * nablaF(theta) + eta * prox(theta) + np.sqrt(2 * eta) * w
- if self.verbose == True:
- print("Iter:", k, theta.T)
- return prox(theta)
+ def sample(self, F, nablaF, HessianF, theta0, prox, steps=100):
+ L = self.calculate(HessianF, theta0)
+ eta = 0.5 / (L + 1)
+ m = theta0.size()[0]
+ theta = theta0
+ for k in range(steps):
+ w = torch.randn(size=(m, 1)).double()
+ theta = (
+ (1 - eta) * theta
+ - eta * nablaF(theta)
+ + eta * prox(theta)
+ + np.sqrt(2 * eta) * w
+ )
+ if self.verbose == True:
+ print("Iter:", k, theta.T)
+ return prox(theta)
def MirrorLangevin(LangvinSampler):
- pass
+ pass
diff --git a/stpy/approx_inference/sampling_helper.py b/stpy/approx_inference/sampling_helper.py
index 8976e70..960a10e 100644
--- a/stpy/approx_inference/sampling_helper.py
+++ b/stpy/approx_inference/sampling_helper.py
@@ -4,53 +4,53 @@
def get_increment(eta, steps, f, w0, path=False):
- """
+ """
- :param eta: terminal time
- :param steps: number of steps
- :param f: the operator
- :param w0: initial point
- :return:
- """
+ :param eta: terminal time
+ :param steps: number of steps
+ :param f: the operator
+ :param w0: initial point
+ :return:
+ """
- tau = eta / steps
- w = w0
- sequence = []
+ tau = eta / steps
+ w = w0
+ sequence = []
- for i in range(steps):
+ for i in range(steps):
- n = torch.randn(size=w0.size()).double()
- w = w + np.sqrt(2 * tau) * f(w, n)
- if path:
- sequence.append(w)
+ n = torch.randn(size=w0.size()).double()
+ w = w + np.sqrt(2 * tau) * f(w, n)
+ if path:
+ sequence.append(w)
- if path:
- return sequence
- else:
- return w
+ if path:
+ return sequence
+ else:
+ return w
if __name__ == "__main__":
- f = lambda w: torch.diag(1. / torch.abs(w.view(-1)))
- d = 1
- w0 = torch.zeros(size=(d, 1)).double() + 2
- step = 100
- path = get_increment(2, step, f, w0, path=True)
- # plt.plot(path)
-
- i = 0
- colors = ['k', 'r', 'b', 'orange', 'brown', 'purple']
- for steps in [5, 10, 20, 100, 200, 500]:
-
- repeats = 100
- ws = []
- for _ in range(repeats):
- path = get_increment(2, steps, f, w0, path=True)
- xtest = torch.linspace(0, 2, steps)
- plt.plot(xtest, path, color=colors[i])
- i = i + 1
- # plt.hist(np.array(ws), label = str(step))
-
- plt.legend()
- plt.show()
+ f = lambda w: torch.diag(1.0 / torch.abs(w.view(-1)))
+ d = 1
+ w0 = torch.zeros(size=(d, 1)).double() + 2
+ step = 100
+ path = get_increment(2, step, f, w0, path=True)
+ # plt.plot(path)
+
+ i = 0
+ colors = ["k", "r", "b", "orange", "brown", "purple"]
+ for steps in [5, 10, 20, 100, 200, 500]:
+
+ repeats = 100
+ ws = []
+ for _ in range(repeats):
+ path = get_increment(2, steps, f, w0, path=True)
+ xtest = torch.linspace(0, 2, steps)
+ plt.plot(xtest, path, color=colors[i])
+ i = i + 1
+ # plt.hist(np.array(ws), label = str(step))
+
+ plt.legend()
+ plt.show()
diff --git a/stpy/approx_inference/variational_mf.py b/stpy/approx_inference/variational_mf.py
index 5fff78d..9eae29c 100644
--- a/stpy/approx_inference/variational_mf.py
+++ b/stpy/approx_inference/variational_mf.py
@@ -16,9 +16,10 @@
You should have received a copy of the GNU General Public License
along with SGCP_Inference. If not, see <http://www.gnu.org/licenses/>.
"""
-__author__ = 'Christian Donner'
-__email__ = 'christian.donner(at)bccn-berlin.de'
-__license__ = 'gpl-3.0'
+
+__author__ = "Christian Donner"
+__email__ = "christian.donner(at)bccn-berlin.de"
+__license__ = "gpl-3.0"
import time
@@ -28,673 +29,752 @@
from scipy.special import digamma, gammaln
-class VMF_SGCP():
-
- def __init__(self, S_borders, X, cov_params, num_inducing_points,
- lmbda_star=None, conv_crit=1e-4,
- num_integration_points=1000, output=False,
- update_hyperparams=True,
- noise=1e-4, epsilon=5e-2):
- """ Class initialisation for variational mean field inference for
- sigmoidal Gaussian Cox process.
-
- :param S_borders: numpy.ndarray [D x 2]
- Limits of the region of interest.
- :param X: numpy.ndarray [num_points x D]
- Positions of the observations.
- :param cov_params: numpy.ndarray [D + 1]
- Hyperparameters of the covariance functions. First is amplitude,
- and the others the length scale for each dimension.
- :param num_inducing_points: int
- Number of inducing points (Should be a power of dimensions)
- :param lmbda_star: float
- Maximal intensity. If None it is initialized as twice the mean
- observation rate for a homogeneous process. (Default=None)
- :param conv_crit:
- Convergence criterion, when algorithm should stop. (Default=1e-4)
- :param num_integration_points: int
- Number of points that should be used for Monte Carlo integration.
- (Default = 1000)
- :param output: bool
- Prints info after each optimisation step. (Default=False)
- :param update_hyperparams: bool
- Whether the hyperparameters are updated (by Adam) or not. (
- Default=False)
- :param noise: float
- Noise added to the diagonal of the covariance matrix (should be
- small). (Default=1e-4)
- param epsilon: float
- Step size for Adam in the hyperparameter update. (Default=5e-2)
- """
-
- self.S_borders = S_borders
- self.S = S_borders[:, 1] - S_borders[:, 0]
- self.R = numpy.prod(self.S)
- self.D = S_borders.shape[0]
- self.noise = noise
- self.cov_params = cov_params
- self.num_integration_points = num_integration_points
- self.num_inducing_points = num_inducing_points # must be power of D
- self.X = X
-
- self.place_inducing_points()
- self.mu_g_s = numpy.zeros(self.induced_points.shape[0])
- self.Sigma_g_s = numpy.identity(self.induced_points.shape[0])
- self.Sigma_g_s_inv = numpy.identity(self.induced_points.shape[0])
- self.Ks = self.cov_func(self.induced_points, self.induced_points)
- L = numpy.linalg.cholesky(self.Ks + self.noise * numpy.eye(
- self.Ks.shape[0]))
- L_inv = solve_triangular(L, numpy.eye(L.shape[0]), lower=True,
- check_finite=False)
- self.Ks_inv = L_inv.T.dot(L_inv)
- self.logdet_Ks = 2. * numpy.sum(numpy.log(L.diagonal()))
-
- self.place_integration_points()
- self.ks_X = self.cov_func(self.induced_points, self.X)
- self.LB_list = []
- self.times = []
-
- self.kappa_X = self.Ks_inv.dot(self.ks_X)
- self.kappa_int_points = self.Ks_inv.dot(self.ks_int_points)
- self.mu_g_X, var_g_X = self.predictive_posterior_GP(self.X, 'X')
- self.mu_g2_X = var_g_X + self.mu_g_X ** 2
- self.mu_g_int_points, var_g_int_points = self.predictive_posterior_GP(
- self.integration_points, 'int_points')
- self.mu_g2_int_points = var_g_int_points + self.mu_g_int_points ** 2
- self.epsilon = epsilon
- self.alpha0 = 4.
- self.beta0 = 2. / (float(self.X.shape[0] / self.R))
- if lmbda_star is None:
- self.lmbda_star_q1 = self.alpha0 / self.beta0
- self.log_lmbda_star_q1 = digamma(self.alpha0) - numpy.log(self.beta0)
- else:
- self.lmbda_star_q1 = lmbda_star
- self.log_lmbda_star_q1 = numpy.log(lmbda_star)
- self.alpha_q1 = self.alpha0
- self.beta_q1 = self.beta0
- self.convergence = numpy.inf
- self.conv_crit = conv_crit
- self.num_iterations = 0
- self.output = output
- self.update_hyperparams = update_hyperparams
-
- # ADAM parameters
- self.beta1_adam = .9
- self.beta2_adam = .99
- self.epsilon_adam = 1e-5
- self.m_hyper_adam = numpy.zeros(self.D + 1)
- self.v_hyper_adam = numpy.zeros(self.D + 1)
- self.m_bm_adam = numpy.zeros(self.D)
- self.v_bm_adam = numpy.zeros(self.D)
-
- def place_inducing_points(self):
- """ Places the induced points for sparse GP.
- """
-
- num_per_dim = int(numpy.ceil(self.num_inducing_points ** (1. / self.D)))
- induced_grid = numpy.empty([num_per_dim, self.D])
- for di in range(self.D):
- dist_between_points = self.S[di] / num_per_dim
- induced_grid[:, di] = numpy.arange(.5 * dist_between_points,
- self.S[di],
- dist_between_points)
-
- self.induced_points = numpy.meshgrid(*induced_grid.T.tolist())
- self.induced_points = numpy.array(self.induced_points).reshape([
- self.D, -1]).T
-
- def run(self):
- """ Fitting function for the variational mean-field algorithm.
- """
-
- # Initialisation
- self.times.append(time.perf_counter())
- self.calculate_PG_expectations()
- self.calculate_posterior_intensity()
- converged = False
- while not converged:
- self.num_iterations += 1
- # Update second factor q2
- self.calculate_postrior_GP()
- self.update_predictive_posterior()
- self.update_max_intensity()
- # Update first factor q1
- self.calculate_PG_expectations()
- self.calculate_posterior_intensity()
- # Update hyperparameters
- if self.update_hyperparams:
- self.update_hyperparameters()
- # Calculate lower bound
- self.LB_list.append(self.calculate_lower_bound())
- # Check for convergence
- if self.num_iterations > 1:
- self.convergence = numpy.absolute(self.LB_list[-1] -
- self.LB_list[
- -2]) / numpy.amax([numpy.abs(self.LB_list[-1]),
- numpy.abs(self.LB_list[-2]), 1])
- converged = self.convergence < self.conv_crit
- self.times.append(time.perf_counter())
- if self.output:
- self.print_info()
-
- def print_info(self):
- """ Functions to print info, while iteratively updating posterior.
- """
- print((' +-----------------+ ' +
- '\n | Iteration %4d |' +
- '\n | Conv. = %.4f |' +
- '\n +-----------------+') % (self.num_iterations,
- self.convergence_inner))
-
- def place_integration_points(self):
- """ Places the integration points for Monte Carlo integration and
- updates all related kernels.
- """
-
- self.integration_points = numpy.random.rand(
- self.num_integration_points, self.D)
- self.integration_points *= self.S[numpy.newaxis]
- self.ks_int_points = self.cov_func(self.induced_points,
- self.integration_points)
- self.kappa_int_points = self.Ks_inv.dot(self.ks_int_points)
-
- def calculate_posterior_intensity(self):
- """ The rate of the posterior process is updated.
- """
-
- self.lmbda_q2 = .5 * numpy.exp(
- -.5 * self.mu_g_int_points + self.log_lmbda_star_q1) / \
- numpy.cosh(.5 * self.c_int_points)
-
- def calculate_PG_expectations(self):
- """ The Polya-Gamma posterior is updated.
- """
-
- self.c_X = numpy.sqrt(self.mu_g2_X)
- self.mu_omega_X = .5 / self.c_X * numpy.tanh(
- .5 * self.c_X)
- self.c_int_points = numpy.sqrt(self.mu_g2_int_points)
- self.mu_omega_int_points = .5 / self.c_int_points \
- * numpy.tanh(.5 * self.c_int_points)
-
- def calculate_predictive_posterior_intensity(self, X_prime):
- """ Calculates the posterior intensity at X_prime for the latent
- Poisson process. (Not the intensity of the observed Poisson process!!!)
-
- :param X_prime: numpy.ndarray [num_points x D]
- Position of points, that should be evaluated.
-
- :return: numpy.ndarray [num_points]
- Posterior intensity.
- """
- mu_g, var_g = self.predictive_posterior_GP(X_prime)
- mu_g = mu_g
- mu_g2 = var_g + mu_g ** 2
- c = numpy.sqrt(mu_g2)
- pred_lmbda_q2 = .5 * numpy.exp(
- -.5 * mu_g + self.log_lmbda_star_q1) / \
- numpy.cosh(.5 * c)
- return pred_lmbda_q2
-
- def calculate_postrior_GP(self):
- """ The new GP at the inducing points is calculated.
- """
-
- A_int_points = self.lmbda_q2 * self.mu_omega_int_points
- A_X = self.mu_omega_X
- kAk = self.kappa_X.dot(A_X[:, numpy.newaxis] * self.kappa_X.T) + \
- self.kappa_int_points.dot(A_int_points[:, numpy.newaxis] *
- self.kappa_int_points.T) \
- / self.num_integration_points * self.R
- self.Sigma_g_s_inv = kAk + self.Ks_inv
- L_inv = numpy.linalg.cholesky(self.Sigma_g_s_inv + self.noise *
- numpy.eye(
- self.Sigma_g_s_inv.shape[0]))
- L = solve_triangular(L_inv, numpy.eye(L_inv.shape[0]), lower=True,
- check_finite=False)
- self.Sigma_g_s = L.T.dot(L)
- self.logdet_Sigma_g_s = 2 * numpy.sum(numpy.log(L.diagonal()))
- b_int_points = -.5 * self.lmbda_q2
- b_X = .5 * numpy.ones(self.X.shape[0])
- kb = self.ks_X.dot(b_X) + self.ks_int_points.dot(b_int_points) / \
- self.num_integration_points * self.R
- self.mu_g_s = self.Sigma_g_s.dot(kb.dot(self.Ks_inv))
-
- def predictive_posterior_GP(self, x_prime, points=None):
- """ Computes the predictive posterior for given points
-
- :param x_prime: numpy.ndarray [num_points x D]
- Points, which should be predicted for.
- :param points: str
- If 'int_points' or 'X' posterior for integration points or
- observation points is calculated, respectively. (Default=None)
- :returns:
- numpy.ndarray [num_points]: mean of predictive posterior
- numpy.ndarray [num_points]: variance of predictive posterior
- """
- if points is None:
- ks_x_prime = self.cov_func(self.induced_points, x_prime)
- kappa = self.Ks_inv.dot(ks_x_prime)
- elif points is 'int_points':
- ks_x_prime = self.ks_int_points
- kappa = self.kappa_int_points
- elif points is 'X':
- ks_x_prime = self.ks_X
- kappa = self.kappa_X
-
- mu_g_x_prime = kappa.T.dot(self.mu_g_s)
- K_xx = self.cov_func(x_prime, x_prime, only_diagonal=True)
- var_g_x_prime = K_xx - numpy.sum(kappa * (ks_x_prime - kappa.T.dot(
- self.Sigma_g_s).T), axis=0)
- return mu_g_x_prime, var_g_x_prime
-
- def cov_func(self, x, x_prime, only_diagonal=False):
- """ Computes the covariance functions between x and x_prime.
-
- :param x: numpy.ndarray [num_points x D]
- Contains coordinates for points of x
- :param x_prime: numpy.ndarray [num_points_prime x D]
- Contains coordinates for points of x_prime
- :param only_diagonal: bool
- If true only diagonal is computed (Works only if x and x_prime
- are the same, Default=False)
-
- :return: numpy.ndarray [num_points x num_points_prime]
- ([num_points_prime] if only diagonal)
- Kernel matrix.
- """
-
- theta_1, theta_2 = self.cov_params[0], self.cov_params[1]
- if only_diagonal:
- return theta_1 * numpy.ones(x.shape[0])
-
- else:
- x_theta2 = x / theta_2
- xprime_theta2 = x_prime / theta_2
- h = numpy.sum(x_theta2 ** 2, axis=1)[:, None] - 2. * numpy.dot(
- x_theta2, xprime_theta2.T) + \
- numpy.sum(xprime_theta2 ** 2, axis=1)[None]
- return theta_1 * numpy.exp(-.5 * h)
-
- def calculate_lower_bound(self):
- """ Calculates the variational lower bound for current posterior.
-
- :return: float
- Variational lower bound.
- """
-
- Sigma_s_mugmug = self.Sigma_g_s + numpy.outer(self.mu_g_s, self.mu_g_s)
- f_int_points = .5 * (- self.mu_g_int_points -
- self.mu_g2_int_points * self.mu_omega_int_points) - \
- numpy.log(2)
- integrand = f_int_points - \
- numpy.log(self.lmbda_q2 * numpy.cosh(.5 * self.c_int_points)) \
- + self.log_lmbda_star_q1 + \
- .5 * self.c_int_points ** 2 * self.mu_omega_int_points + 1.
- f_X = .5 * (self.mu_g_X - self.mu_g2_X * self.mu_omega_X) - \
- numpy.log(2)
- summand = f_X + self.log_lmbda_star_q1 - numpy.log(numpy.cosh(
- .5 * self.c_X)) + .5 * self.c_X ** 2 * self.mu_omega_X
-
- L = integrand.dot(self.lmbda_q2) / self.num_integration_points * self.R
- L -= self.lmbda_star_q1 * self.R
- L += numpy.sum(summand)
- L -= .5 * numpy.trace(self.Ks_inv.dot(Sigma_s_mugmug))
- L -= .5 * self.logdet_Ks
- L += .5 * self.logdet_Sigma_g_s + .5 * self.num_inducing_points
- L += self.alpha0 * numpy.log(self.beta0) - gammaln(self.alpha0) + \
- (self.alpha0 - 1) * self.log_lmbda_star_q1 - \
- self.beta0 * self.lmbda_star_q1
- L += self.alpha_q1 - numpy.log(self.beta_q1) + gammaln(self.alpha_q1) \
- + (1. - self.alpha_q1) * digamma(self.alpha_q1)
-
- return L
-
- def update_max_intensity(self):
- """ Updates the posterior for the maximal intensity.
- """
- self.alpha_q1 = self.X.shape[0] + numpy.sum(
- self.lmbda_q2) / self.num_integration_points * self.R + self.alpha0
- self.beta_q1 = self.beta0 + self.R
- self.lmbda_star_q1 = self.alpha_q1 / self.beta_q1
- self.log_lmbda_star_q1 = digamma(self.alpha_q1) - \
- numpy.log(self.beta_q1)
-
- def update_kernels(self):
- """ Updates all kernels (for inducing, observed and integration points).
- """
- self.ks_int_points = self.cov_func(self.induced_points,
- self.integration_points)
- self.ks_X = self.cov_func(self.induced_points, self.X)
- self.Ks = self.cov_func(self.induced_points, self.induced_points)
- L = numpy.linalg.cholesky(self.Ks + self.noise * numpy.eye(
- self.Ks.shape[0]))
- L_inv = solve_triangular(L, numpy.eye(L.shape[0]), lower=True,
- check_finite=False)
- self.Ks_inv = L_inv.T.dot(L_inv)
- self.logdet_Ks = 2. * numpy.sum(numpy.log(L.diagonal()))
- self.kappa_X = self.Ks_inv.dot(self.ks_X)
- self.kappa_int_points = self.Ks_inv.dot(self.ks_int_points)
-
- def calculate_hyperparam_derivative(self):
- """ Calculates the derivative of the hyperparameters.
-
- :return: numpy.ndarray [D + 1]
- Derivatives of hyperparameters.
- """
-
- theta1, theta2 = self.cov_params[0], numpy.copy(
- self.cov_params[1])
- Sigma_s_mugmug = self.Sigma_g_s + numpy.outer(self.mu_g_s, self.mu_g_s)
- dks_X = numpy.empty([self.ks_X.shape[0], self.ks_X.shape[1],
- 1 + theta2.shape[0]])
- dks_int_points = numpy.empty(
- [self.ks_int_points.shape[0], self.ks_int_points.shape[1],
- 1 + theta2.shape[0]])
- dKs = numpy.empty([self.Ks.shape[0], self.Ks.shape[1],
- 1 + theta2.shape[0]])
- dKss = numpy.zeros([1 + theta2.shape[0]])
- dKss[0] = 1.
-
- # kernel derivatives wrt theta1
- dks_X[:, :, 0] = self.ks_X / theta1
- dks_int_points[:, :, 0] = self.ks_int_points / theta1
- dKs[:, :, 0] = self.Ks / theta1
- # kernel derivatives wrt theta2
- dx = numpy.subtract(self.induced_points[:, None],
- self.X[None])
- dks_X[:, :, 1:] = self.ks_X[:, :, None] * (dx ** 2) / \
- (theta2[None, None] ** 3)
- dx = numpy.subtract(self.induced_points[:, None],
- self.integration_points[None])
- dks_int_points[:, :, 1:] = self.ks_int_points[:, :, None] * \
- (dx ** 2) / (theta2[None, None] ** 3)
- dx = numpy.subtract(self.induced_points[:, None],
- self.induced_points[None])
- dKs[:, :, 1:] = self.Ks[:, :, None] * (dx ** 2) / (
- theta2[None, None] ** 3)
- dL_dtheta = numpy.empty(1 + len(theta2))
-
- for itheta in range(1 + len(theta2)):
- dKs_inv = -self.Ks_inv.dot(dKs[:, :, itheta].dot(self.Ks_inv))
-
- dkappa_X = self.Ks_inv.dot(dks_X[:, :, itheta]) + dKs_inv.dot(
- self.ks_X)
- dkappa_int_points = self.Ks_inv.dot(
- dks_int_points[:, :, itheta]) + dKs_inv.dot(
- self.ks_int_points)
-
- dKtilde_X = dKss[itheta] - numpy.sum(
- dks_X[:, :, itheta] * self.kappa_X, axis=0) - numpy.sum(
- self.ks_X * dkappa_X, axis=0)
- dKtilde_int_points = dKss[itheta] - numpy.sum(
- dks_int_points[:, :, itheta] * self.kappa_int_points,
- axis=0) - numpy.sum(self.ks_int_points * dkappa_int_points,
- axis=0)
-
- dg1_X = self.mu_g_s.dot(dkappa_X)
- dg1_int_points = self.mu_g_s.dot(dkappa_int_points)
-
- dg2_X = (dKtilde_X + 2. * numpy.sum(
- self.kappa_X * Sigma_s_mugmug.dot(dkappa_X),
- axis=0)) * self.mu_omega_X
- dg2_int_points = (dKtilde_int_points + 2. * numpy.sum(
- self.kappa_int_points * Sigma_s_mugmug.dot(dkappa_int_points),
- axis=0)) * self.mu_omega_int_points
-
- dL_dtheta[itheta] = .5 * (numpy.sum(dg1_X) - numpy.sum(dg2_X))
- dL_dtheta[itheta] += .5 * numpy.dot(
- -dg1_int_points - dg2_int_points,
- self.lmbda_q2) / self.num_integration_points * self.R
- dL_dtheta[itheta] -= .5 * numpy.trace(self.Ks_inv.dot(
- dKs[:, :, itheta]))
- dL_dtheta[itheta] += .5 * numpy.trace(
- self.Ks_inv.dot(dKs[:, :, itheta].dot(
- self.Ks_inv.dot(Sigma_s_mugmug))))
-
- return dL_dtheta
-
- def update_hyperparameters(self):
- """ Updates the hyperparameters with Adam.
- """
- dL_dtheta = self.calculate_hyperparam_derivative()
- logtheta1, logtheta2 = numpy.log(self.cov_params[0]), \
- numpy.log(self.cov_params[1])
- dL_dlogtheta1 = dL_dtheta[0] * numpy.exp(logtheta1)
- dL_dlogtheta2 = dL_dtheta[1:] * numpy.exp(logtheta2)
-
- self.m_hyper_adam[0] = self.beta1_adam * self.m_hyper_adam[0] + \
- (1. - self.beta1_adam) * dL_dlogtheta1
- self.v_hyper_adam[0] = self.beta2_adam * self.v_hyper_adam[0] + \
- (1. - self.beta2_adam) * dL_dlogtheta1 ** 2
- self.m_hyper_adam[1:] = self.beta1_adam * self.m_hyper_adam[1:] + \
- (1. - self.beta1_adam) * dL_dlogtheta2
- self.v_hyper_adam[1:] = self.beta2_adam * self.v_hyper_adam[1:] + \
- (1. - self.beta2_adam) * dL_dlogtheta2 ** 2
- m_hat = self.m_hyper_adam / (1. - self.beta1_adam)
- v_hat = self.v_hyper_adam / (1. - self.beta2_adam)
- logtheta1 += self.epsilon * m_hat[0] / (numpy.sqrt(v_hat[0]) +
- self.epsilon_adam)
- logtheta2 += self.epsilon * m_hat[1:] / (numpy.sqrt(v_hat[1:]) +
- self.epsilon_adam)
- self.cov_params[0] = numpy.exp(logtheta1)
- self.cov_params[1] = numpy.exp(logtheta2)
- self.update_kernels()
- self.update_predictive_posterior()
-
- def update_predictive_posterior(self, only_int_points=False):
- """ Updates the function g (mean & variance) at each point (observed
- and points for monte carlo integral)
-
- :param only_int_points: bool
- If True it only updates the integration points. (Default=False)
- """
-
- if not only_int_points:
- mu_g_X, var_g_X = self.predictive_posterior_GP(
- self.X, points='X')
- self.mu_g_X = mu_g_X
- self.mu_g2_X = var_g_X + mu_g_X ** 2
- mu_g_int_points, var_g_int_points = self.predictive_posterior_GP(
- self.integration_points, points='int_points')
- self.mu_g_int_points = mu_g_int_points
- self.mu_g2_int_points = var_g_int_points + mu_g_int_points ** 2
-
- def predictive_intensity_function(self, X_eval):
- """ Computes the predictive intensity function at X_eval by Gaussian
- quadrature.
-
- :param X_eval: numpy.ndarray [num_points_eval x D]
- Points where the intensity function should be evaluated.
-
- :returns:
- numpy.ndarray [num_points]: mean of predictive posterior intensity
- numpy.ndarray [num_points]: variance of predictive posterior
- intensity
- """
- num_preds = X_eval.shape[0]
- mu_pred, var_pred = self.predictive_posterior_GP(X_eval)
-
- mean_lmbda_pred, var_lmbda_pred = numpy.empty(num_preds), \
- numpy.empty(num_preds)
-
- mean_lmbda_q1 = self.lmbda_star_q1
- var_lmbda_q1 = self.alpha_q1 / (self.beta_q1 ** 2)
- mean_lmbda_q1_squared = var_lmbda_q1 + mean_lmbda_q1 ** 2
-
- for ipred in range(num_preds):
- mu, std = mu_pred[ipred], numpy.sqrt(var_pred[ipred])
- func1 = lambda g_pred: 1. / (1. + numpy.exp(-g_pred)) * \
- numpy.exp(-.5 * (g_pred - mu) ** 2 / std ** 2) / \
- numpy.sqrt(2. * numpy.pi * std ** 2)
- a, b = mu - 10. * std, mu + 10. * std
- mean_lmbda_pred[ipred] = mean_lmbda_q1 * quadrature(func1, a, b,
- maxiter=500)[0]
- func2 = lambda g_pred: (1. / (1. + numpy.exp(-g_pred))) ** 2 * \
- numpy.exp(
- -.5 * (g_pred - mu) ** 2 / std ** 2) / \
- numpy.sqrt(2. * numpy.pi * std ** 2)
- a, b = mu - 10. * std, mu + 10. * std
- mean_lmbda_pred_squared = mean_lmbda_q1_squared * \
- quadrature(func2, a, b, maxiter=500)[0]
- var_lmbda_pred[ipred] = mean_lmbda_pred_squared - mean_lmbda_pred[
- ipred] ** 2
-
- return mean_lmbda_pred, var_lmbda_pred
-
- def sample_posterior(self, X_test, num_samples=1):
- """ Samples log predictive likelihood for test set from posterior.
-
- :param X_test: [num_X_test x D]
- Observations in test set.
- :param num_samples: int
- How many samples of the intensity function should be drawn from
- the posterior. (Default=1e4)
-
- :return: numpy.ndarray [num_samples]
- Returns the array of sampled likelihoods.
- """
-
- num_events = X_test.shape[0]
- num_samples = int(num_samples)
- X = numpy.concatenate([X_test, self.integration_points])
- K = self.cov_func(X, X)
- kx = self.cov_func(X, self.induced_points)
- kappa = kx.dot(self.Ks_inv)
- Sigma_post = K - kappa.dot(kx.T - self.Sigma_g_s.dot(kappa.T))
- mu_post = kappa.dot(self.mu_g_s)
- L_post = numpy.linalg.cholesky(Sigma_post + self.noise * numpy.eye(
- Sigma_post.shape[0]))
-
- num_points = X.shape[0]
- num_hundreds = int(num_samples)
- pred_log_likelihood = numpy.empty([num_samples])
-
- samples = []
- # samples hundred instances at a time
- for ihundreds in range(num_hundreds):
- rand_nums = numpy.random.randn(num_points, 1)
- g_sample = mu_post[:, None] + L_post.dot(rand_nums)
- lmbda_max_sample = numpy.random.gamma(shape=self.alpha_q1,
- scale=1. / self.beta_q1,
- size=1)
- lmbda_sample = lmbda_max_sample / (1. + numpy.exp(-g_sample))
- samples.append(lmbda_sample)
- return samples
-
- def predictive_log_likelihood(self, X_test, num_samples=1e4):
- """ Samples log predictive likelihood for test set from posterior.
-
- :param X_test: [num_X_test x D]
- Observations in test set.
- :param num_samples: int
- How many samples of the intensity function should be drawn from
- the posterior. (Default=1e4)
-
- :return: numpy.ndarray [num_samples]
- Returns the array of sampled likelihoods.
- """
-
- num_events = X_test.shape[0]
- num_samples = int(num_samples)
- X = numpy.concatenate([X_test, self.integration_points])
- K = self.cov_func(X, X)
- kx = self.cov_func(X, self.induced_points)
- kappa = kx.dot(self.Ks_inv)
- Sigma_post = K - kappa.dot(kx.T - self.Sigma_g_s.dot(kappa.T))
- mu_post = kappa.dot(self.mu_g_s)
- L_post = numpy.linalg.cholesky(Sigma_post + self.noise * numpy.eye(
- Sigma_post.shape[0]))
-
- num_points = X.shape[0]
- num_hundreds = int(num_samples / 1e2)
- pred_log_likelihood = numpy.empty([num_samples])
-
- # samples hundred instances at a time
- for ihundreds in range(num_hundreds):
- rand_nums = numpy.random.randn(num_points, 100)
- g_sample = mu_post[:, None] + L_post.dot(rand_nums)
- lmbda_max_sample = numpy.random.gamma(shape=self.alpha_q1,
- scale=1. / self.beta_q1,
- size=100)
- lmbda_sample = lmbda_max_sample / (1. + numpy.exp(-g_sample))
-
- pred_log_likelihood[ihundreds * 100:(ihundreds + 1) * 100] = \
- numpy.sum(numpy.log(lmbda_sample[:num_events]), axis=0)
- pred_log_likelihood[ihundreds * 100:(ihundreds + 1) * 100] -= \
- numpy.mean(lmbda_sample[num_events:], axis=0) * self.R
-
- return pred_log_likelihood
-
- def expanded_predictive_log_likelihood(self, X_test):
- """ Fast approximation for log predictive test likelihood (Eq. 33 in
- paper).
-
- :param X_test: [num_X_test x D]
- Observations in test set.
-
- :return: float
- Approximation of log predictive test likelihood.
- """
- self.update_predictive_posterior(only_int_points=True)
- N = X_test.shape[0]
- ks_x_test = self.cov_func(self.induced_points, X_test)
- mu_g_X_test = ks_x_test.T.dot(self.Ks_inv.dot(self.mu_g_s))
- u_mean = -self.lmbda_star_q1 * numpy.mean(
- 1. / (1. + numpy.exp(-self.mu_g_int_points))) * self.R - \
- numpy.sum(numpy.log(1. + numpy.exp(-mu_g_X_test))) + \
- N * numpy.log(self.lmbda_star_q1)
-
- log_pred_likelihood = u_mean
- du_dg = numpy.empty(N + self.num_integration_points)
- du_dg[:N] = 1. / (1. + numpy.exp(mu_g_X_test))
- du_dg[N:] = - self.lmbda_star_q1 / (1. + numpy.exp(
- -self.mu_g_int_points)) * (1. - 1. / (1. + numpy.exp(
- -self.mu_g_int_points))) \
- / self.num_integration_points * self.R
- du_dg2 = numpy.empty(N + self.num_integration_points)
- du_dg2[:N] = - (1. - 1. / (1. + numpy.exp(mu_g_X_test))) / \
- (1. + numpy.exp(mu_g_X_test))
- du_dg2[N:] = - self.lmbda_star_q1 / (1. + numpy.exp(
- -self.mu_g_int_points)) * (1. - 1. / (1. + numpy.exp(
- -self.mu_g_int_points))) * (1. - 2. / (1. + numpy.exp(
- -self.mu_g_int_points))) / self.num_integration_points * self.R
-
- du_dlambda = - self.R * numpy.mean(
- 1. / (1. + numpy.exp(-self.mu_g_int_points))) + N / self.lmbda_star_q1
- du_dlmbda2 = - N / self.lmbda_star_q1 ** 2
-
- C = numpy.empty([N + self.num_integration_points,
- N + self.num_integration_points])
- inner_matrix = self.Ks_inv.dot(
- numpy.identity(self.num_inducing_points) -
- self.Sigma_g_s.dot(self.Ks_inv))
-
- K_X = self.cov_func(X_test, X_test) + self.noise * numpy.identity(
- X_test.shape[0])
-
- C[:N, :N] = K_X - ks_x_test.T.dot(inner_matrix.dot(
- ks_x_test))
- del K_X
- K_int_points = self.cov_func(self.integration_points,
- self.integration_points) + \
- self.noise * numpy.identity(
- self.integration_points.shape[0])
-
- C[N:, N:] = K_int_points - self.ks_int_points.T.dot(inner_matrix.dot(
- self.ks_int_points))
- del K_int_points
-
- K_X_int_points = self.cov_func(self.integration_points, X_test)
- C[N:, :N] = K_X_int_points - self.ks_int_points.T.dot(inner_matrix.dot(
- ks_x_test))
- del K_X_int_points
-
- C[:N, N:] = C[N:, :N].T
-
- log_pred_likelihood_corr = .5 * numpy.trace(C.dot(numpy.diag(
- du_dg2) + numpy.outer(du_dg, du_dg))) \
- + .5 * (du_dlmbda2 + du_dlambda ** 2) * self.alpha_q1 / self.beta_q1 ** 2
- log_pred_likelihood += log_pred_likelihood_corr
-
- return log_pred_likelihood
+class VMF_SGCP:
+
+ def __init__(
+ self,
+ S_borders,
+ X,
+ cov_params,
+ num_inducing_points,
+ lmbda_star=None,
+ conv_crit=1e-4,
+ num_integration_points=1000,
+ output=False,
+ update_hyperparams=True,
+ noise=1e-4,
+ epsilon=5e-2,
+ ):
+ """Class initialisation for variational mean field inference for
+ sigmoidal Gaussian Cox process.
+
+ :param S_borders: numpy.ndarray [D x 2]
+ Limits of the region of interest.
+ :param X: numpy.ndarray [num_points x D]
+ Positions of the observations.
+ :param cov_params: numpy.ndarray [D + 1]
+ Hyperparameters of the covariance functions. First is amplitude,
+ and the others the length scale for each dimension.
+ :param num_inducing_points: int
+ Number of inducing points (Should be a power of dimensions)
+ :param lmbda_star: float
+ Maximal intensity. If None it is initialized as twice the mean
+ observation rate for a homogeneous process. (Default=None)
+ :param conv_crit:
+ Convergence criterion, when algorithm should stop. (Default=1e-4)
+ :param num_integration_points: int
+ Number of points that should be used for Monte Carlo integration.
+ (Default = 1000)
+ :param output: bool
+ Prints info after each optimisation step. (Default=False)
+ :param update_hyperparams: bool
+ Whether the hyperparameters are updated (by Adam) or not. (
+ Default=False)
+ :param noise: float
+ Noise added to the diagonal of the covariance matrix (should be
+ small). (Default=1e-4)
+ param epsilon: float
+ Step size for Adam in the hyperparameter update. (Default=5e-2)
+ """
+
+ self.S_borders = S_borders
+ self.S = S_borders[:, 1] - S_borders[:, 0]
+ self.R = numpy.prod(self.S)
+ self.D = S_borders.shape[0]
+ self.noise = noise
+ self.cov_params = cov_params
+ self.num_integration_points = num_integration_points
+ self.num_inducing_points = num_inducing_points # must be power of D
+ self.X = X
+
+ self.place_inducing_points()
+ self.mu_g_s = numpy.zeros(self.induced_points.shape[0])
+ self.Sigma_g_s = numpy.identity(self.induced_points.shape[0])
+ self.Sigma_g_s_inv = numpy.identity(self.induced_points.shape[0])
+ self.Ks = self.cov_func(self.induced_points, self.induced_points)
+ L = numpy.linalg.cholesky(self.Ks + self.noise * numpy.eye(self.Ks.shape[0]))
+ L_inv = solve_triangular(
+ L, numpy.eye(L.shape[0]), lower=True, check_finite=False
+ )
+ self.Ks_inv = L_inv.T.dot(L_inv)
+ self.logdet_Ks = 2.0 * numpy.sum(numpy.log(L.diagonal()))
+
+ self.place_integration_points()
+ self.ks_X = self.cov_func(self.induced_points, self.X)
+ self.LB_list = []
+ self.times = []
+
+ self.kappa_X = self.Ks_inv.dot(self.ks_X)
+ self.kappa_int_points = self.Ks_inv.dot(self.ks_int_points)
+ self.mu_g_X, var_g_X = self.predictive_posterior_GP(self.X, "X")
+ self.mu_g2_X = var_g_X + self.mu_g_X**2
+ self.mu_g_int_points, var_g_int_points = self.predictive_posterior_GP(
+ self.integration_points, "int_points"
+ )
+ self.mu_g2_int_points = var_g_int_points + self.mu_g_int_points**2
+ self.epsilon = epsilon
+ self.alpha0 = 4.0
+ self.beta0 = 2.0 / (float(self.X.shape[0] / self.R))
+ if lmbda_star is None:
+ self.lmbda_star_q1 = self.alpha0 / self.beta0
+ self.log_lmbda_star_q1 = digamma(self.alpha0) - numpy.log(self.beta0)
+ else:
+ self.lmbda_star_q1 = lmbda_star
+ self.log_lmbda_star_q1 = numpy.log(lmbda_star)
+ self.alpha_q1 = self.alpha0
+ self.beta_q1 = self.beta0
+ self.convergence = numpy.inf
+ self.conv_crit = conv_crit
+ self.num_iterations = 0
+ self.output = output
+ self.update_hyperparams = update_hyperparams
+
+ # ADAM parameters
+ self.beta1_adam = 0.9
+ self.beta2_adam = 0.99
+ self.epsilon_adam = 1e-5
+ self.m_hyper_adam = numpy.zeros(self.D + 1)
+ self.v_hyper_adam = numpy.zeros(self.D + 1)
+ self.m_bm_adam = numpy.zeros(self.D)
+ self.v_bm_adam = numpy.zeros(self.D)
+
+ def place_inducing_points(self):
+ """Places the induced points for sparse GP."""
+
+ num_per_dim = int(numpy.ceil(self.num_inducing_points ** (1.0 / self.D)))
+ induced_grid = numpy.empty([num_per_dim, self.D])
+ for di in range(self.D):
+ dist_between_points = self.S[di] / num_per_dim
+ induced_grid[:, di] = numpy.arange(
+ 0.5 * dist_between_points, self.S[di], dist_between_points
+ )
+
+ self.induced_points = numpy.meshgrid(*induced_grid.T.tolist())
+ self.induced_points = numpy.array(self.induced_points).reshape([self.D, -1]).T
+
+ def run(self):
+ """Fitting function for the variational mean-field algorithm."""
+
+ # Initialisation
+ self.times.append(time.perf_counter())
+ self.calculate_PG_expectations()
+ self.calculate_posterior_intensity()
+ converged = False
+ while not converged:
+ self.num_iterations += 1
+ # Update second factor q2
+ self.calculate_postrior_GP()
+ self.update_predictive_posterior()
+ self.update_max_intensity()
+ # Update first factor q1
+ self.calculate_PG_expectations()
+ self.calculate_posterior_intensity()
+ # Update hyperparameters
+ if self.update_hyperparams:
+ self.update_hyperparameters()
+ # Calculate lower bound
+ self.LB_list.append(self.calculate_lower_bound())
+ # Check for convergence
+ if self.num_iterations > 1:
+ self.convergence = numpy.absolute(
+ self.LB_list[-1] - self.LB_list[-2]
+ ) / numpy.amax(
+ [numpy.abs(self.LB_list[-1]), numpy.abs(self.LB_list[-2]), 1]
+ )
+ converged = self.convergence < self.conv_crit
+ self.times.append(time.perf_counter())
+ if self.output:
+ self.print_info()
+
+ def print_info(self):
+ """Functions to print info, while iteratively updating posterior."""
+ print(
+ (
+ " +-----------------+ "
+ + "\n | Iteration %4d |"
+ + "\n | Conv. = %.4f |"
+ + "\n +-----------------+"
+ )
+ % (self.num_iterations, self.convergence_inner)
+ )
+
+ def place_integration_points(self):
+ """Places the integration points for Monte Carlo integration and
+ updates all related kernels.
+ """
+
+ self.integration_points = numpy.random.rand(self.num_integration_points, self.D)
+ self.integration_points *= self.S[numpy.newaxis]
+ self.ks_int_points = self.cov_func(self.induced_points, self.integration_points)
+ self.kappa_int_points = self.Ks_inv.dot(self.ks_int_points)
+
+ def calculate_posterior_intensity(self):
+ """The rate of the posterior process is updated."""
+
+ self.lmbda_q2 = (
+ 0.5
+ * numpy.exp(-0.5 * self.mu_g_int_points + self.log_lmbda_star_q1)
+ / numpy.cosh(0.5 * self.c_int_points)
+ )
+
+ def calculate_PG_expectations(self):
+ """The Polya-Gamma posterior is updated."""
+
+ self.c_X = numpy.sqrt(self.mu_g2_X)
+ self.mu_omega_X = 0.5 / self.c_X * numpy.tanh(0.5 * self.c_X)
+ self.c_int_points = numpy.sqrt(self.mu_g2_int_points)
+ self.mu_omega_int_points = (
+ 0.5 / self.c_int_points * numpy.tanh(0.5 * self.c_int_points)
+ )
+
+ def calculate_predictive_posterior_intensity(self, X_prime):
+ """Calculates the posterior intensity at X_prime for the latent
+ Poisson process. (Not the intensity of the observed Poisson process!!!)
+
+ :param X_prime: numpy.ndarray [num_points x D]
+ Position of points, that should be evaluated.
+
+ :return: numpy.ndarray [num_points]
+ Posterior intensity.
+ """
+ mu_g, var_g = self.predictive_posterior_GP(X_prime)
+ mu_g = mu_g
+ mu_g2 = var_g + mu_g**2
+ c = numpy.sqrt(mu_g2)
+ pred_lmbda_q2 = (
+ 0.5 * numpy.exp(-0.5 * mu_g + self.log_lmbda_star_q1) / numpy.cosh(0.5 * c)
+ )
+ return pred_lmbda_q2
+
+ def calculate_postrior_GP(self):
+ """The new GP at the inducing points is calculated."""
+
+ A_int_points = self.lmbda_q2 * self.mu_omega_int_points
+ A_X = self.mu_omega_X
+ kAk = (
+ self.kappa_X.dot(A_X[:, numpy.newaxis] * self.kappa_X.T)
+ + self.kappa_int_points.dot(
+ A_int_points[:, numpy.newaxis] * self.kappa_int_points.T
+ )
+ / self.num_integration_points
+ * self.R
+ )
+ self.Sigma_g_s_inv = kAk + self.Ks_inv
+ L_inv = numpy.linalg.cholesky(
+ self.Sigma_g_s_inv + self.noise * numpy.eye(self.Sigma_g_s_inv.shape[0])
+ )
+ L = solve_triangular(
+ L_inv, numpy.eye(L_inv.shape[0]), lower=True, check_finite=False
+ )
+ self.Sigma_g_s = L.T.dot(L)
+ self.logdet_Sigma_g_s = 2 * numpy.sum(numpy.log(L.diagonal()))
+ b_int_points = -0.5 * self.lmbda_q2
+ b_X = 0.5 * numpy.ones(self.X.shape[0])
+ kb = (
+ self.ks_X.dot(b_X)
+ + self.ks_int_points.dot(b_int_points)
+ / self.num_integration_points
+ * self.R
+ )
+ self.mu_g_s = self.Sigma_g_s.dot(kb.dot(self.Ks_inv))
+
+ def predictive_posterior_GP(self, x_prime, points=None):
+ """Computes the predictive posterior for given points
+
+ :param x_prime: numpy.ndarray [num_points x D]
+ Points, which should be predicted for.
+ :param points: str
+ If 'int_points' or 'X' posterior for integration points or
+ observation points is calculated, respectively. (Default=None)
+ :returns:
+ numpy.ndarray [num_points]: mean of predictive posterior
+ numpy.ndarray [num_points]: variance of predictive posterior
+ """
+ if points is None:
+ ks_x_prime = self.cov_func(self.induced_points, x_prime)
+ kappa = self.Ks_inv.dot(ks_x_prime)
+ elif points is "int_points":
+ ks_x_prime = self.ks_int_points
+ kappa = self.kappa_int_points
+ elif points is "X":
+ ks_x_prime = self.ks_X
+ kappa = self.kappa_X
+
+ mu_g_x_prime = kappa.T.dot(self.mu_g_s)
+ K_xx = self.cov_func(x_prime, x_prime, only_diagonal=True)
+ var_g_x_prime = K_xx - numpy.sum(
+ kappa * (ks_x_prime - kappa.T.dot(self.Sigma_g_s).T), axis=0
+ )
+ return mu_g_x_prime, var_g_x_prime
+
+ def cov_func(self, x, x_prime, only_diagonal=False):
+ """Computes the covariance functions between x and x_prime.
+
+ :param x: numpy.ndarray [num_points x D]
+ Contains coordinates for points of x
+ :param x_prime: numpy.ndarray [num_points_prime x D]
+ Contains coordinates for points of x_prime
+ :param only_diagonal: bool
+ If true only diagonal is computed (Works only if x and x_prime
+ are the same, Default=False)
+
+ :return: numpy.ndarray [num_points x num_points_prime]
+ ([num_points_prime] if only diagonal)
+ Kernel matrix.
+ """
+
+ theta_1, theta_2 = self.cov_params[0], self.cov_params[1]
+ if only_diagonal:
+ return theta_1 * numpy.ones(x.shape[0])
+
+ else:
+ x_theta2 = x / theta_2
+ xprime_theta2 = x_prime / theta_2
+ h = (
+ numpy.sum(x_theta2**2, axis=1)[:, None]
+ - 2.0 * numpy.dot(x_theta2, xprime_theta2.T)
+ + numpy.sum(xprime_theta2**2, axis=1)[None]
+ )
+ return theta_1 * numpy.exp(-0.5 * h)
+
+ def calculate_lower_bound(self):
+ """Calculates the variational lower bound for current posterior.
+
+ :return: float
+ Variational lower bound.
+ """
+
+ Sigma_s_mugmug = self.Sigma_g_s + numpy.outer(self.mu_g_s, self.mu_g_s)
+ f_int_points = 0.5 * (
+ -self.mu_g_int_points - self.mu_g2_int_points * self.mu_omega_int_points
+ ) - numpy.log(2)
+ integrand = (
+ f_int_points
+ - numpy.log(self.lmbda_q2 * numpy.cosh(0.5 * self.c_int_points))
+ + self.log_lmbda_star_q1
+ + 0.5 * self.c_int_points**2 * self.mu_omega_int_points
+ + 1.0
+ )
+ f_X = 0.5 * (self.mu_g_X - self.mu_g2_X * self.mu_omega_X) - numpy.log(2)
+ summand = (
+ f_X
+ + self.log_lmbda_star_q1
+ - numpy.log(numpy.cosh(0.5 * self.c_X))
+ + 0.5 * self.c_X**2 * self.mu_omega_X
+ )
+
+ L = integrand.dot(self.lmbda_q2) / self.num_integration_points * self.R
+ L -= self.lmbda_star_q1 * self.R
+ L += numpy.sum(summand)
+ L -= 0.5 * numpy.trace(self.Ks_inv.dot(Sigma_s_mugmug))
+ L -= 0.5 * self.logdet_Ks
+ L += 0.5 * self.logdet_Sigma_g_s + 0.5 * self.num_inducing_points
+ L += (
+ self.alpha0 * numpy.log(self.beta0)
+ - gammaln(self.alpha0)
+ + (self.alpha0 - 1) * self.log_lmbda_star_q1
+ - self.beta0 * self.lmbda_star_q1
+ )
+ L += (
+ self.alpha_q1
+ - numpy.log(self.beta_q1)
+ + gammaln(self.alpha_q1)
+ + (1.0 - self.alpha_q1) * digamma(self.alpha_q1)
+ )
+
+ return L
+
+ def update_max_intensity(self):
+ """Updates the posterior for the maximal intensity."""
+ self.alpha_q1 = (
+ self.X.shape[0]
+ + numpy.sum(self.lmbda_q2) / self.num_integration_points * self.R
+ + self.alpha0
+ )
+ self.beta_q1 = self.beta0 + self.R
+ self.lmbda_star_q1 = self.alpha_q1 / self.beta_q1
+ self.log_lmbda_star_q1 = digamma(self.alpha_q1) - numpy.log(self.beta_q1)
+
+ def update_kernels(self):
+ """Updates all kernels (for inducing, observed and integration points)."""
+ self.ks_int_points = self.cov_func(self.induced_points, self.integration_points)
+ self.ks_X = self.cov_func(self.induced_points, self.X)
+ self.Ks = self.cov_func(self.induced_points, self.induced_points)
+ L = numpy.linalg.cholesky(self.Ks + self.noise * numpy.eye(self.Ks.shape[0]))
+ L_inv = solve_triangular(
+ L, numpy.eye(L.shape[0]), lower=True, check_finite=False
+ )
+ self.Ks_inv = L_inv.T.dot(L_inv)
+ self.logdet_Ks = 2.0 * numpy.sum(numpy.log(L.diagonal()))
+ self.kappa_X = self.Ks_inv.dot(self.ks_X)
+ self.kappa_int_points = self.Ks_inv.dot(self.ks_int_points)
+
+ def calculate_hyperparam_derivative(self):
+ """Calculates the derivative of the hyperparameters.
+
+ :return: numpy.ndarray [D + 1]
+ Derivatives of hyperparameters.
+ """
+
+ theta1, theta2 = self.cov_params[0], numpy.copy(self.cov_params[1])
+ Sigma_s_mugmug = self.Sigma_g_s + numpy.outer(self.mu_g_s, self.mu_g_s)
+ dks_X = numpy.empty(
+ [self.ks_X.shape[0], self.ks_X.shape[1], 1 + theta2.shape[0]]
+ )
+ dks_int_points = numpy.empty(
+ [
+ self.ks_int_points.shape[0],
+ self.ks_int_points.shape[1],
+ 1 + theta2.shape[0],
+ ]
+ )
+ dKs = numpy.empty([self.Ks.shape[0], self.Ks.shape[1], 1 + theta2.shape[0]])
+ dKss = numpy.zeros([1 + theta2.shape[0]])
+ dKss[0] = 1.0
+
+ # kernel derivatives wrt theta1
+ dks_X[:, :, 0] = self.ks_X / theta1
+ dks_int_points[:, :, 0] = self.ks_int_points / theta1
+ dKs[:, :, 0] = self.Ks / theta1
+ # kernel derivatives wrt theta2
+ dx = numpy.subtract(self.induced_points[:, None], self.X[None])
+ dks_X[:, :, 1:] = self.ks_X[:, :, None] * (dx**2) / (theta2[None, None] ** 3)
+ dx = numpy.subtract(self.induced_points[:, None], self.integration_points[None])
+ dks_int_points[:, :, 1:] = (
+ self.ks_int_points[:, :, None] * (dx**2) / (theta2[None, None] ** 3)
+ )
+ dx = numpy.subtract(self.induced_points[:, None], self.induced_points[None])
+ dKs[:, :, 1:] = self.Ks[:, :, None] * (dx**2) / (theta2[None, None] ** 3)
+ dL_dtheta = numpy.empty(1 + len(theta2))
+
+ for itheta in range(1 + len(theta2)):
+ dKs_inv = -self.Ks_inv.dot(dKs[:, :, itheta].dot(self.Ks_inv))
+
+ dkappa_X = self.Ks_inv.dot(dks_X[:, :, itheta]) + dKs_inv.dot(self.ks_X)
+ dkappa_int_points = self.Ks_inv.dot(
+ dks_int_points[:, :, itheta]
+ ) + dKs_inv.dot(self.ks_int_points)
+
+ dKtilde_X = (
+ dKss[itheta]
+ - numpy.sum(dks_X[:, :, itheta] * self.kappa_X, axis=0)
+ - numpy.sum(self.ks_X * dkappa_X, axis=0)
+ )
+ dKtilde_int_points = (
+ dKss[itheta]
+ - numpy.sum(
+ dks_int_points[:, :, itheta] * self.kappa_int_points, axis=0
+ )
+ - numpy.sum(self.ks_int_points * dkappa_int_points, axis=0)
+ )
+
+ dg1_X = self.mu_g_s.dot(dkappa_X)
+ dg1_int_points = self.mu_g_s.dot(dkappa_int_points)
+
+ dg2_X = (
+ dKtilde_X
+ + 2.0 * numpy.sum(self.kappa_X * Sigma_s_mugmug.dot(dkappa_X), axis=0)
+ ) * self.mu_omega_X
+ dg2_int_points = (
+ dKtilde_int_points
+ + 2.0
+ * numpy.sum(
+ self.kappa_int_points * Sigma_s_mugmug.dot(dkappa_int_points),
+ axis=0,
+ )
+ ) * self.mu_omega_int_points
+
+ dL_dtheta[itheta] = 0.5 * (numpy.sum(dg1_X) - numpy.sum(dg2_X))
+ dL_dtheta[itheta] += (
+ 0.5
+ * numpy.dot(-dg1_int_points - dg2_int_points, self.lmbda_q2)
+ / self.num_integration_points
+ * self.R
+ )
+ dL_dtheta[itheta] -= 0.5 * numpy.trace(self.Ks_inv.dot(dKs[:, :, itheta]))
+ dL_dtheta[itheta] += 0.5 * numpy.trace(
+ self.Ks_inv.dot(dKs[:, :, itheta].dot(self.Ks_inv.dot(Sigma_s_mugmug)))
+ )
+
+ return dL_dtheta
+
+ def update_hyperparameters(self):
+ """Updates the hyperparameters with Adam."""
+ dL_dtheta = self.calculate_hyperparam_derivative()
+ logtheta1, logtheta2 = numpy.log(self.cov_params[0]), numpy.log(
+ self.cov_params[1]
+ )
+ dL_dlogtheta1 = dL_dtheta[0] * numpy.exp(logtheta1)
+ dL_dlogtheta2 = dL_dtheta[1:] * numpy.exp(logtheta2)
+
+ self.m_hyper_adam[0] = (
+ self.beta1_adam * self.m_hyper_adam[0]
+ + (1.0 - self.beta1_adam) * dL_dlogtheta1
+ )
+ self.v_hyper_adam[0] = (
+ self.beta2_adam * self.v_hyper_adam[0]
+ + (1.0 - self.beta2_adam) * dL_dlogtheta1**2
+ )
+ self.m_hyper_adam[1:] = (
+ self.beta1_adam * self.m_hyper_adam[1:]
+ + (1.0 - self.beta1_adam) * dL_dlogtheta2
+ )
+ self.v_hyper_adam[1:] = (
+ self.beta2_adam * self.v_hyper_adam[1:]
+ + (1.0 - self.beta2_adam) * dL_dlogtheta2**2
+ )
+ m_hat = self.m_hyper_adam / (1.0 - self.beta1_adam)
+ v_hat = self.v_hyper_adam / (1.0 - self.beta2_adam)
+ logtheta1 += (
+ self.epsilon * m_hat[0] / (numpy.sqrt(v_hat[0]) + self.epsilon_adam)
+ )
+ logtheta2 += (
+ self.epsilon * m_hat[1:] / (numpy.sqrt(v_hat[1:]) + self.epsilon_adam)
+ )
+ self.cov_params[0] = numpy.exp(logtheta1)
+ self.cov_params[1] = numpy.exp(logtheta2)
+ self.update_kernels()
+ self.update_predictive_posterior()
+
+ def update_predictive_posterior(self, only_int_points=False):
+ """Updates the function g (mean & variance) at each point (observed
+ and points for monte carlo integral)
+
+ :param only_int_points: bool
+ If True it only updates the integration points. (Default=False)
+ """
+
+ if not only_int_points:
+ mu_g_X, var_g_X = self.predictive_posterior_GP(self.X, points="X")
+ self.mu_g_X = mu_g_X
+ self.mu_g2_X = var_g_X + mu_g_X**2
+ mu_g_int_points, var_g_int_points = self.predictive_posterior_GP(
+ self.integration_points, points="int_points"
+ )
+ self.mu_g_int_points = mu_g_int_points
+ self.mu_g2_int_points = var_g_int_points + mu_g_int_points**2
+
+ def predictive_intensity_function(self, X_eval):
+ """Computes the predictive intensity function at X_eval by Gaussian
+ quadrature.
+
+ :param X_eval: numpy.ndarray [num_points_eval x D]
+ Points where the intensity function should be evaluated.
+
+ :returns:
+ numpy.ndarray [num_points]: mean of predictive posterior intensity
+ numpy.ndarray [num_points]: variance of predictive posterior
+ intensity
+ """
+ num_preds = X_eval.shape[0]
+ mu_pred, var_pred = self.predictive_posterior_GP(X_eval)
+
+ mean_lmbda_pred, var_lmbda_pred = numpy.empty(num_preds), numpy.empty(num_preds)
+
+ mean_lmbda_q1 = self.lmbda_star_q1
+ var_lmbda_q1 = self.alpha_q1 / (self.beta_q1**2)
+ mean_lmbda_q1_squared = var_lmbda_q1 + mean_lmbda_q1**2
+
+ for ipred in range(num_preds):
+ mu, std = mu_pred[ipred], numpy.sqrt(var_pred[ipred])
+ func1 = (
+ lambda g_pred: 1.0
+ / (1.0 + numpy.exp(-g_pred))
+ * numpy.exp(-0.5 * (g_pred - mu) ** 2 / std**2)
+ / numpy.sqrt(2.0 * numpy.pi * std**2)
+ )
+ a, b = mu - 10.0 * std, mu + 10.0 * std
+ mean_lmbda_pred[ipred] = (
+ mean_lmbda_q1 * quadrature(func1, a, b, maxiter=500)[0]
+ )
+ func2 = (
+ lambda g_pred: (1.0 / (1.0 + numpy.exp(-g_pred))) ** 2
+ * numpy.exp(-0.5 * (g_pred - mu) ** 2 / std**2)
+ / numpy.sqrt(2.0 * numpy.pi * std**2)
+ )
+ a, b = mu - 10.0 * std, mu + 10.0 * std
+ mean_lmbda_pred_squared = (
+ mean_lmbda_q1_squared * quadrature(func2, a, b, maxiter=500)[0]
+ )
+ var_lmbda_pred[ipred] = (
+ mean_lmbda_pred_squared - mean_lmbda_pred[ipred] ** 2
+ )
+
+ return mean_lmbda_pred, var_lmbda_pred
+
+ def sample_posterior(self, X_test, num_samples=1):
+ """Samples log predictive likelihood for test set from posterior.
+
+ :param X_test: [num_X_test x D]
+ Observations in test set.
+ :param num_samples: int
+ How many samples of the intensity function should be drawn from
+ the posterior. (Default=1e4)
+
+ :return: numpy.ndarray [num_samples]
+ Returns the array of sampled likelihoods.
+ """
+
+ num_events = X_test.shape[0]
+ num_samples = int(num_samples)
+ X = numpy.concatenate([X_test, self.integration_points])
+ K = self.cov_func(X, X)
+ kx = self.cov_func(X, self.induced_points)
+ kappa = kx.dot(self.Ks_inv)
+ Sigma_post = K - kappa.dot(kx.T - self.Sigma_g_s.dot(kappa.T))
+ mu_post = kappa.dot(self.mu_g_s)
+ L_post = numpy.linalg.cholesky(
+ Sigma_post + self.noise * numpy.eye(Sigma_post.shape[0])
+ )
+
+ num_points = X.shape[0]
+ num_hundreds = int(num_samples)
+ pred_log_likelihood = numpy.empty([num_samples])
+
+ samples = []
+ # samples hundred instances at a time
+ for ihundreds in range(num_hundreds):
+ rand_nums = numpy.random.randn(num_points, 1)
+ g_sample = mu_post[:, None] + L_post.dot(rand_nums)
+ lmbda_max_sample = numpy.random.gamma(
+ shape=self.alpha_q1, scale=1.0 / self.beta_q1, size=1
+ )
+ lmbda_sample = lmbda_max_sample / (1.0 + numpy.exp(-g_sample))
+ samples.append(lmbda_sample)
+ return samples
+
+ def predictive_log_likelihood(self, X_test, num_samples=1e4):
+ """Samples log predictive likelihood for test set from posterior.
+
+ :param X_test: [num_X_test x D]
+ Observations in test set.
+ :param num_samples: int
+ How many samples of the intensity function should be drawn from
+ the posterior. (Default=1e4)
+
+ :return: numpy.ndarray [num_samples]
+ Returns the array of sampled likelihoods.
+ """
+
+ num_events = X_test.shape[0]
+ num_samples = int(num_samples)
+ X = numpy.concatenate([X_test, self.integration_points])
+ K = self.cov_func(X, X)
+ kx = self.cov_func(X, self.induced_points)
+ kappa = kx.dot(self.Ks_inv)
+ Sigma_post = K - kappa.dot(kx.T - self.Sigma_g_s.dot(kappa.T))
+ mu_post = kappa.dot(self.mu_g_s)
+ L_post = numpy.linalg.cholesky(
+ Sigma_post + self.noise * numpy.eye(Sigma_post.shape[0])
+ )
+
+ num_points = X.shape[0]
+ num_hundreds = int(num_samples / 1e2)
+ pred_log_likelihood = numpy.empty([num_samples])
+
+ # samples hundred instances at a time
+ for ihundreds in range(num_hundreds):
+ rand_nums = numpy.random.randn(num_points, 100)
+ g_sample = mu_post[:, None] + L_post.dot(rand_nums)
+ lmbda_max_sample = numpy.random.gamma(
+ shape=self.alpha_q1, scale=1.0 / self.beta_q1, size=100
+ )
+ lmbda_sample = lmbda_max_sample / (1.0 + numpy.exp(-g_sample))
+
+ pred_log_likelihood[ihundreds * 100 : (ihundreds + 1) * 100] = numpy.sum(
+ numpy.log(lmbda_sample[:num_events]), axis=0
+ )
+ pred_log_likelihood[ihundreds * 100 : (ihundreds + 1) * 100] -= (
+ numpy.mean(lmbda_sample[num_events:], axis=0) * self.R
+ )
+
+ return pred_log_likelihood
+
+ def expanded_predictive_log_likelihood(self, X_test):
+ """Fast approximation for log predictive test likelihood (Eq. 33 in
+ paper).
+
+ :param X_test: [num_X_test x D]
+ Observations in test set.
+
+ :return: float
+ Approximation of log predictive test likelihood.
+ """
+ self.update_predictive_posterior(only_int_points=True)
+ N = X_test.shape[0]
+ ks_x_test = self.cov_func(self.induced_points, X_test)
+ mu_g_X_test = ks_x_test.T.dot(self.Ks_inv.dot(self.mu_g_s))
+ u_mean = (
+ -self.lmbda_star_q1
+ * numpy.mean(1.0 / (1.0 + numpy.exp(-self.mu_g_int_points)))
+ * self.R
+ - numpy.sum(numpy.log(1.0 + numpy.exp(-mu_g_X_test)))
+ + N * numpy.log(self.lmbda_star_q1)
+ )
+
+ log_pred_likelihood = u_mean
+ du_dg = numpy.empty(N + self.num_integration_points)
+ du_dg[:N] = 1.0 / (1.0 + numpy.exp(mu_g_X_test))
+ du_dg[N:] = (
+ -self.lmbda_star_q1
+ / (1.0 + numpy.exp(-self.mu_g_int_points))
+ * (1.0 - 1.0 / (1.0 + numpy.exp(-self.mu_g_int_points)))
+ / self.num_integration_points
+ * self.R
+ )
+ du_dg2 = numpy.empty(N + self.num_integration_points)
+ du_dg2[:N] = -(1.0 - 1.0 / (1.0 + numpy.exp(mu_g_X_test))) / (
+ 1.0 + numpy.exp(mu_g_X_test)
+ )
+ du_dg2[N:] = (
+ -self.lmbda_star_q1
+ / (1.0 + numpy.exp(-self.mu_g_int_points))
+ * (1.0 - 1.0 / (1.0 + numpy.exp(-self.mu_g_int_points)))
+ * (1.0 - 2.0 / (1.0 + numpy.exp(-self.mu_g_int_points)))
+ / self.num_integration_points
+ * self.R
+ )
+
+ du_dlambda = (
+ -self.R * numpy.mean(1.0 / (1.0 + numpy.exp(-self.mu_g_int_points)))
+ + N / self.lmbda_star_q1
+ )
+ du_dlmbda2 = -N / self.lmbda_star_q1**2
+
+ C = numpy.empty(
+ [N + self.num_integration_points, N + self.num_integration_points]
+ )
+ inner_matrix = self.Ks_inv.dot(
+ numpy.identity(self.num_inducing_points) - self.Sigma_g_s.dot(self.Ks_inv)
+ )
+
+ K_X = self.cov_func(X_test, X_test) + self.noise * numpy.identity(
+ X_test.shape[0]
+ )
+
+ C[:N, :N] = K_X - ks_x_test.T.dot(inner_matrix.dot(ks_x_test))
+ del K_X
+ K_int_points = self.cov_func(
+ self.integration_points, self.integration_points
+ ) + self.noise * numpy.identity(self.integration_points.shape[0])
+
+ C[N:, N:] = K_int_points - self.ks_int_points.T.dot(
+ inner_matrix.dot(self.ks_int_points)
+ )
+ del K_int_points
+
+ K_X_int_points = self.cov_func(self.integration_points, X_test)
+ C[N:, :N] = K_X_int_points - self.ks_int_points.T.dot(
+ inner_matrix.dot(ks_x_test)
+ )
+ del K_X_int_points
+
+ C[:N, N:] = C[N:, :N].T
+
+ log_pred_likelihood_corr = (
+ 0.5 * numpy.trace(C.dot(numpy.diag(du_dg2) + numpy.outer(du_dg, du_dg)))
+ + 0.5 * (du_dlmbda2 + du_dlambda**2) * self.alpha_q1 / self.beta_q1**2
+ )
+ log_pred_likelihood += log_pred_likelihood_corr
+
+ return log_pred_likelihood
diff --git a/stpy/borel_set.py b/stpy/borel_set.py
index 2760ffc..2440e4f 100644
--- a/stpy/borel_set.py
+++ b/stpy/borel_set.py
@@ -6,299 +6,314 @@
from stpy.helpers.helper import cartesian
-class BorelSet():
-
- def __init__(self, d, bounds):
- self.d = d
- self.bounds = bounds
- self.calculate_volume()
- self.type = "box"
-
- def description(self):
- return self.bounds
-
- def calculate_volume(self):
- self.vol = 1.
- for i in range(self.d):
- self.vol = self.vol * (self.bounds[i, 1] - self.bounds[i, 0])
-
- def volume(self):
- return self.vol
-
- def center_point(self):
- return (self.bounds[:, 1] + self.bounds[:, 0]) / 2
-
- def perimeter(self):
- cir = 0.
- for i in range(self.d):
- cir += 2 * (self.bounds[i, 1] - self.bounds[i, 0])
- return cir
-
- def uniform_sample(self, n):
- sample = torch.zeros(n, self.d).double()
- for i in range(self.d):
- sample_i = torch.from_numpy(np.random.uniform(self.bounds[i, 0], self.bounds[i, 1], n))
- sample[:, i] = sample_i
- return sample
-
- def return_legendre_discretization(self, n):
- nodes, weights = np.polynomial.legendre.leggauss(n)
- nodes_arr = []
- weights_arr = []
- for i in range(self.d):
- a, b = float(self.bounds[i, 0]), float(self.bounds[i, 1])
- nodes = nodes * (b - a) / 2. + (a + b) / 2.
- nodes_arr.append(nodes)
- weights_arr.append(weights * 0.5 * (b - a))
-
- nodes = cartesian(nodes_arr)
- weights = cartesian(weights_arr)
- return torch.prod(torch.from_numpy(weights), dim=1), torch.from_numpy(nodes)
-
- def return_discretization(self, n, offsets=None):
- dis = []
- for i in range(self.d):
- if offsets is None:
- x = np.linspace(self.bounds[i, 0], self.bounds[i, 1], n)
- else:
- x = np.linspace(self.bounds[i, 0] - offsets[i], self.bounds[i, 1] + offsets[i], n)
- dis.append(x)
- r = cartesian(dis)
- r = torch.from_numpy(r)
- return r
-
- def inside(self, set):
- """
- Tests if set is inside this set
- :param set:
- :return:
- """
- for i in range(self.d):
- if self.bounds[i, 0] > set.bounds[i, 0] or self.bounds[i, 1] < set.bounds[i, 1]:
- return False
- return True
-
- def is_inside(self, x):
- """
- :param x: (n,d) to check if a<=x set.bounds[i, 0]
+ or self.bounds[i, 1] < set.bounds[i, 1]
+ ):
+ return False
+ return True
+
+ def is_inside(self, x):
+ """
+ :param x: (n,d) to check if a<=x self.center[i] - self.radius or set.bounds[i, 1] < self.center[i] - self.radius:
- return False
- ## set is round
- else:
- if (self.center - set.center) ** 2 > self.radius ** 2:
- return False
- return True
-
- def is_inside(self, x):
- """
- :param x: (n,d) to check if a<=x= (x - torch.tile(self.center, (n, 1))) ** 2
- return mask
+ def __init__(self, d, center, radius):
+ self.d = d
+ self.center = center
+ self.radius = radius
+ self.calculate_volume()
+ self.type = "round"
+
+ def calculate_volume(self):
+ self.vol = (
+ (self.radius**self.d)
+ * np.pi ** (self.d // 2)
+ / (scipy.special.gamma(self.d // 2 + 1))
+ )
+
+ def description(self):
+ return self.center, self.radius
+
+ def return_discretization(self, n):
+ if self.d == 1:
+ dis = []
+ x = np.linspace(self.center - self.radius, self.center + self.radius, n)
+ dis.append(x)
+ r = cartesian(dis)
+ r = torch.tensor(r)
+ return r
+
+ elif self.d == 2:
+
+ p, w = np.polynomial.legendre.leggauss(n)
+ mu = np.arange(1, n + 1)
+ points = np.array(
+ [
+ np.tile(self.radius * np.cos(mu * np.pi / (n + 1)), n),
+ np.outer(p, self.radius * np.sin(mu * np.pi / (n + 1))).flatten(),
+ ]
+ ).T
+ points[:, 0] += float(self.center[0])
+ points[:, 1] += float(self.center[1])
+
+ # k = n - 2
+ # theta = 2 * np.pi * np.arange(1, k + 2) / (k + 1)
+ # p, w = np.polynomial.legendre.leggauss(n + 1)
+ # # scale points to [r0, r1] (where r0 = 0, r1 = 1 for now)
+ # p = np.sqrt(0.5 * (p + 1.0))
+ # p_theta = np.dstack(np.meshgrid(p, theta)).reshape(-1, 2).T
+ # points = np.array(
+ # [p_theta[0] * self.radius * np.cos(p_theta[1]), p_theta[0] * self.radius * np.sin(p_theta[1])]
+ # ).T
+ # points[:,0] += float(self.center[0])
+ # points[:,1] += float(self.center[1])
+ #
+ # points = np.concatenate((points,self.center.view(-1,self.d).numpy()))
+
+ return torch.tensor(points)
+
+ def return_legendre_discretization(self, n):
+ if self.d == 2:
+ p, w = np.polynomial.legendre.leggauss(n)
+ mu = np.arange(1, n + 1)
+ points = np.array(
+ [
+ np.tile(self.radius * np.cos(mu * np.pi / (n + 1)), n),
+ np.outer(p, self.radius * np.sin(mu * np.pi / (n + 1))).flatten(),
+ ]
+ ).T
+ points[:, 0] += float(self.center[0])
+ points[:, 1] += float(self.center[1])
+ weights = np.outer(w, np.sin(mu * np.pi / (n + 1)) ** 2).flatten() / (n + 1)
+ return torch.tensor(weights), torch.tensor(points)
+ else:
+ raise AssertionError("Wrong type of set considered.")
+
+ def inside(self, set):
+ """
+ Tests if set is inside this set
+ :param set:
+ :return:
+ """
+
+ ## the tested set is box
+ if set.type == "box":
+ for i in range(self.d):
+ if (
+ set.bounds[i, 0] > self.center[i] - self.radius
+ or set.bounds[i, 1] < self.center[i] - self.radius
+ ):
+ return False
+ ## set is round
+ else:
+ if (self.center - set.center) ** 2 > self.radius**2:
+ return False
+ return True
+
+ def is_inside(self, x):
+ """
+ :param x: (n,d) to check if a<=x= (x - torch.tile(self.center, (n, 1))) ** 2
+ return mask
class Node(BorelSet):
- def __init__(self, d, bounds, parent):
- super().__init__(d, bounds)
- self.left = None
- self.right = None
- self.children = None
- self.parent = parent
-
- if self.parent is None:
- self.level = 1
- else:
- self.level = parent.level + 1
-
-
-class HierarchicalBorelSets():
-
- def __init__(self, d, interval, levels):
- if d == 1:
- self.top_node = Node(d, torch.Tensor([interval]), None)
- elif d == 2:
- self.top_node = Node(d, torch.Tensor(interval), None)
-
- self.Sets = [self.top_node]
- self.levels = levels
- if d == 1:
- self.construct_1d(interval, levels, self.Sets, self.top_node)
- else:
- self.construct_2d(self.top_node.bounds, levels, self.Sets, self.top_node)
- self.d = d
-
- def get_parent_set(self):
- return self.top_node
-
- def get_sets_level(self, l):
- out = []
- for s in self.Sets:
- if s.level == l:
- out.append(s)
- return out
-
- def get_all_sets(self):
- return self.Sets
-
- def get_ball_coverings(self, n, radius='auto'):
- D = self.get_parent_set()
- centers = D.return_discretization(n)
- n = centers.size()[0]
- sets = []
- for i in range(n):
- if radius == 'auto':
- sets.append(BallSet(D.d, centers[i, :], 2. / n))
- else:
- sets.append(BallSet(D.d, centers[i, :], radius))
- return sets
-
- def construct_1d(self, interval, levels, S, parent):
-
- if levels > 1:
- a, b = interval
- c = (a + b) / 2.
-
- S_1 = Node(1, torch.Tensor([[a, c]]), parent)
- S_2 = Node(1, torch.Tensor([[c, b]]), parent)
-
- parent.left = S_1
- parent.right = S_2
-
- S.append(S_1)
- self.construct_1d((a, c), levels - 1, S, S_1)
- S.append(S_2)
- self.construct_1d((c, b), levels - 1, S, S_2)
-
- else:
- return None
-
- def construct_2d(self, interval, levels, S, parent):
- if levels > 1:
- xa = interval[0, 0]
- xb = interval[0, 1]
- ya = interval[1, 0]
- yb = interval[1, 1]
-
- midx = xa + (xb - xa) / 2.
- midy = ya + (yb - ya) / 2.
-
- S1 = Node(2, torch.Tensor([[xa, midx], [ya, midy]]), parent)
- S2 = Node(2, torch.Tensor([[xa, midx], [midy, yb]]), parent)
- S3 = Node(2, torch.Tensor([[midx, xb], [ya, midy]]), parent)
- S4 = Node(2, torch.Tensor([[midx, xb], [midy, yb]]), parent)
-
- parent.children = [S1, S2, S3, S4]
-
- for child in parent.children:
- S.append(child)
- self.construct_2d(child.bounds, levels - 1, S, child)
- else:
- return None
+ def __init__(self, d, bounds, parent):
+ super().__init__(d, bounds)
+ self.left = None
+ self.right = None
+ self.children = None
+ self.parent = parent
+
+ if self.parent is None:
+ self.level = 1
+ else:
+ self.level = parent.level + 1
+
+
+class HierarchicalBorelSets:
+
+ def __init__(self, d, interval, levels):
+ if d == 1:
+ self.top_node = Node(d, torch.tensor([interval]), None)
+ elif d == 2:
+ self.top_node = Node(d, torch.tensor(interval), None)
+
+ self.Sets = [self.top_node]
+ self.levels = levels
+ if d == 1:
+ self.construct_1d(interval, levels, self.Sets, self.top_node)
+ else:
+ self.construct_2d(self.top_node.bounds, levels, self.Sets, self.top_node)
+ self.d = d
+
+ def get_parent_set(self):
+ return self.top_node
+
+ def get_sets_level(self, l):
+ out = []
+ for s in self.Sets:
+ if s.level == l:
+ out.append(s)
+ return out
+
+ def get_all_sets(self):
+ return self.Sets
+
+ def get_ball_coverings(self, n, radius="auto"):
+ D = self.get_parent_set()
+ centers = D.return_discretization(n)
+ n = centers.size()[0]
+ sets = []
+ for i in range(n):
+ if radius == "auto":
+ sets.append(BallSet(D.d, centers[i, :], 2.0 / n))
+ else:
+ sets.append(BallSet(D.d, centers[i, :], radius))
+ return sets
+
+ def construct_1d(self, interval, levels, S, parent):
+
+ if levels > 1:
+ a, b = interval
+ c = (a + b) / 2.0
+
+ S_1 = Node(1, torch.tensor([[a, c]]), parent)
+ S_2 = Node(1, torch.tensor([[c, b]]), parent)
+
+ parent.left = S_1
+ parent.right = S_2
+
+ S.append(S_1)
+ self.construct_1d((a, c), levels - 1, S, S_1)
+ S.append(S_2)
+ self.construct_1d((c, b), levels - 1, S, S_2)
+
+ else:
+ return None
+
+ def construct_2d(self, interval, levels, S, parent):
+ if levels > 1:
+ xa = interval[0, 0]
+ xb = interval[0, 1]
+ ya = interval[1, 0]
+ yb = interval[1, 1]
+
+ midx = xa + (xb - xa) / 2.0
+ midy = ya + (yb - ya) / 2.0
+
+ S1 = Node(2, torch.tensor([[xa, midx], [ya, midy]]), parent)
+ S2 = Node(2, torch.tensor([[xa, midx], [midy, yb]]), parent)
+ S3 = Node(2, torch.tensor([[midx, xb], [ya, midy]]), parent)
+ S4 = Node(2, torch.tensor([[midx, xb], [midy, yb]]), parent)
+
+ parent.children = [S1, S2, S3, S4]
+
+ for child in parent.children:
+ S.append(child)
+ self.construct_2d(child.bounds, levels - 1, S, child)
+ else:
+ return None
if __name__ == "__main__":
- center = torch.Tensor([0.5, 0.5]).double()
- radius = 0.1
- d = 2
- B = BallSet(d, center, radius)
-
- weights, xtest = B.return_legendre_discretization(10)
- xtest2 = B.return_discretization(10)
- print(torch.sum(weights))
- plt.plot(xtest[:, 0], xtest[:, 1], 'ko')
- plt.plot(xtest2[:, 0], xtest2[:, 1], 'ro')
- plt.show()
+ center = torch.tensor([0.5, 0.5]).double()
+ radius = 0.1
+ d = 2
+ B = BallSet(d, center, radius)
+
+ weights, xtest = B.return_legendre_discretization(10)
+ xtest2 = B.return_discretization(10)
+ print(torch.sum(weights))
+ plt.plot(xtest[:, 0], xtest[:, 1], "ko")
+ plt.plot(xtest2[:, 0], xtest2[:, 1], "ro")
+ plt.show()
diff --git a/stpy/candidate_set.py b/stpy/candidate_set.py
index 663a2aa..e3333e7 100644
--- a/stpy/candidate_set.py
+++ b/stpy/candidate_set.py
@@ -2,60 +2,61 @@
import torch
-class CandidateSet():
+class CandidateSet:
+
+ def __init__(self):
+ pass
- def __init__(self):
- pass
class CandidateDiscreteSet(CandidateSet):
- def __init__(self, xtest):
- super().__init__()
- self.xtest = xtest
- self.embedded = False
-
- def get_set_size(self):
- return self.xtest.size()[0]
-
- def get_dim(self):
- return self.xtest.size()[1]
-
- def get_emb_dim(self):
- if self.embedded:
- return self.emb_xtest.size()[1]
- else:
- return self.xtest.size()[1]
-
- def get_random_elements(self, size = 1):
- n = self.get_set_size()
- indices = np.random.choice(np.arange(0,n,1), size)
- print (indices)
- if self.embedded:
- elem = self.emb_xtest[indices, :]
- else:
- elem = self.xtest[indices,:]
- print (elem)
- return elem
-
- def debug_subsample(self):
- self.xtest = self.xtest[0:20000,:]
-
- def get_options_per_dim(self):
- d = {}
- dims = self.get_dim()
- for i in range(dims):
- d[i] = torch.unique(self.xtest[:,i])
- return d
-
- def get_options(self):
- if self.embedded:
- return self.emb_xtest
- else:
- return self.xtest
-
- def get_options_raw(self):
- return self.xtest
-
- def use_embedding(self, embed):
- self.embedded = True
- self.emb_xtest = embed(self.xtest)
+ def __init__(self, xtest):
+ super().__init__()
+ self.xtest = xtest
+ self.embedded = False
+
+ def get_set_size(self):
+ return self.xtest.size()[0]
+
+ def get_dim(self):
+ return self.xtest.size()[1]
+
+ def get_emb_dim(self):
+ if self.embedded:
+ return self.emb_xtest.size()[1]
+ else:
+ return self.xtest.size()[1]
+
+ def get_random_elements(self, size=1):
+ n = self.get_set_size()
+ indices = np.random.choice(np.arange(0, n, 1), size)
+ print(indices)
+ if self.embedded:
+ elem = self.emb_xtest[indices, :]
+ else:
+ elem = self.xtest[indices, :]
+ print(elem)
+ return elem
+
+ def debug_subsample(self):
+ self.xtest = self.xtest[0:20000, :]
+
+ def get_options_per_dim(self):
+ d = {}
+ dims = self.get_dim()
+ for i in range(dims):
+ d[i] = torch.unique(self.xtest[:, i])
+ return d
+
+ def get_options(self):
+ if self.embedded:
+ return self.emb_xtest
+ else:
+ return self.xtest
+
+ def get_options_raw(self):
+ return self.xtest
+
+ def use_embedding(self, embed):
+ self.embedded = True
+ self.emb_xtest = embed(self.xtest)
diff --git a/stpy/continuous_processes/categorical_mixture.py b/stpy/continuous_processes/categorical_mixture.py
index c22bff8..8dc3689 100755
--- a/stpy/continuous_processes/categorical_mixture.py
+++ b/stpy/continuous_processes/categorical_mixture.py
@@ -8,179 +8,188 @@
class CategoricalMixture(GaussianProcess):
- def __init__(self, processes, init_weights=None, d=1, bounds=None):
- if init_weights is None:
- self.k = len(processes)
- init_weights = torch.ones(size=(self.k, 1)).view(-1).double() * 1. / float(self.k)
- else:
- self.k = len(processes)
-
- if len(processes) != init_weights.shape[0]:
- raise AssertionError("Not the same number")
-
- self.processes = processes
- self.bounds = bounds
- self.beta = 2.
- self.d = d
- self.x = None
- self.y = None
- self.init_weights = init_weights
- if torch.sum(self.init_weights) > 1.:
- self.init_weights = self.init_weights / torch.sum(self.init_weights)
- self.weights = self.init_weights
-
- def add_data_point(self, x, y):
- for model in self.processes:
- model.add_data_point(x, y)
-
- def log_prob_normal(self, K, y):
- Knumpy = K.detach().numpy()
- ynumpy = y.detach().numpy()
-
- decomp = scipy.linalg.lu_factor(Knumpy)
- alpha = scipy.linalg.lu_solve(decomp, ynumpy)
-
- logprob = -0.5 * ynumpy.T.dot(alpha) - 0.5 * np.linalg.slogdet(Knumpy)[1] - 0.5 * ynumpy.shape[0] * np.log(
- 2 * np.pi)
-
- return float(logprob)
-
- def fit_gp(self, x, y, iterative=False):
- self.x = x
- self.y = y
-
- logprobs = torch.zeros(size=(self.k, 1)).view(-1).double()
-
- for j in range(self.k):
- GP = self.processes[j]
- GP.fit(x, y)
- K = GP.get_kernel()
- logprobs[j] = self.log_prob_normal(K, y)
-
- # print("Neg. log likelihood vector:", -logprobs)
-
- log_init_prob = torch.log(self.init_weights)
- log_posterior = log_init_prob + logprobs
- log_evidence = torch.logsumexp(log_posterior, dim=0)
- self.weights = torch.exp(log_posterior - log_evidence)
-
- # print ("Categorical Probability: ",self.weights)
- # print ("---------------------------------")
-
- self.fit = True
- return True
-
- def mean_std(self, xtest):
- mu = torch.zeros(size=(xtest.size()[0], 1)).double()
- s = torch.zeros(size=(xtest.size()[0], 1)).double()
- for j in range(self.k):
- (a1, a2) = self.processes[j].mean_std(xtest)
-
- mu = mu + self.weights[j] * a1
- s = s + self.weights[j] * a2 ** 2
- s = torch.sqrt(s)
- return (mu, s)
-
- def sample(self, xtest, size=1, with_mask=False):
- # sample a GP
- k = np.random.choice(np.arange(0, self.k, 1), p=self.weights.flatten())
- mask = [k]
- if self.fit == True:
- self.processes[k].fit(self.x, self.y)
- samples = self.processes[k].sample(xtest, size=1)
- else:
- samples = self.processes[k].sample(xtest, size=1)
-
- for s in range(size - 1):
- k = np.random.choice(np.arange(0, self.k, 1), p=self.weights.flatten())
- mask.append(k)
- if self.fit == True:
- self.processes[k].fit(self.x, self.y)
- sample = self.processes[k].sample(xtest, size=1)
- samples = torch.cat((samples, sample), dim=1)
- else:
- sample = self.processes[k].sample(xtest, size=1)
- samples = torch.cat((samples, sample), dim=1)
- if with_mask == True:
- return (samples, mask)
- else:
- return samples
+ def __init__(self, processes, init_weights=None, d=1, bounds=None):
+ if init_weights is None:
+ self.k = len(processes)
+ init_weights = (
+ torch.ones(size=(self.k, 1)).view(-1).double() * 1.0 / float(self.k)
+ )
+ else:
+ self.k = len(processes)
+
+ if len(processes) != init_weights.shape[0]:
+ raise AssertionError("Not the same number")
+
+ self.processes = processes
+ self.bounds = bounds
+ self.beta = 2.0
+ self.d = d
+ self.x = None
+ self.y = None
+ self.init_weights = init_weights
+ if torch.sum(self.init_weights) > 1.0:
+ self.init_weights = self.init_weights / torch.sum(self.init_weights)
+ self.weights = self.init_weights
+
+ def add_data_point(self, x, y):
+ for model in self.processes:
+ model.add_data_point(x, y)
+
+ def log_prob_normal(self, K, y):  # log-density of y under a zero-mean Gaussian with covariance K, returned as a Python float
+ Knumpy = K.detach().numpy()  # leave autograd and work in NumPy/SciPy
+ ynumpy = y.detach().numpy()
+
+ decomp = scipy.linalg.lu_factor(Knumpy)  # LU factorisation of K
+ alpha = scipy.linalg.lu_solve(decomp, ynumpy)  # alpha solves K @ alpha = y
+
+ logprob = (
+ -0.5 * ynumpy.T.dot(alpha)  # quadratic term: -1/2 * y^T K^{-1} y
+ - 0.5 * np.linalg.slogdet(Knumpy)[1]  # -1/2 * log|det K| (slogdet returns (sign, logabsdet))
+ - 0.5 * ynumpy.shape[0] * np.log(2 * np.pi)  # normalising constant for y.shape[0] observations
+ )
+
+ return float(logprob)  # assumes the expression holds a single value - TODO confirm the shape of y
+
+ def fit_gp(self, x, y, iterative=False):
+ self.x = x
+ self.y = y
+
+ logprobs = torch.zeros(size=(self.k, 1)).view(-1).double()
+
+ for j in range(self.k):
+ GP = self.processes[j]
+ GP.fit(x, y)
+ K = GP.get_kernel()
+ logprobs[j] = self.log_prob_normal(K, y)
+
+ # print("Neg. log likelihood vector:", -logprobs)
+
+ log_init_prob = torch.log(self.init_weights)
+ log_posterior = log_init_prob + logprobs
+ log_evidence = torch.logsumexp(log_posterior, dim=0)
+ self.weights = torch.exp(log_posterior - log_evidence)
+
+ # print ("Categorical Probability: ",self.weights)
+ # print ("---------------------------------")
+
+ self.fit = True
+ return True
+
+ def mean_std(self, xtest):
+ mu = torch.zeros(size=(xtest.size()[0], 1)).double()
+ s = torch.zeros(size=(xtest.size()[0], 1)).double()
+ for j in range(self.k):
+ (a1, a2) = self.processes[j].mean_std(xtest)
+
+ mu = mu + self.weights[j] * a1
+ s = s + self.weights[j] * a2**2
+ s = torch.sqrt(s)
+ return (mu, s)
+
+ def sample(self, xtest, size=1, with_mask=False):
+ # sample a GP
+ k = np.random.choice(np.arange(0, self.k, 1), p=self.weights.flatten())
+ mask = [k]
+ if self.fit == True:
+ self.processes[k].fit(self.x, self.y)
+ samples = self.processes[k].sample(xtest, size=1)
+ else:
+ samples = self.processes[k].sample(xtest, size=1)
+
+ for s in range(size - 1):
+ k = np.random.choice(np.arange(0, self.k, 1), p=self.weights.flatten())
+ mask.append(k)
+ if self.fit == True:
+ self.processes[k].fit(self.x, self.y)
+ sample = self.processes[k].sample(xtest, size=1)
+ samples = torch.cat((samples, sample), dim=1)
+ else:
+ sample = self.processes[k].sample(xtest, size=1)
+ samples = torch.cat((samples, sample), dim=1)
+ if with_mask == True:
+ return (samples, mask)
+ else:
+ return samples
if __name__ == "__main__":
- # domain size
- L_infinity_ball = 5
- # dimension
- d = 1
- # error variance
- s = 0.001
- # grid density
- n = 512
- # number of intial points
- N = 15
-
- # model
- # GP1 = GaussianProcess(kernel="squared_exponential", s=s, gamma = 1.5, diameter=L_infinity_ball)
- GP1 = GaussianProcess(kernel="modified_matern", s=s, kappa=1., nu=2, gamma=1.5)
- GP2 = GaussianProcess(kernel="modified_matern", s=s, kappa=1., nu=1, gamma=0.7)
- # GP2 = GaussianProcess(kernel="squared_exponential", s=s, gamma=1.1)
- GP3 = GaussianProcess(kernel="modified_matern", s=s, kappa=1., nu=2, gamma=1)
- GP4 = GaussianProcess(kernel="linear", s=s, kappa=1.)
-
- # data
- # GPTrue = GaussianProcess(kernel="linear", s=0, kappa=1., diameter=L_infinity_ball)
- # GPTrue = GaussianProcess(kernel="squared_exponential", s=s, gamma=2., kappa = 1)
- GPTrue = GaussianProcess(kernel="modified_matern", s=s, kappa=1., nu=2, gamma=1.1)
-
- # test environment
-
- d = 1
- from stpy.test_functions.benchmarks import GaussianProcessSample
-
- BenchmarkFunc = GaussianProcessSample(d=d, n=n, sigma=0., gamma=0.2, name="squared_exponential")
- x = BenchmarkFunc.initial_guess(N)
- xtest = BenchmarkFunc.interval(n)
- BenchmarkFunc.optimize(xtest, s)
- gamma = BenchmarkFunc.bandwidth()
- bounds = BenchmarkFunc.bounds()
- BenchmarkFunc.scale_max(xtest=xtest)
- F = lambda x: BenchmarkFunc.eval(x, sigma=s)
-
- # targets
- y = F(x)
- GPs = [GP1, GP2, GP3, GP4]
- # Mix = CategoricalMixture(GPs,init_weights=np.array([0.01,0.01,0.98]))
- Mix = CategoricalMixture(GPs)
-
- for j in range(N):
- plt.figure(1)
- plt.clf()
- X = x[0:j + 1, :].reshape(-1, 1)
- y = F(X)
- Mix.fit_gp(X, y)
- (mu, var) = Mix.mean_std(xtest)
- samples = Mix.sample(xtest, size=5)
- f = F(xtest).numpy()
- mu = mu.numpy()
- var = var.numpy()
- samples = samples.numpy()
- xtest2 = xtest.numpy()
-
- plt.plot(xtest2, samples, '--', linewidth=2, alpha=0.3)
- plt.plot(xtest2, mu, 'k', linewidth=3)
- plt.plot(xtest2, mu, 'k', linewidth=3)
- plt.fill_between(xtest2.flat, (mu - 2 * var).flat, (mu + 2 * var).flat, color="#dddddd")
- plt.plot(X, y, 'ro', markersize=10)
- plt.plot(xtest2, f, 'g', linewidth=3)
- plt.draw()
-
- plt.figure(2)
- plt.clf()
- plt.title("Probability of Category")
- plt.bar(np.arange(len(GPs)), Mix.weights, np.ones(len(GPs)) * 0.5)
- plt.xticks(np.arange(len(GPs)), [GP.description() for GP in GPs], rotation=30)
- plt.subplots_adjust(bottom=0.35)
- plt.plot()
- plt.show()
+ # domain size
+ L_infinity_ball = 5
+ # dimension
+ d = 1
+ # error variance
+ s = 0.001
+ # grid density
+ n = 512
+ # number of initial points
+ N = 15
+
+ # model
+ # GP1 = GaussianProcess(kernel="squared_exponential", s=s, gamma = 1.5, diameter=L_infinity_ball)
+ GP1 = GaussianProcess(kernel="modified_matern", s=s, kappa=1.0, nu=2, gamma=1.5)
+ GP2 = GaussianProcess(kernel="modified_matern", s=s, kappa=1.0, nu=1, gamma=0.7)
+ # GP2 = GaussianProcess(kernel="squared_exponential", s=s, gamma=1.1)
+ GP3 = GaussianProcess(kernel="modified_matern", s=s, kappa=1.0, nu=2, gamma=1)
+ GP4 = GaussianProcess(kernel="linear", s=s, kappa=1.0)
+
+ # data
+ # GPTrue = GaussianProcess(kernel="linear", s=0, kappa=1., diameter=L_infinity_ball)
+ # GPTrue = GaussianProcess(kernel="squared_exponential", s=s, gamma=2., kappa = 1)
+ GPTrue = GaussianProcess(kernel="modified_matern", s=s, kappa=1.0, nu=2, gamma=1.1)
+
+ # test environment
+
+ d = 1
+ from stpy.test_functions.benchmarks import GaussianProcessSample
+
+ BenchmarkFunc = GaussianProcessSample(
+ d=d, n=n, sigma=0.0, gamma=0.2, name="squared_exponential"
+ )
+ x = BenchmarkFunc.initial_guess(N)
+ xtest = BenchmarkFunc.interval(n)
+ BenchmarkFunc.optimize(xtest, s)
+ gamma = BenchmarkFunc.bandwidth()
+ bounds = BenchmarkFunc.bounds()
+ BenchmarkFunc.scale_max(xtest=xtest)
+ F = lambda x: BenchmarkFunc.eval(x, sigma=s)
+
+ # targets
+ y = F(x)
+ GPs = [GP1, GP2, GP3, GP4]
+ # Mix = CategoricalMixture(GPs,init_weights=np.array([0.01,0.01,0.98]))
+ Mix = CategoricalMixture(GPs)
+
+ for j in range(N):
+ plt.figure(1)
+ plt.clf()
+ X = x[0 : j + 1, :].reshape(-1, 1)
+ y = F(X)
+ Mix.fit_gp(X, y)
+ (mu, var) = Mix.mean_std(xtest)
+ samples = Mix.sample(xtest, size=5)
+ f = F(xtest).numpy()
+ mu = mu.numpy()
+ var = var.numpy()
+ samples = samples.numpy()
+ xtest2 = xtest.numpy()
+
+ plt.plot(xtest2, samples, "--", linewidth=2, alpha=0.3)
+ plt.plot(xtest2, mu, "k", linewidth=3)
+ plt.plot(xtest2, mu, "k", linewidth=3)
+ plt.fill_between(
+ xtest2.flat, (mu - 2 * var).flat, (mu + 2 * var).flat, color="#dddddd"
+ )
+ plt.plot(X, y, "ro", markersize=10)
+ plt.plot(xtest2, f, "g", linewidth=3)
+ plt.draw()
+
+ plt.figure(2)
+ plt.clf()
+ plt.title("Probability of Category")
+ plt.bar(np.arange(len(GPs)), Mix.weights, np.ones(len(GPs)) * 0.5)
+ plt.xticks(np.arange(len(GPs)), [GP.description() for GP in GPs], rotation=30)
+ plt.subplots_adjust(bottom=0.35)
+ plt.plot()
+ plt.show()
# plt.pause(4)
diff --git a/stpy/continuous_processes/convex_rkhs.py b/stpy/continuous_processes/convex_rkhs.py
index 30f3775..a3df5e1 100644
--- a/stpy/continuous_processes/convex_rkhs.py
+++ b/stpy/continuous_processes/convex_rkhs.py
@@ -2,17 +2,21 @@
import torch
from torchmin import minimize
from stpy.candidate_set import CandidateDiscreteSet
-from stpy.generative_models.conditional_generative_model import ConditionalGenerativeModel
+from stpy.generative_models.conditional_generative_model import (
+ ConditionalGenerativeModel,
+)
+
+
class ConvexRKHS(KernelizedFeatures):
- """
- """
+ """ """
- def __init__(self, embedding, m, lam = 0. , s = 0.01):
+ def __init__(self, embedding, m, lam=0.0, s=0.01):
super().__init__(embedding, m)
self.Gamma = torch.eye(m, requires_grad=True).double()
self.lam = lam
self.s = s
- def fit(self,x=None,y=None):
+
+ def fit(self, x=None, y=None):
"""
legacy method
:param x:
@@ -20,32 +24,53 @@ def fit(self,x=None,y=None):
:return:
"""
pass
+
def weight_scaling(self, Gamma, scale, x_single, y, Phi):
x = torch.tile(x_single, (y.size()[0], 1))
- return torch.exp(-torch.sum(((Phi(x) - Phi(y)) @ Gamma /scale) ** 2, axis=1))
+ return torch.exp(-torch.sum(((Phi(x) - Phi(y)) @ Gamma / scale) ** 2, axis=1))
def local_fit(self, weights):
D = torch.diag(weights)
X = self.embed(self.x)
- theta = torch.linalg.inv((X.T @ D @ X) + self.lam * torch.eye(self.m)) @ X.T @ D @ self.y
+ theta = (
+ torch.linalg.inv((X.T @ D @ X) + self.lam * torch.eye(self.m))
+ @ X.T
+ @ D
+ @ self.y
+ )
return theta
- def optimize_params(self, type='bandwidth', restarts=10, regularizer=None,
- maxiter=1000, mingradnorm=1e-4, verbose=False, optimizer="pymanopt", scale=1., weight=1., save = False,
- save_name = 'model.np', init_func = None, bounds = None, parallel = False, cores = None):
+
+ def optimize_params(
+ self,
+ type="bandwidth",
+ restarts=10,
+ regularizer=None,
+ maxiter=1000,
+ mingradnorm=1e-4,
+ verbose=False,
+ optimizer="pymanopt",
+ scale=1.0,
+ weight=1.0,
+ save=False,
+ save_name="model.np",
+ init_func=None,
+ bounds=None,
+ parallel=False,
+ cores=None,
+ ):
x_data = self.x
y_data = self.y
Phi = lambda x: self.embedding.embed(x)
m = self.get_basis_size()
-
def total_loss(gamma):
weights = []
predictions = []
for i in range(x_data.size()[0]):
x = x_data[i]
- Gamma = torch.diag(gamma)
- w = self.weight_scaling(Gamma, 1., x, x_data, Phi)
+ Gamma = torch.diag(gamma)
+ w = self.weight_scaling(Gamma, 1.0, x, x_data, Phi)
X = Phi(x_data)
# local fit in the new coordinates
@@ -61,21 +86,21 @@ def total_loss(gamma):
for p1, w1 in zip(predictions, weights):
# loss that makes sure we predict correctly
- loss = 1* torch.sum(((p1 - y_data) ** 2)/(self.s**2) * (w1)) / 2
+ loss = 1 * torch.sum(((p1 - y_data) ** 2) / (self.s**2) * (w1)) / 2
for p2, w2 in zip(predictions, weights):
# loss that makes sure the predictions are consistent (this can be a larger set)
- loss += 1* torch.sum((p1 - p2)**2/(self.s**2) * (w1 * w2))
+ loss += 1 * torch.sum((p1 - p2) ** 2 / (self.s**2) * (w1 * w2))
- return loss + 0.001*torch.sum(gamma**2)
+ return loss + 0.001 * torch.sum(gamma**2)
# optimize this
vals = []
args = []
for _ in range(restarts):
- gamma = torch.randn(m, requires_grad=True).double()**2
+ gamma = torch.randn(m, requires_grad=True).double() ** 2
total_loss(gamma)
- result = minimize(total_loss, gamma, method='bfgs', disp=2)
+ result = minimize(total_loss, gamma, method="bfgs", disp=2)
vals.append(result.fun)
args.append(result.x)
@@ -83,11 +108,11 @@ def total_loss(gamma):
def mean(self, xtest):
phitest = self.embed(xtest)
- out = torch.zeros(size = (phitest.size()[0],1)).double()
+ out = torch.zeros(size=(phitest.size()[0], 1)).double()
for i, x in enumerate(xtest):
- w = self.weight_scaling(self.Gamma, 1., x, self.x, self.embed)
- out[i] = 0.
- f = self.embed(x)@self.local_fit(w)
+ w = self.weight_scaling(self.Gamma, 1.0, x, self.x, self.embed)
+ out[i] = 0.0
+ f = self.embed(x) @ self.local_fit(w)
out[i] = f
return out
@@ -96,17 +121,17 @@ def best_points_so_far(self):
get all points which are above max - 2*s
:return:
"""
- conservative_best_value = torch.max(self.y) - 2*self.s
+ conservative_best_value = torch.max(self.y) - 2 * self.s
mask = self.y > conservative_best_value
- return self.x[mask,:]
+ return self.x[mask, :]
- def sample_neighbourhood_sample(self, x_loc, candidate_set, cut_off = 0.01, size = 10):
- if isinstance(CandidateDiscreteSet,candidate_set):
+ def sample_neighbourhood_sample(self, x_loc, candidate_set, cut_off=0.01, size=10):
+ if isinstance(CandidateDiscreteSet, candidate_set):
xtest = self.embed(candidate_set.get_options_raw)
- w = self.weight_scaling(self.Gamma, 1., x_loc,xtest, self.embed)
+ w = self.weight_scaling(self.Gamma, 1.0, x_loc, xtest, self.embed)
selection = xtest[w > cut_off]
max_v = selection.size()[0]
- indices = np.random.choice(max_v, size = size)
+ indices = np.random.choice(max_v, size=size)
out = selection[indices]
return out
elif isinstance(ConditionalGenerativeModel, candidate_set):
@@ -115,7 +140,7 @@ def sample_neighbourhood_sample(self, x_loc, candidate_set, cut_off = 0.01, size
NotImplementedError("The requested candidate set method is not implemented")
def func_gradient(self, x):
- w = self.weight_scaling(self.Gamma, 1., x, self.x, self.embed)
+ w = self.weight_scaling(self.Gamma, 1.0, x, self.x, self.embed)
return self.local_fit(weights=w)
@@ -129,23 +154,23 @@ def func_gradient(self, x):
n = 256
N = 4
lam = 1e-6
- gamma_original = torch.randn(size = (embedding.get_m(),)).double()
+ gamma_original = torch.randn(size=(embedding.get_m(),)).double()
xtest = interval_torch(d=1, n=n)
- x = torch.zeros(size =(N,1)).double()
+ x = torch.zeros(size=(N, 1)).double()
x = x.uniform_()
Phi_original = lambda x: embedding.embed(x) @ torch.diag(gamma_original)
Phi = lambda x: embedding.embed(x)
y = torch.sum(Phi_original(x) ** 2, axis=1).view(-1)
- ytest= torch.sum(Phi_original(xtest) ** 2, axis=1).view(-1)
- Estimator = ConvexRKHS(embedding, embedding.get_m(), lam = lam )
- #Estimator = torch.compile(Estimator)
+ ytest = torch.sum(Phi_original(xtest) ** 2, axis=1).view(-1)
+ Estimator = ConvexRKHS(embedding, embedding.get_m(), lam=lam)
+ # Estimator = torch.compile(Estimator)
Estimator.load_data((x, y))
Estimator.optimize_params()
- print ("True gamma:",gamma_original)
- print ("Optimized gamma:", torch.diag(Estimator.Gamma))
+ print("True gamma:", gamma_original)
+ print("Optimized gamma:", torch.diag(Estimator.Gamma))
offset = 20
Phi = lambda x: embedding.embed(x)
fig, ax1 = plt.subplots()
@@ -153,24 +178,31 @@ def func_gradient(self, x):
for i in range(xtest.size()[0]):
x = xtest[i]
- w = Estimator.weight_scaling(Estimator.Gamma, 1., x, xtest, Phi)
+ w = Estimator.weight_scaling(Estimator.Gamma, 1.0, x, xtest, Phi)
D = torch.diag(w)
X = Phi(xtest)
- theta = torch.linalg.inv((X.T@D@X) + lam * torch.eye(embedding.get_m()))@X.T@D@ytest
- prediction = (X@theta).detach()
-
- if i%64 == 0:
- p = ax1.plot(xtest[i],
- prediction[i],'o',ms = 10)
-
- ax1.plot(xtest[np.max([0,i-offset]):np.min([i+offset,n])],
- prediction[np.max([0,i-offset]):np.min([i+offset,n])], color = p[0].get_color())
- ax2.plot(xtest, w, color = p[0].get_color())
+ theta = (
+ torch.linalg.inv((X.T @ D @ X) + lam * torch.eye(embedding.get_m()))
+ @ X.T
+ @ D
+ @ ytest
+ )
+ prediction = (X @ theta).detach()
+
+ if i % 64 == 0:
+ p = ax1.plot(xtest[i], prediction[i], "o", ms=10)
+
+ ax1.plot(
+ xtest[np.max([0, i - offset]) : np.min([i + offset, n])],
+ prediction[np.max([0, i - offset]) : np.min([i + offset, n])],
+ color=p[0].get_color(),
+ )
+ ax2.plot(xtest, w, color=p[0].get_color())
mu = Estimator.mean(xtest)
- ax1.plot(xtest, mu, 'b')
- ax1.plot(xtest,ytest,'k--')
- ax1.plot(Estimator.x,Estimator.y,'ko')
+ ax1.plot(xtest, mu, "b")
+ ax1.plot(xtest, ytest, "k--")
+ ax1.plot(Estimator.x, Estimator.y, "ko")
- plt.show()
\ No newline at end of file
+ plt.show()
diff --git a/stpy/continuous_processes/dirichlet_mixture.py b/stpy/continuous_processes/dirichlet_mixture.py
index 2839b61..cb6a6ff 100755
--- a/stpy/continuous_processes/dirichlet_mixture.py
+++ b/stpy/continuous_processes/dirichlet_mixture.py
@@ -7,117 +7,127 @@
class DirichletMixture(Estimator):
- def __init__(self, processes):
- self.processes = processes
- self.k = len(self.processes)
- self.s = processes[0].s
-
- def fit_GP(self, X, y, xtest=None, N=200):
- self.X = X
- self.y = y
- n = X.shape[0]
- self.fit = True
- return True
-
- def custom_kernel(self, a, b, alpha):
- kernel = alpha[0] * self.processes[0].kernel(a, b)
- for j in np.arange(1, self.k, 1):
- kernel = kernel + alpha[j] * self.processes[j].kernel(a, b)
- return kernel
-
- def mean_var(self, xtest, N=100):
-
- self.K_mix = np.zeros(shape=(n, n))
-
- mu = xtest * 0
- s = xtest * 0
-
- samples = np.zeros(shape=(N, xtest.shape[0], xtest.shape[1]))
-
- for i in range(N):
- alpha = np.random.dirichlet(np.ones(shape=(self.k)) * (1. / float(self.k)), 1)[0]
- print("Dirichlet sample:", alpha)
- kernel = lambda a, b: self.custom_kernel(a, b, alpha)
- GP_mix = GaussianProcess(kernel="custom", custom=kernel, s=self.s)
- GP_mix.fit_GP(self.X, self.y)
- samples[i, :, :] = GP_mix.sample(xtest)
-
- mu = np.mean(samples, axis=0)
- s = np.var(samples, axis=0)
- s = np.sqrt(s)
-
- return (mu, s)
-
- def sample(self, xtest, size=1, with_mask=False):
- # sample a GP
- if self.fit == True:
- alpha = np.random.dirichlet(np.ones(shape=(self.k)) * (1. / float(self.k)), 1)[0]
- kernel = lambda a, b: self.custom_kernel(a, b, alpha)
- GP_mix = GaussianProcess(kernel="custom", custom=kernel, s=self.s)
- GP_mix.fit_GP(self.X, self.y)
- return GP_mix.sample(xtest)
- else:
- alpha = np.random.dirichlet(np.ones(shape=(self.k)) * (1. / float(self.k)), 1)[0]
- kernel = lambda a, b: self.custom_kernel(a, b, alpha)
- GP_mix = GaussianProcess(kernel="custom", custom=kernel, s=self.s)
- return GP_mix.sample(xtest)
+ def __init__(self, processes):
+ self.processes = processes
+ self.k = len(self.processes)
+ self.s = processes[0].s
+
+ def fit_GP(self, X, y, xtest=None, N=200):
+ self.X = X
+ self.y = y
+ n = X.shape[0]
+ self.fit = True
+ return True
+
+ def custom_kernel(self, a, b, alpha):
+ kernel = alpha[0] * self.processes[0].kernel(a, b)
+ for j in np.arange(1, self.k, 1):
+ kernel = kernel + alpha[j] * self.processes[j].kernel(a, b)
+ return kernel
+
+ def mean_var(self, xtest, N=100):
+ """Monte-Carlo estimate over N Dirichlet-weighted kernel mixtures; returns (mean, std) of the drawn samples. Needs fit_GP first."""
+ self.K_mix = np.zeros(shape=(self.X.shape[0], self.X.shape[0]))  # sized from the stored data; `n` was undefined in this scope
+
+ # (removed dead placeholders `mu = xtest * 0` and `s = xtest * 0`;
+ # both names are assigned from the samples below before they are returned)
+
+ samples = np.zeros(shape=(N, xtest.shape[0], xtest.shape[1]))
+
+ for i in range(N):
+ alpha = np.random.dirichlet(
+ np.ones(shape=(self.k)) * (1.0 / float(self.k)), 1
+ )[0]
+ print("Dirichlet sample:", alpha)
+ kernel = lambda a, b: self.custom_kernel(a, b, alpha)
+ GP_mix = GaussianProcess(kernel="custom", custom=kernel, s=self.s)
+ GP_mix.fit_GP(self.X, self.y)
+ samples[i, :, :] = GP_mix.sample(xtest)
+
+ mu = np.mean(samples, axis=0)
+ s = np.var(samples, axis=0)
+ s = np.sqrt(s)  # despite the method name, the second return value is a standard deviation
+
+ return (mu, s)
+
+ def sample(self, xtest, size=1, with_mask=False):
+ # sample a GP
+ if self.fit == True:
+ alpha = np.random.dirichlet(
+ np.ones(shape=(self.k)) * (1.0 / float(self.k)), 1
+ )[0]
+ kernel = lambda a, b: self.custom_kernel(a, b, alpha)
+ GP_mix = GaussianProcess(kernel="custom", custom=kernel, s=self.s)
+ GP_mix.fit_GP(self.X, self.y)
+ return GP_mix.sample(xtest)
+ else:
+ alpha = np.random.dirichlet(
+ np.ones(shape=(self.k)) * (1.0 / float(self.k)), 1
+ )[0]
+ kernel = lambda a, b: self.custom_kernel(a, b, alpha)
+ GP_mix = GaussianProcess(kernel="custom", custom=kernel, s=self.s)
+ return GP_mix.sample(xtest)
if __name__ == "__main__":
- # domain size
- L_infinity_ball = 5
- # dimension
- d = 1
- # error variance
- s = 0.001
- # grid density
- n = 1024
- # number of intial points
- N = 15
- # smoothness
- gamma = 2
-
- # model
- GP1 = GaussianProcess(kernel="squared_exponential", s=s, gamma=1.5, diameter=L_infinity_ball)
- GP2 = GaussianProcess(kernel="squared_exponential", s=s, gamma=1.1)
- GP3 = GaussianProcess(kernel="modified_matern", s=s, kappa=1., nu=2, gamma=1.1)
- GP4 = GaussianProcess(kernel="linear", s=s, kappa=1.)
-
- # data
- # GPTrue = GaussianProcess(kernel="linear", s=0, kappa=1., diameter=L_infinity_ball)
- GPTrue = GaussianProcess(kernel="squared_exponential", s=s, gamma=2., kappa=1)
- # GPTrue = GaussianProcess(kernel = "modified_matern", s =s, kappa = 1., nu = 2, gamma = 1.1)
-
- # test environment
- TT = code.test_problems.test_functions.test_function()
- (d, xtest, x, gamma) = TT.sample_ss_bounds(N, n, d=d, L_infinity_ball=L_infinity_ball)
- f = lambda x: TT.sample_ss(x, sigma=0, GP=GPTrue)
-
- # targets
- y = f(x)
- GPs = [GP1, GP2, GP3, GP4]
- Mix = DirichletMixture(GPs)
- for j in range(N):
- plt.figure(1)
- plt.clf()
- X = x[0:j + 1, :].reshape(-1, 1)
- y = f(X)
- Mix.fit_GP(X, y)
- (mu, var) = Mix.mean_var(xtest)
- samples = Mix.sample(xtest, size=5)
- plt.plot(xtest, samples, '--', linewidth=3, alpha=0.1)
- plt.plot(xtest, mu, 'k', linewidth=4)
- plt.plot(xtest, mu, 'k', linewidth=4)
- plt.fill_between(xtest.flat, (mu - var).flat, (mu + var).flat, color="#dddddd")
- plt.plot(X, y, 'ro', markersize=10)
- plt.plot(xtest, f(xtest), 'g', linewidth=4)
- plt.draw()
- # plt.figure(2)
- # plt.clf()
- # plt.title("Probability of Category")
- # plt.bar(np.arange(len(GPs)), Mix.weights, np.ones(len(GPs))*0.5)
- # plt.xticks(np.arange(len(GPs)), [GP.description() for GP in GPs], rotation=30)
- # plt.subplots_adjust(bottom=0.35)
- # plt.draw()
- plt.pause(4)
+ # domain size
+ L_infinity_ball = 5
+ # dimension
+ d = 1
+ # error variance
+ s = 0.001
+ # grid density
+ n = 1024
+ # number of intial points
+ N = 15
+ # smoothness
+ gamma = 2
+
+ # model
+ GP1 = GaussianProcess(
+ kernel="squared_exponential", s=s, gamma=1.5, diameter=L_infinity_ball
+ )
+ GP2 = GaussianProcess(kernel="squared_exponential", s=s, gamma=1.1)
+ GP3 = GaussianProcess(kernel="modified_matern", s=s, kappa=1.0, nu=2, gamma=1.1)
+ GP4 = GaussianProcess(kernel="linear", s=s, kappa=1.0)
+
+ # data
+ # GPTrue = GaussianProcess(kernel="linear", s=0, kappa=1., diameter=L_infinity_ball)
+ GPTrue = GaussianProcess(kernel="squared_exponential", s=s, gamma=2.0, kappa=1)
+ # GPTrue = GaussianProcess(kernel = "modified_matern", s =s, kappa = 1., nu = 2, gamma = 1.1)
+
+ # test environment
+ TT = code.test_problems.test_functions.test_function()
+ (d, xtest, x, gamma) = TT.sample_ss_bounds(
+ N, n, d=d, L_infinity_ball=L_infinity_ball
+ )
+ f = lambda x: TT.sample_ss(x, sigma=0, GP=GPTrue)
+
+ # targets
+ y = f(x)
+ GPs = [GP1, GP2, GP3, GP4]
+ Mix = DirichletMixture(GPs)
+ for j in range(N):
+ plt.figure(1)
+ plt.clf()
+ X = x[0 : j + 1, :].reshape(-1, 1)
+ y = f(X)
+ Mix.fit_GP(X, y)
+ (mu, var) = Mix.mean_var(xtest)
+ samples = Mix.sample(xtest, size=5)
+ plt.plot(xtest, samples, "--", linewidth=3, alpha=0.1)
+ plt.plot(xtest, mu, "k", linewidth=4)
+ plt.plot(xtest, mu, "k", linewidth=4)
+ plt.fill_between(xtest.flat, (mu - var).flat, (mu + var).flat, color="#dddddd")
+ plt.plot(X, y, "ro", markersize=10)
+ plt.plot(xtest, f(xtest), "g", linewidth=4)
+ plt.draw()
+ # plt.figure(2)
+ # plt.clf()
+ # plt.title("Probability of Category")
+ # plt.bar(np.arange(len(GPs)), Mix.weights, np.ones(len(GPs))*0.5)
+ # plt.xticks(np.arange(len(GPs)), [GP.description() for GP in GPs], rotation=30)
+ # plt.subplots_adjust(bottom=0.35)
+ # plt.draw()
+ plt.pause(4)
diff --git a/stpy/continuous_processes/fourier_fea.py b/stpy/continuous_processes/fourier_fea.py
index b635c1c..c39333c 100755
--- a/stpy/continuous_processes/fourier_fea.py
+++ b/stpy/continuous_processes/fourier_fea.py
@@ -5,500 +5,662 @@
class GaussianProcessFF(KernelizedFeatures):
- '''
- Random Fourier Features for Gaussian Kernel
- '''
-
- def __init__(self, project=None, gamma=0.1, s=0.001, approx="rff", m=100, d=1, diameter=1.0, verbose=True,
- groups=None,
- bounds=None, scale=1.0, kernel="squared_exponential", nu=0.5, kappa=1.0):
-
- self.gamma = gamma
- self.s = s
- self.x = None
- self.K = 0
- self.mu = 0.0
- self.fit = False
- self.beta = None
- self.m = m
- self.project = None
- self.nu = nu
- self.lam = 1.
- if groups is None:
- self.no_groups = 1
- else:
- self.no_groups = len(groups)
-
- self.approx = approx
- self.d = d
- self.bounds = bounds
- self.groups = groups
- self.diameter = diameter
- self.admits_first_order = True
- self.verbose = verbose
- self.kernel = kernel
- self.scale = scale
- self.m_old = None
- self.kappa = kappa
- self.heuristic_variance = False
- if self.groups is None:
- self.embedding_map = self.sample_embedding(self.d, self.m, self.gamma)
- self.m = self.embedding_map.m
- else:
- self.no_groups = float(len(self.groups))
- self.embedding_map = self.sample_embedding_group()
-
- def resample(self):
- self.embedding_map = self.sample_embedding_group()
-
- def description(self):
- """
- Description of GP in text
- :return: string with description
- """
- return "Fourier Features object\n" + "Appprox: " + self.approx + "\n" + "Bandwidth: " + str(
- self.gamma) + "\n" + "Groups:" + str(self.groups) + "\n noise: " + str(self.s)
-
- def get_gamma(self, t):
- if self.kernel == "squared_exponential" and self.groups is None:
- return (np.log(t)) ** self.d
- elif self.kernel == "linear":
- return 10 * self.m
- elif self.kernel == "squared_exponential" and self.groups is not None:
- return len(self.groups) * (np.log(t))
- elif self.kernel == "matern":
- return (np.log(t)) ** self.d
- elif self.kernel == "modified_matern":
- return (np.log(t)) ** self.d
-
- def sample_embedding_group(self):
- # self.m is a vector of ms
- # self.gamma is a vector of gammas
- embedding_map = []
-
- self.d_effective = int(self.d / self.no_groups)
-
- if self.groups is not None:
- self.d_group_sizes = [len(group) for group in self.groups]
- self.d_effective = max(self.d_group_sizes)
-
- if np.sum(np.array(list(self.gamma.size()))) > 1:
- self.gamma = self.gamma
- else:
- self.gamma = torch.ones(int(self.no_groups), dtype=torch.float64) * self.gamma
-
- for i, group in enumerate(self.groups):
- embedding_map.append(self.sample_embedding(len(group), self.m[i], self.gamma[i]))
- self.m[i] = embedding_map[i].m
- return embedding_map
-
- def sample_embedding(self, d_effective, m, gamma):
- if self.m_old is not None:
- self.m = self.m_old
-
- if self.approx == "quad":
- embedding_map = QuadratureEmbedding(gamma=gamma, nu=self.nu, m=m, d=d_effective, diameter=self.diameter,
- groups=None,
- kernel=self.kernel, approx=self.approx)
- elif self.approx == "rff":
- embedding_map = RFFEmbedding(gamma=gamma, nu=self.nu, m=m, d=d_effective, diameter=self.diameter,
- groups=None,
- kernel=self.kernel, approx=self.approx)
- elif self.approx == "rff2":
- embedding_map = RFFEmbedding(biased=True, gamma=gamma, nu=self.nu, m=m, d=d_effective,
- diameter=self.diameter, groups=None,
- kernel=self.kernel, approx=self.approx)
- elif self.approx == "halton":
- embedding_map = RFFEmbedding(gamma=gamma, nu=self.nu, m=m, d=d_effective, diameter=self.diameter,
- groups=None,
- kernel=self.kernel, approx=self.approx)
- elif self.approx == "hermite":
- embedding_map = HermiteEmbedding(gamma=gamma, nu=self.nu, m=m, d=d_effective, diameter=self.diameter,
- groups=None,
- kernel=self.kernel, approx=self.approx)
- elif self.approx == "trapezoidal":
- embedding_map = TrapezoidalEmbedding(gamma=gamma, nu=self.nu, m=m, d=d_effective, diameter=self.diameter,
- groups=None,
- kernel=self.kernel, approx=self.approx)
- elif self.approx == "ccff":
- embedding_map = ClenshawCurtisEmbedding(gamma=gamma, nu=self.nu, m=m, d=d_effective, diameter=self.diameter,
- groups=None,
- kernel=self.kernel, approx=self.approx)
- elif self.approx == "matern_secific":
- embedding_map = MaternEmbedding(gamma=gamma, nu=self.nu, m=m, d=d_effective, diameter=self.diameter,
- groups=None,
- kernel=self.kernel, approx=self.approx)
- elif self.approx == "quad_periodic":
- embedding_map = QuadPeriodicEmbedding(gamma=gamma, nu=self.nu, m=m, d=d_effective, diameter=self.diameter,
- groups=None,
- kernel=self.kernel, approx=self.approx)
- elif self.approx == "kl":
- embedding_map = KLEmbedding(gamma=gamma, nu=self.nu, m=m, d=d_effective,
- diameter=self.diameter, groups=None, kernel=self.kernel, approx=self.approx)
- elif self.approx == "orf":
- embedding_map = RFFEmbedding(gamma=gamma, nu=self.nu, m=m, d=d_effective, diameter=self.diameter,
- groups=None,
- kernel=self.kernel, approx=self.approx)
- else:
- embedding_map = QuadratureEmbedding(gamma=gamma, nu=self.nu, m=m, d=d_effective, diameter=self.diameter,
- groups=None,
- kernel=self.kernel, approx=self.approx)
- self.m_old = self.m
-
- return embedding_map
-
- def embed(self, x):
- if self.groups is None:
-
- if self.project is not None:
- x = self.project(x)
-
- return self.embedding_map.embed(x)
-
- else:
- return self.embed_whole(x)
-
- def embed_group(self, x, group):
- return self.embedding_map[group].embed(x) / (np.sqrt(self.no_groups))
-
- def embed_whole(self, x):
- if self.project is not None:
- x = self.project(x)
-
- if self.groups == None:
- return self.embed(x)
- else:
- n = x.size()[0]
- M = torch.zeros(int(torch.sum(self.m)), n, dtype=torch.float64)
- for i, group in enumerate(self.groups):
- embeding = self.embed_group(x[:, group], i)
- index = int(torch.sum(self.m[0:i], dim=0))
- index_next = int(torch.sum(self.m[0:i + 1], dim=0))
- M[index:index_next, :] = torch.t(embeding)
- return torch.t(M)
-
- def get_basis_size(self):
- return self.m
-
- def set_basis_size(self, m):
- self.m_old = None
- self.m = m
-
- def right_kernel(self):
- embeding = self.embed(self.x)
- Z = self.linear_kernel(embeding, embeding)
- K = (Z + self.s * self.s * torch.eye(self.n, dtype=torch.float64))
- return K
-
- def fit_gp(self, x, y, iterative=False):
- '''
- Function to Fit GP
- '''
-
- self.x = x
- self.y = y
- self.n = list(self.x.size())[0]
- self.linear_kernel = KernelFunction(kernel_name="linear").linear_kernel
-
- if self.groups == None:
- embeding = self.embed(x)
- self.Z_ = self.linear_kernel(torch.t(embeding), torch.t(embeding))
- self.K = (self.Z_ + self.s * self.s * torch.eye(self.m, dtype=torch.float64))
- self.Q = torch.t(embeding)
-
- else: ## additive models
- M = torch.t(self.embed_whole(x))
- self.Q = M
- self.Z_ = self.linear_kernel(M, M)
- self.K = self.kappa * self.Z_ + self.s * self.s * torch.eye(int(torch.sum(self.m)), dtype=torch.float64)
-
- self.fit = True
-
- return None
-
- def log_marginal_likelihood_self(self):
- return self.log_marginal_likelihood(self.gamma, torch.eye(self.d, dtype=torch.float64), self.kappa)
-
- def log_marginal_likelihood(self, gamma, Rot, kappa, kernel="default"):
- """
- Calculated the log marginal likelihood
- :param kernel: custom kenrel object
- :return: float
- """
- # func = self.kernel_object.get_kernel_function()
-
- self.x = torch.mm(self.x, Rot)
- L = torch.torch.cholesky(self.K, upper=False)
- logdet = -0.5 * 2 * torch.sum(torch.log(torch.diag(L)))
-
- Q = self.embed_whole(self.x)
- rhs = torch.mm(torch.t(Q), self.y)
- alpha, _ = torch.solve(rhs, self.K)
- logprob = -0.5 * (torch.mm(torch.t(self.y), self.y) - torch.mm(torch.t(rhs),
- alpha)) / self.s ** 2 + logdet # - 0.5*self.n*np.log(2*np.pi)
- logprob = -logprob
-
- return logprob
-
- def mean_std(self, xtest, reuse=False):
- '''
- Calculate mean and variance for GP at xtest points
- '''
- # compute the mean at our test points.
-
- if self.project is not None:
- self.project(xtest)
-
- if self.groups == None:
- embeding = self.embed(xtest)
- Q = self.embed(self.x)
- else:
- self.Z_ = self.K - self.s * self.s * torch.eye(int(torch.sum(self.m)), dtype=torch.float64)
- embeding = self.embed_whole(xtest)
- Q = self.embed_whole(self.x)
-
- theta_mean, _ = torch.solve(torch.mm(torch.t(Q), self.y), self.K)
- ymean = torch.mm(embeding, theta_mean)
-
- temp = torch.t(torch.solve(torch.t(embeding), self.K)[0])
- diagonal = self.s * self.s * torch.einsum('ij,ji->i', (temp, torch.t(embeding))).view(-1, 1)
- yvar = torch.sqrt(diagonal)
-
- return (ymean, yvar)
-
- # def posterior_inf(self, xtest, tol=10e-5, max_int=20000):
- # alpha = np.random.randn(self.n, 1)
- # err = 10.
- # F = 10.0
- # counter = 0
- # embeding = self.embed(self.x)
- # K = (linear_kernel(embeding.T, embeding.T) + self.s * self.s * np.eye(self.n))
- # Kinv = np.linalg.pinv(K)
- #
- # q = []
- # for index in range(self.n):
- # q.append(self.embed(self.x[index, :].reshape(1, -1)))
- # q = np.array(q)
- #
- # while (counter < max_int and err / F > tol):
- # # first find which index gives maximum
- # # print (K.shape)
- # index = np.argmax(np.abs(K.dot(alpha) - self.y))
- # sign = np.sign(K.dot(alpha)[index] - self.y[index])
- #
- # k = linear_kernel(embeding.T, q[index, :, :].T).reshape(-1, 1)
- # # print ("k: ", k.shape)
- # oldalpha = alpha
- # alpha = alpha - 1. / np.sqrt(counter + 1) * Kinv.dot(self.s * K.dot(alpha) + sign * k)
- # err = np.linalg.norm(oldalpha - alpha)
- # counter += 1
- # F = np.max(np.abs(K.dot(alpha) - self.y)) + self.s * alpha.T.dot(K.dot(alpha))[0][0]
- #
- # y_inf = linear_kernel(self.embed(self.x).T, self.embed(xtest).T).T.dot(alpha)
- # return y_inf
-
- def sample_theta(self, size=1):
- if self.groups is None:
- basis = self.m
- else:
- basis = int(int(torch.sum(self.m)))
- zeros = torch.zeros(basis, size, dtype=torch.float64)
- random_vector = torch.normal(mean=zeros, std=1.)
-
- if self.fit == True:
- # random vector
- Z = torch.pinverse(self.K)
- self.L = torch.cholesky(Z, upper=False)
- theta_mean = torch.mm(Z, torch.mm(self.Q, self.y))
- theta = torch.mm(self.s * self.L, random_vector)
- theta = theta + theta_mean
- else:
- theta_mean = 0
- Z = (1. + self.s * self.s) * torch.eye(basis, dtype=torch.float64)
- L = torch.cholesky(Z, upper=False)
- theta = torch.mm(L, random_vector) + theta_mean
- return theta
-
- def sample(self, xtest, size=1):
- '''
- Sample functions from Gaussian Process
- '''
- theta = self.sample_theta(size=size)
- if self.groups == None:
- f = torch.mm(self.embed(xtest), theta)
- else:
- f = torch.zeros(xtest.size()[0], size, dtype=torch.float64)
- for i, group in enumerate(self.groups):
- embeding = self.embed_group(xtest[:, group], i)
- index = int(torch.sum(self.m[0:i], dim=0))
- index_next = int(torch.sum(self.m[0:i + 1], dim=0))
- f += torch.mm(embeding, theta[index:index_next, :])
- return f
-
- def sample_and_max(self, xtest, size=1):
- '''
- Sample functions from Gaussian Process and take Maximum
- '''
- f = self.sample(xtest, size=size)
-
- index = np.argmax(f.detach(), axis=0)
- return (xtest[index, :], f[index, :])
-
- def ucb_optimize(self, beta, multistart=25):
-
- mean = lambda x: self.mean_std(torch.from_numpy(x).view(1, -1))[0][0][0]
- sigma = lambda x: self.mean_std(torch.from_numpy(x).view(1, -1))[1][0][0]
-
- fun = lambda x: -(mean(x) + np.sqrt(beta) * sigma(x))
- # grad = lambda x: -complex_step_derivative(fun,1e-10,x.reshape(1,-1))
-
- mybounds = self.bounds
- results = []
- from scipy.optimize import minimize
-
- for i in range(multistart):
- x0 = np.random.randn(self.d)
- for i in range(self.d):
- x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1])
-
- res = minimize(fun, x0, method="L-BFGS-B", jac=None, tol=0.0001, bounds=mybounds)
- solution = res.x
- results.append([solution, -fun(solution)])
-
- results = np.array(results)
- index = np.argmax(results[:, 1])
- solution = results[index, 0]
-
- return (solution, -fun(solution))
-
- def special_embed_eval(self, x, theta):
- f = 0
- x = torch.from_numpy(x)
- # print (x)
- for i, group in enumerate(self.groups):
- embeding = self.embed_group(x[group].view(-1, len(group)), i)
- index = torch.sum(self.m[0:i], dim=0)
- index_next = torch.sum(self.m[0:i + 1], dim=0)
- f += torch.mm(embeding, theta[int(index):int(index_next), :])
- return f.numpy()
-
- def special_embed_eval_grad(self, x, theta):
- ff = lambda x: self.special_embed_eval(x.flatten(), theta)
- grad = complex_step_derivative(ff, 1e-10, x.reshape(-1, 1).T).flatten()
- return grad
-
- def get_lambdas_additive(self, theta):
- fun = lambda x: -self.special_embed_eval(x, theta)
- grad = lambda x: -self.special_embed_eval_grad(x, theta)
- return [fun, grad]
-
- def get_lambdas(self, theta):
-
- # complex step differentiation
- fun = lambda x: -(torch.mm(self.embed(torch.from_numpy(x).view(1, self.d)), theta).numpy()).flatten()
- grad = lambda x: -complex_step_derivative(fun, 1e-10, x.reshape(self.d, 1).T).flatten()
- return [fun, grad]
-
- def sample_and_optimize(self, xtest=None, multistart=25, minimizer="L-BFGS-B", grid=100, verbose=0):
- '''
- Sample functions from Gaussian Process and take Maximum using
- first order maximization
- '''
-
- # sample linear approximating
- theta = self.sample_theta()
- from scipy.optimize import minimize
-
- # get bounds
- if self.bounds == None:
- mybounds = tuple([(-self.diameter, self.diameter) for i in range(self.d)])
- else:
- mybounds = self.bounds
-
- fun = lambda x: -torch.mm(torch.t(theta), torch.t(self.embed(torch.from_numpy(x).view(1, -1)))).numpy()
-
- results = []
- for j in range(multistart):
- x0 = np.random.randn(self.d)
- for i in range(self.d):
- x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1])
-
- if minimizer == "L-BFGS-B":
- res = minimize(fun, x0, method="L-BFGS-B", jac=None, tol=0.0001, bounds=mybounds)
- solution = res.x
- elif minimizer == "ProjGD":
- res = projected_gradient_descent(fun, grad, x0, mybounds, tol=0.001,
- nu=1. / (self.m * np.max(np.abs(theta))))
- solution = res.x
- elif minimizer == "coordinate-wise":
-
- solution = np.random.randn(self.d)
- for i in range(self.d):
- if verbose > 0:
- print("Dimension: ", i)
- fun_cw = lambda x: lambda_coordinate(fun, x0, i, x)
- ranges = [slice(mybounds[i][0], mybounds[i][1], 1. / float(grid))]
- out = scipy.optimize.brute(fun_cw, ranges, finish=None)
- solution[i] = out
- if verbose > 0:
- print("Soln:", out.T)
- elif minimizer == "CD_cw":
- raise BaseException("Not implemented yet")
- else:
- raise AssertionError("Wrong optimizer selected.")
-
- results.append([solution, -fun(solution)])
-
- results = np.array(results)
- index = np.argmax(results[:, 1])
- solution = results[index, 0]
-
- return (torch.from_numpy(solution), -torch.from_numpy(fun(solution)))
+ """
+ Random Fourier Features for Gaussian Kernel
+ """
+
+    def __init__(
+        self,
+        project=None,
+        gamma=0.1,
+        s=0.001,
+        approx="rff",
+        m=100,
+        d=1,
+        diameter=1.0,
+        verbose=True,
+        groups=None,
+        bounds=None,
+        scale=1.0,
+        kernel="squared_exponential",
+        nu=0.5,
+        kappa=1.0,
+    ):
+        """
+        Store the hyper-parameters and build the feature embedding(s).
+
+        :param project: accepted but not stored; ``self.project`` is always
+            initialised to ``None`` (NOTE(review): confirm this is intended)
+        :param gamma: bandwidth handed to the embedding; indexed per group
+            when ``groups`` is given
+        :param s: noise level; enters the regularised matrices as ``s * s``
+        :param approx: name of the embedding type, see ``sample_embedding``
+        :param m: basis size; indexed per group when ``groups`` is given
+        :param d: input dimension
+        :param diameter: passed to the embedding; also the half-width of the
+            default box used by ``sample_and_optimize``
+        :param verbose: stored on the instance
+        :param groups: list of index lists defining additive groups, or None
+        :param bounds: per-dimension (low, high) pairs used by the optimizers
+        :param scale: stored on the instance
+        :param kernel: kernel name passed to the embedding
+        :param nu: passed to the embedding
+        :param kappa: multiplier of the feature Gram matrix in grouped models
+        """
+        # kernel / noise hyper-parameters
+        self.gamma = gamma
+        self.s = s
+        self.nu = nu
+        self.kappa = kappa
+        self.kernel = kernel
+        self.scale = scale
+        self.lam = 1.0
+
+        # input domain
+        self.d = d
+        self.diameter = diameter
+        self.bounds = bounds
+        self.groups = groups
+
+        # approximation settings
+        self.approx = approx
+        self.m = m
+        self.m_old = None
+
+        # state filled in by fit_gp
+        self.x = None
+        self.K = 0
+        self.mu = 0.0
+        self.beta = None
+        self.fit = False
+
+        # flags
+        self.project = None
+        self.admits_first_order = True
+        self.verbose = verbose
+        self.heuristic_variance = False
+
+        if groups is None:
+            self.no_groups = 1
+            self.embedding_map = self.sample_embedding(self.d, self.m, self.gamma)
+            # adopt the basis size reported by the embedding
+            self.m = self.embedding_map.m
+        else:
+            self.no_groups = float(len(groups))
+            self.embedding_map = self.sample_embedding_group()
+
+    def resample(self):
+        """
+        Rebuild the per-group embeddings and install them on the model.
+
+        Delegates to ``sample_embedding_group``, which iterates over
+        ``self.groups``.
+        """
+        rebuilt_map = self.sample_embedding_group()
+        self.embedding_map = rebuilt_map
+
+    def description(self):
+        """
+        Summarise the model as text.
+
+        :return: string listing the approximation name, bandwidth, groups and
+            noise level, one per line
+        """
+        pieces = (
+            "Fourier Features object\n",
+            "Appprox: ",
+            self.approx,
+            "\n",
+            "Bandwidth: ",
+            str(self.gamma),
+            "\n",
+            "Groups:",
+            str(self.groups),
+            "\n noise: ",
+            str(self.s),
+        )
+        return "".join(pieces)
+
+    def get_gamma(self, t):
+        """
+        Return a kernel-dependent quantity evaluated at ``t``.
+
+        NOTE(review): presumably an information-gain style bound after ``t``
+        rounds; the code does not say, confirm with the author.
+
+        :param t: positive number whose logarithm is taken
+        :return: ``10 * m`` for the linear kernel, ``len(groups) * log(t)``
+            for a grouped squared exponential kernel, ``log(t) ** d`` for the
+            ungrouped squared exponential, matern and modified matern
+            kernels, and None for any other kernel name
+        """
+        kernel = self.kernel
+        if kernel == "linear":
+            return 10 * self.m
+        if kernel == "squared_exponential" and self.groups is not None:
+            return len(self.groups) * np.log(t)
+        if kernel in ("squared_exponential", "matern", "modified_matern"):
+            return np.log(t) ** self.d
+        return None
+
+    def sample_embedding_group(self):
+        """
+        Build one embedding per additive group.
+
+        ``self.m`` and ``self.gamma`` are indexed per group. A ``gamma`` whose
+        dimension sizes sum to at most one is first broadcast to one entry
+        per group. Each entry of ``self.m`` is overwritten with the basis
+        size reported by the embedding built for that group.
+
+        :return: list of embedding objects, one per entry of ``self.groups``
+        """
+        self.d_effective = int(self.d / self.no_groups)
+
+        if self.groups is not None:
+            self.d_group_sizes = [len(members) for members in self.groups]
+            self.d_effective = max(self.d_group_sizes)
+
+        # gamma must expose .size(), i.e. behave like a torch tensor here
+        gamma_extent = np.sum(np.array(list(self.gamma.size())))
+        if not gamma_extent > 1:
+            group_count = int(self.no_groups)
+            self.gamma = torch.ones(group_count, dtype=torch.float64) * self.gamma
+
+        per_group_maps = []
+        for position, members in enumerate(self.groups):
+            group_map = self.sample_embedding(
+                len(members), self.m[position], self.gamma[position]
+            )
+            per_group_maps.append(group_map)
+            self.m[position] = group_map.m
+        return per_group_maps
+
+    def sample_embedding(self, d_effective, m, gamma):
+        """
+        Construct the embedding object selected by ``self.approx``.
+
+        Recognised names are "quad", "rff", "rff2" (RFF built with
+        ``biased=True``), "halton", "orf", "hermite", "trapezoidal", "ccff",
+        "matern_secific", "quad_periodic" and "kl". Any other name falls back
+        to the quadrature embedding.
+
+        :param d_effective: input dimension seen by this embedding
+        :param m: requested basis size
+        :param gamma: bandwidth for this embedding
+        :return: the constructed embedding object
+        """
+        if self.m_old is not None:
+            self.m = self.m_old
+
+        approx = self.approx
+        extra = {}
+        # only the class differs between branches; the keyword arguments are
+        # shared and passed once below
+        if approx in ("rff", "halton", "orf"):
+            embedding_cls = RFFEmbedding
+        elif approx == "rff2":
+            embedding_cls = RFFEmbedding
+            extra["biased"] = True
+        elif approx == "hermite":
+            embedding_cls = HermiteEmbedding
+        elif approx == "trapezoidal":
+            embedding_cls = TrapezoidalEmbedding
+        elif approx == "ccff":
+            embedding_cls = ClenshawCurtisEmbedding
+        elif approx == "matern_secific":
+            embedding_cls = MaternEmbedding
+        elif approx == "quad_periodic":
+            embedding_cls = QuadPeriodicEmbedding
+        elif approx == "kl":
+            embedding_cls = KLEmbedding
+        else:
+            # "quad" and every unrecognised name
+            embedding_cls = QuadratureEmbedding
+
+        embedding_map = embedding_cls(
+            **extra,
+            gamma=gamma,
+            nu=self.nu,
+            m=m,
+            d=d_effective,
+            diameter=self.diameter,
+            groups=None,
+            kernel=self.kernel,
+            approx=approx,
+        )
+        self.m_old = self.m
+
+        return embedding_map
+
+    def embed(self, x):
+        """
+        Map inputs to feature space.
+
+        Grouped models delegate to ``embed_whole``; otherwise the optional
+        projection is applied first and the single embedding is evaluated.
+
+        :param x: inputs, one row per point
+        :return: embedded inputs
+        """
+        if self.groups is not None:
+            return self.embed_whole(x)
+
+        if self.project is not None:
+            x = self.project(x)
+        return self.embedding_map.embed(x)
+
+    def embed_group(self, x, group):
+        """
+        Embed ``x`` with the embedding of one group, divided by
+        ``sqrt(no_groups)``.
+
+        :param x: inputs restricted to the coordinates of the group
+        :param group: position of the group in ``self.embedding_map``
+        :return: scaled embedding of ``x``
+        """
+        group_map = self.embedding_map[group]
+        normaliser = np.sqrt(self.no_groups)
+        return group_map.embed(x) / normaliser
+
+    def embed_whole(self, x):
+        """
+        Embed ``x`` with every group embedding and lay the results side by side.
+
+        :param x: 2-d tensor of inputs, one row per point
+        :return: for grouped models a float64 tensor with one row per point
+            and ``sum(self.m)`` columns; for ungrouped models whatever
+            ``embed`` returns
+        """
+        if self.project is not None:
+            x = self.project(x)
+
+        if self.groups is None:
+            # NOTE(review): embed() applies self.project again on this path
+            return self.embed(x)
+
+        n = x.size()[0]
+        stacked = torch.zeros(int(torch.sum(self.m)), n, dtype=torch.float64)
+        # running row offset; group i occupies self.m[i] consecutive rows
+        row = 0
+        for i, group in enumerate(self.groups):
+            row_next = row + int(self.m[i])
+            stacked[row:row_next, :] = torch.t(self.embed_group(x[:, group], i))
+            row = row_next
+        return torch.t(stacked)
+
+    def get_basis_size(self):
+        """
+        :return: the current basis size ``self.m`` (per-group sizes for
+            grouped models)
+        """
+        basis_size = self.m
+        return basis_size
+
+    def set_basis_size(self, m):
+        """
+        Set the basis size and clear the remembered previous size, so the
+        next ``sample_embedding`` call does not restore it.
+
+        :param m: new basis size
+        """
+        self.m = m
+        self.m_old = None
+
+    def right_kernel(self):
+        """
+        Gram matrix of the embedded training inputs plus ``s * s`` on the
+        diagonal.
+
+        Uses ``self.linear_kernel`` and ``self.n``, both of which are set by
+        ``fit_gp``.
+
+        :return: ``n x n`` tensor
+        """
+        features = self.embed(self.x)
+        gram = self.linear_kernel(features, features)
+        noise = self.s * self.s * torch.eye(self.n, dtype=torch.float64)
+        return gram + noise
+
+    def fit_gp(self, x, y, iterative=False):
+        """
+        Store the data and assemble the regularised feature Gram matrix.
+
+        Sets ``self.Q`` (features by points), ``self.Z_`` (Gram matrix of the
+        features) and ``self.K``. ``K`` is ``Z_`` plus ``s * s`` on the
+        diagonal; for grouped models ``Z_`` is first multiplied by ``kappa``.
+
+        :param x: inputs, one row per point
+        :param y: targets, one row per point
+        :param iterative: unused
+        :return: None
+        """
+        self.x = x
+        self.y = y
+        self.n = list(self.x.size())[0]
+        self.linear_kernel = KernelFunction(kernel_name="linear").linear_kernel
+
+        if self.groups is None:
+            features = torch.t(self.embed(x))
+            self.Z_ = self.linear_kernel(features, features)
+            self.K = self.Z_ + self.s * self.s * torch.eye(self.m, dtype=torch.float64)
+            self.Q = features
+        else:
+            # additive model: all group embeddings stacked
+            features = torch.t(self.embed_whole(x))
+            self.Q = features
+            self.Z_ = self.linear_kernel(features, features)
+            self.K = self.kappa * self.Z_ + self.s * self.s * torch.eye(
+                int(torch.sum(self.m)), dtype=torch.float64
+            )
+
+        self.fit = True
+
+        return None
+
+    def log_marginal_likelihood_self(self):
+        """
+        Evaluate ``log_marginal_likelihood`` at the model's own ``gamma`` and
+        ``kappa`` with an identity rotation.
+
+        :return: the value returned by ``log_marginal_likelihood``
+        """
+        identity = torch.eye(self.d, dtype=torch.float64)
+        return self.log_marginal_likelihood(self.gamma, identity, self.kappa)
+
+    def log_marginal_likelihood(self, gamma, Rot, kappa, kernel="default"):
+        """
+        Evaluate the negated log marginal likelihood of the fitted model,
+        without the ``-0.5 * n * log(2 * pi)`` constant.
+
+        The stored inputs ``self.x`` are replaced by ``self.x @ Rot``, so a
+        non-identity rotation persists after the call. ``self.K`` is used as
+        computed by ``fit_gp``; it is not rebuilt here.
+
+        :param gamma: unused
+        :param Rot: matrix the stored inputs are multiplied with
+        :param kappa: unused
+        :param kernel: unused
+        :return: 1 x 1 tensor holding the negated value
+        """
+        self.x = torch.mm(self.x, Rot)
+        # torch.cholesky / torch.solve are deprecated or removed; the
+        # torch.linalg versions take the system matrix first
+        L = torch.linalg.cholesky(self.K)
+        logdet = -0.5 * 2 * torch.sum(torch.log(torch.diag(L)))
+
+        Q = self.embed_whole(self.x)
+        rhs = torch.mm(torch.t(Q), self.y)
+        alpha = torch.linalg.solve(self.K, rhs)
+        logprob = (
+            -0.5
+            * (torch.mm(torch.t(self.y), self.y) - torch.mm(torch.t(rhs), alpha))
+            / self.s**2
+            + logdet
+        )
+
+        return -logprob
+
+    def mean_std(self, xtest, reuse=False):
+        """
+        Posterior mean and standard deviation at the test points.
+
+        :param xtest: test inputs, one row per point
+        :param reuse: unused
+        :return: tuple (mean, std), each with one row per test point; the
+            second entry is the square root of the posterior variance
+        """
+        if self.project is not None:
+            # NOTE(review): the result is discarded; embed()/embed_whole()
+            # apply the projection themselves
+            self.project(xtest)
+
+        if self.groups is None:
+            embeding = self.embed(xtest)
+            Q = self.embed(self.x)
+        else:
+            self.Z_ = self.K - self.s * self.s * torch.eye(
+                int(torch.sum(self.m)), dtype=torch.float64
+            )
+            embeding = self.embed_whole(xtest)
+            Q = self.embed_whole(self.x)
+
+        # torch.solve(B, A) was removed; torch.linalg.solve takes (A, B)
+        theta_mean = torch.linalg.solve(self.K, torch.mm(torch.t(Q), self.y))
+        ymean = torch.mm(embeding, theta_mean)
+
+        temp = torch.t(torch.linalg.solve(self.K, torch.t(embeding)))
+        # row-wise quadratic form: diag(embeding K^{-1} embeding^T)
+        diagonal = (
+            self.s
+            * self.s
+            * torch.einsum("ij,ji->i", temp, torch.t(embeding)).view(-1, 1)
+        )
+        ystd = torch.sqrt(diagonal)
+
+        return (ymean, ystd)
+
+ # def posterior_inf(self, xtest, tol=10e-5, max_int=20000):
+ # alpha = np.random.randn(self.n, 1)
+ # err = 10.
+ # F = 10.0
+ # counter = 0
+ # embeding = self.embed(self.x)
+ # K = (linear_kernel(embeding.T, embeding.T) + self.s * self.s * np.eye(self.n))
+ # Kinv = np.linalg.pinv(K)
+ #
+ # q = []
+ # for index in range(self.n):
+ # q.append(self.embed(self.x[index, :].reshape(1, -1)))
+ # q = np.array(q)
+ #
+ # while (counter < max_int and err / F > tol):
+ # # first find which index gives maximum
+ # # print (K.shape)
+ # index = np.argmax(np.abs(K.dot(alpha) - self.y))
+ # sign = np.sign(K.dot(alpha)[index] - self.y[index])
+ #
+ # k = linear_kernel(embeding.T, q[index, :, :].T).reshape(-1, 1)
+ # # print ("k: ", k.shape)
+ # oldalpha = alpha
+ # alpha = alpha - 1. / np.sqrt(counter + 1) * Kinv.dot(self.s * K.dot(alpha) + sign * k)
+ # err = np.linalg.norm(oldalpha - alpha)
+ # counter += 1
+ # F = np.max(np.abs(K.dot(alpha) - self.y)) + self.s * alpha.T.dot(K.dot(alpha))[0][0]
+ #
+ # y_inf = linear_kernel(self.embed(self.x).T, self.embed(xtest).T).T.dot(alpha)
+ # return y_inf
+
+    def sample_theta(self, size=1):
+        """
+        Sample weight vectors of the linear model in feature space.
+
+        Before fitting, the weights are ``sqrt(1 + s * s)`` times standard
+        normal draws. After fitting, the mean is ``pinv(K) Q y`` and the
+        random part is ``s * chol(pinv(K)) z``; the Cholesky factor is kept
+        in ``self.L``.
+
+        :param size: number of weight vectors (columns) to draw
+        :return: float64 tensor of shape (basis size, size)
+        """
+        if self.groups is None:
+            basis = self.m
+        else:
+            basis = int(torch.sum(self.m))
+        zeros = torch.zeros(basis, size, dtype=torch.float64)
+        random_vector = torch.normal(mean=zeros, std=1.0)
+
+        if self.fit:
+            Z = torch.pinverse(self.K)
+            # torch.cholesky is deprecated; linalg.cholesky is lower by default
+            self.L = torch.linalg.cholesky(Z)
+            theta_mean = torch.mm(Z, torch.mm(self.Q, self.y))
+            return torch.mm(self.s * self.L, random_vector) + theta_mean
+
+        Z = (1.0 + self.s * self.s) * torch.eye(basis, dtype=torch.float64)
+        L = torch.linalg.cholesky(Z)
+        return torch.mm(L, random_vector)
+
+    def sample(self, xtest, size=1):
+        """
+        Sample functions from the model and evaluate them at ``xtest``.
+
+        :param xtest: test inputs, one row per point
+        :param size: number of sampled functions
+        :return: tensor with one row per test point and one column per sample
+        """
+        theta = self.sample_theta(size=size)
+        if self.groups is None:
+            return torch.mm(self.embed(xtest), theta)
+
+        f = torch.zeros(xtest.size()[0], size, dtype=torch.float64)
+        # running offset into theta; group i owns self.m[i] consecutive rows
+        row = 0
+        for i, group in enumerate(self.groups):
+            row_next = row + int(self.m[i])
+            f += torch.mm(self.embed_group(xtest[:, group], i), theta[row:row_next, :])
+            row = row_next
+        return f
+
+    def sample_and_max(self, xtest, size=1):
+        """
+        Sample functions at ``xtest`` and pick the maximising test point of
+        each sample.
+
+        :param xtest: test inputs, one row per point
+        :param size: number of sampled functions
+        :return: tuple (rows of ``xtest`` at the maximisers, rows of the
+            sampled values at the same indices)
+        """
+        values = self.sample(xtest, size=size)
+        best_rows = np.argmax(values.detach(), axis=0)
+        return (xtest[best_rows, :], values[best_rows, :])
+
+    def ucb_optimize(self, beta, multistart=25):
+        """
+        Maximise the upper confidence bound ``mean + sqrt(beta) * std`` with
+        multi-start L-BFGS-B.
+
+        :param beta: confidence multiplier; its square root scales the std
+        :param multistart: number of random restarts (at least 1)
+        :return: tuple (best point as numpy array, UCB value at that point)
+        :raises ValueError: if ``multistart`` is smaller than 1
+        """
+        from scipy.optimize import minimize
+
+        def fun(x):
+            # negated UCB, because scipy minimises; one mean_std call per point
+            mean, std = self.mean_std(torch.from_numpy(x).view(1, -1))
+            return -(mean[0][0] + np.sqrt(beta) * std[0][0])
+
+        # same fallback box as sample_and_optimize when no bounds were given
+        if self.bounds is None:
+            mybounds = tuple((-self.diameter, self.diameter) for _ in range(self.d))
+        else:
+            mybounds = self.bounds
+
+        best_solution = None
+        best_value = None
+        for _ in range(multistart):
+            # randn draw kept so seeded runs consume the same random stream
+            x0 = np.random.randn(self.d)
+            for k in range(self.d):
+                x0[k] = np.random.uniform(mybounds[k][0], mybounds[k][1])
+
+            res = minimize(
+                fun, x0, method="L-BFGS-B", jac=None, tol=0.0001, bounds=mybounds
+            )
+            value = float(-fun(res.x))
+            # strict comparison keeps the first of several equal maxima
+            if best_value is None or value > best_value:
+                best_solution, best_value = res.x, value
+
+        if best_solution is None:
+            raise ValueError("multistart must be at least 1")
+
+        return (best_solution, -fun(best_solution))
+
+    def special_embed_eval(self, x, theta):
+        """
+        Evaluate the additive model with weights ``theta`` at a single point.
+
+        :param x: 1-d numpy array holding one input point
+        :param theta: weight matrix whose rows are split between the groups
+            according to ``self.m``
+        :return: numpy array with the summed group contributions
+        """
+        point = torch.from_numpy(x)
+        # cuts[k] is the first row of theta belonging to group k
+        cuts = [
+            int(torch.sum(self.m[0:k], dim=0)) for k in range(len(self.groups) + 1)
+        ]
+        total = 0
+        for k, members in enumerate(self.groups):
+            features = self.embed_group(point[members].view(-1, len(members)), k)
+            total = total + torch.mm(features, theta[cuts[k] : cuts[k + 1], :])
+        return total.numpy()
+
+    def special_embed_eval_grad(self, x, theta):
+        """
+        Gradient of ``special_embed_eval`` with respect to ``x``, obtained
+        from ``complex_step_derivative`` with step ``1e-10``.
+
+        :param x: numpy array holding one input point
+        :param theta: weight matrix forwarded to ``special_embed_eval``
+        :return: flattened gradient
+        """
+
+        def objective(point):
+            return self.special_embed_eval(point.flatten(), theta)
+
+        row = x.reshape(-1, 1).T
+        return complex_step_derivative(objective, 1e-10, row).flatten()
+
+    def get_lambdas_additive(self, theta):
+        """
+        Build the negated additive objective and its negated gradient.
+
+        :param theta: weight matrix forwarded to ``special_embed_eval``
+        :return: list [fun, grad] of callables taking a numpy point
+        """
+
+        def fun(x):
+            return -self.special_embed_eval(x, theta)
+
+        def grad(x):
+            return -self.special_embed_eval_grad(x, theta)
+
+        return [fun, grad]
+
+    def get_lambdas(self, theta):
+        """
+        Build the negated sampled function and its complex-step gradient.
+
+        :param theta: weight matrix multiplied with the embedding of the point
+        :return: list [fun, grad] of callables taking a numpy vector with
+            ``self.d`` entries
+        """
+
+        def fun(x):
+            point = torch.from_numpy(x).view(1, self.d)
+            values = torch.mm(self.embed(point), theta).numpy()
+            return -values.flatten()
+
+        def grad(x):
+            # complex step differentiation of fun
+            row = x.reshape(self.d, 1).T
+            return -complex_step_derivative(fun, 1e-10, row).flatten()
+
+        return [fun, grad]
+
+    def sample_and_optimize(
+        self, xtest=None, multistart=25, minimizer="L-BFGS-B", grid=100, verbose=0
+    ):
+        """
+        Sample one function from the model and maximise it with random
+        restarts.
+
+        :param xtest: unused
+        :param multistart: number of random restarts (at least 1)
+        :param minimizer: "L-BFGS-B", "ProjGD" or "coordinate-wise"
+        :param grid: grid points per unit length for "coordinate-wise"
+        :param verbose: values above 0 print progress for "coordinate-wise"
+        :return: tuple (maximiser as tensor, sampled function value there)
+        :raises NotImplementedError: for minimizer "CD_cw"
+        :raises AssertionError: for any other unknown minimizer
+        :raises ValueError: if ``multistart`` is smaller than 1
+        """
+        from scipy.optimize import brute, minimize
+
+        # weights of the sampled linear model in feature space
+        theta = self.sample_theta()
+
+        if self.bounds is None:
+            mybounds = tuple((-self.diameter, self.diameter) for _ in range(self.d))
+        else:
+            mybounds = self.bounds
+
+        def fun(x):
+            # negated sample, because the optimizers below minimise
+            features = self.embed(torch.from_numpy(x).view(1, -1))
+            return -torch.mm(torch.t(theta), torch.t(features)).numpy()
+
+        best_solution = None
+        best_value = None
+        for _ in range(multistart):
+            # randn draw kept so seeded runs consume the same random stream
+            x0 = np.random.randn(self.d)
+            for i in range(self.d):
+                x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1])
+
+            if minimizer == "L-BFGS-B":
+                res = minimize(
+                    fun, x0, method="L-BFGS-B", jac=None, tol=0.0001, bounds=mybounds
+                )
+                solution = res.x
+            elif minimizer == "ProjGD":
+                # ``grad`` used to be an undefined name in this branch; take
+                # the complex-step gradient of the same objective.
+                # NOTE(review): confirm this is the gradient that was intended
+                grad = self.get_lambdas(theta)[1]
+                res = projected_gradient_descent(
+                    fun,
+                    grad,
+                    x0,
+                    mybounds,
+                    tol=0.001,
+                    nu=1.0 / (self.m * np.max(np.abs(theta))),
+                )
+                solution = res.x
+            elif minimizer == "coordinate-wise":
+                solution = np.random.randn(self.d)
+                for i in range(self.d):
+                    if verbose > 0:
+                        print("Dimension: ", i)
+                    fun_cw = lambda x, i=i: lambda_coordinate(fun, x0, i, x)
+                    ranges = [slice(mybounds[i][0], mybounds[i][1], 1.0 / float(grid))]
+                    out = brute(fun_cw, ranges, finish=None)
+                    solution[i] = out
+                    if verbose > 0:
+                        print("Soln:", out.T)
+            elif minimizer == "CD_cw":
+                raise NotImplementedError("Not implemented yet")
+            else:
+                raise AssertionError("Wrong optimizer selected.")
+
+            # track the best restart directly; stacking (point, value) pairs
+            # into one numpy array is ragged and fails on recent NumPy
+            value = np.asarray(-fun(solution)).item()
+            if best_value is None or value > best_value:
+                best_solution, best_value = solution, value
+
+        if best_solution is None:
+            raise ValueError("multistart must be at least 1")
+
+        return (
+            torch.from_numpy(best_solution),
+            -torch.from_numpy(fun(best_solution)),
+        )
if __name__ == "__main__":
- # domain size
- L_infinity_ball = 1
- # dimension
- d = 2
- # error variance
- s = 0.001
- # grid density
- n = 50
- # number of intial points
- N = 200
- # smoothness
- gamma = torch.from_numpy(np.array([0.4, 0.4]))
- # test problem
-
- xtest = torch.from_numpy(interval(n, d))
- x = torch.from_numpy(np.random.uniform(-L_infinity_ball, L_infinity_ball, size=(N, d)))
-
- f_no_noise = lambda q: torch.sin(torch.sum(q * 4, dim=1)).view(-1, 1)
- # f_no_noise = lambda q: torch.sin((q[:,0] * 4)).view(-1, 1)
-
- f = lambda q: f_no_noise(q) + torch.normal(mean=torch.zeros(q.size()[0], 1, dtype=torch.float64), std=1.,
- out=None) * s
- # targets
- y = f(x)
-
- # GP model with squared exponential
- m = torch.from_numpy(np.array([100, 100]))
-
- groups = [[0], [1]]
- GP = GaussianProcessFF(kernel="squared_exponential", s=s, m=m, d=d, gamma=gamma, groups=groups, approx="hermite")
- # GP2 = GaussianProcess(kernel="ard", s=s, d=d, gamma=gamma, groups=None)
-
- # fit GP
- GP.fit_gp(x, y)
- # GP2.fit_gp(x,y)
-
- GP.optimize_params("rots", 10, optimizer="pymanopt")
-
- print("Log probability:", GP.log_marginal_likelihood_self())
- # print ("Log probability:", GP2.log_marginal_likelihood_self() )
-
- GP.visualize(xtest, f_true=f_no_noise)
+    # half-width of the box the training inputs are drawn from
+    L_infinity_ball = 1
+    # input dimension
+    d = 2
+    # noise level: scales the standard normal noise added to the targets
+    s = 0.001
+    # grid density of the test points
+    n = 50
+    # number of training points
+    N = 200
+    # bandwidth, one entry per additive group
+    gamma = torch.from_numpy(np.array([0.4, 0.4]))
+
+    # test grid and uniformly drawn training inputs
+    xtest = torch.from_numpy(interval(n, d))
+    x = torch.from_numpy(
+        np.random.uniform(-L_infinity_ball, L_infinity_ball, size=(N, d))
+    )
+
+    def f_no_noise(q):
+        # ground truth: sine of four times the coordinate sum
+        return torch.sin(torch.sum(q * 4, dim=1)).view(-1, 1)
+
+    # alternative ground truth depending on the first coordinate only:
+    # f_no_noise = lambda q: torch.sin((q[:,0] * 4)).view(-1, 1)
+
+    def f(q):
+        # noisy observations of the ground truth
+        clean = f_no_noise(q)
+        noise = torch.normal(
+            mean=torch.zeros(q.size()[0], 1, dtype=torch.float64), std=1.0, out=None
+        )
+        return clean + noise * s
+
+    # targets
+    y = f(x)
+
+    # additive model: one Hermite embedding with 100 basis functions per input
+    m = torch.from_numpy(np.array([100, 100]))
+    groups = [[0], [1]]
+    GP = GaussianProcessFF(
+        kernel="squared_exponential",
+        s=s,
+        m=m,
+        d=d,
+        gamma=gamma,
+        groups=groups,
+        approx="hermite",
+    )
+    # GP2 = GaussianProcess(kernel="ard", s=s, d=d, gamma=gamma, groups=None)
+
+    # fit, optimise the rotation parameters, report and plot
+    GP.fit_gp(x, y)
+    # GP2.fit_gp(x,y)
+
+    GP.optimize_params("rots", 10, optimizer="pymanopt")
+
+    print("Log probability:", GP.log_marginal_likelihood_self())
+    # print ("Log probability:", GP2.log_marginal_likelihood_self() )
+
+    GP.visualize(xtest, f_true=f_no_noise)
# GP2.visualize(xtest, f_true=f_no_noise)
diff --git a/stpy/continuous_processes/ga_process.py b/stpy/continuous_processes/ga_process.py
index c0a1537..6317b08 100755
--- a/stpy/continuous_processes/ga_process.py
+++ b/stpy/continuous_processes/ga_process.py
@@ -5,208 +5,234 @@
class GammaContProcess(Estimator):
- def __init__(self, gamma=1, s=0.001, kappa=1., kernel="squared_exponential", diameter=1.0,
- groups=None, bounds=None, nu=2, safe=False, kernel_custom=None, d=1):
- """
-
- :param gamma: Smoothnes parameter for squared exponential, laplace and matern kernel
- :param s: level of noise
- :param kernel: choose from a list
- :param diameter: diameter of the set (deprecated)
- :param groups: additive groups
- :param bounds: bounds for the continuous optimization
- :param v: parameter for matern kernel
- """
-
- ## GP properties
- self.s = s
- self.d = d
- self.x = None
- self.K = np.array([1.0])
- self.mu = 0.0
- self.safe = False
- self.fit = False
- self.diameter = diameter
- self.bounds = bounds
- self.admits_first_order = False
- self.back_prop = True
-
- ## kernel hyperparameters
- if kernel_custom is not None:
- self.kernel_object = kernel_custom
- self.kernel = kernel_custom.kernel
- else:
- self.kernel_object = KernelFunction(kernel_name=kernel, gamma=gamma, nu=nu, groups=groups, kappa=kappa)
- self.kernel = self.kernel_object.kernel
-
- self.gamma = gamma
- self.v = nu
- self.groups = groups
- self.kappa = kappa
- self.custom = kernel_custom
- self.optkernel = kernel
-
- def description(self):
- """
- Description of GP in text
- :return: string with description
- """
- return self.kernel_object.description() + "\n noise: " + str(self.s)
-
- def get_gamma(self, t):
- """
- ??
- :param t:
- :return:
- """
- if self.optkernel == "squared_exponential" and self.groups is None:
- return (np.log(t)) ** self.d
- elif self.optkernel == "linear":
- return 10 * self.d
- elif self.optkernel == "squared_exponential" and self.groups is not None:
- return len(self.groups) * (np.log(t))
- elif self.optkernel == "matern":
- return (np.log(t)) ** self.d
- elif self.optkernel == "modified_matern":
- return (np.log(t)) ** self.d
-
- def make_safe(self, x):
- """
- Make the input dataset numerically stable by removing duplicates?
- :param x:
- :return:
- """
- self.epsilon = 0.001
- # remove vectors that are very close to each other
- return x
-
- def fit_gp(self, x, y, iterative=False, extrapoint=False):
- """
- Fits the Gaussian process, possible update is via iterative inverse
- :param x: data x
- :param y: values y
- :param iterative: iterative inverse, where only last point of x is used
- :param extrapoint: iterative inverse must be allowed, x is the only addition
- :return:
- """
- # first fit
- if (self.fit == False or iterative == False):
- if self.safe == True:
- x = self.make_safe(x)
-
- self.x = x
- self.y = y
- try:
- self.n, self.d = list(x.size())
- except:
- self.n, self.d = x.shape
- self.K = self.kernel(x, x) + self.s * self.s * torch.eye(self.n, dtype=torch.float64)
-
- self.fit = True
- else:
- # iterative inverse
- if (iterative == True):
- if extrapoint == False:
- last_point = self.x[-1, :].view(1, -1)
- else:
- last_point = x
- old_K = self.K
- old_Kinv = self.Kinv
- else:
- pass
-
- return None
-
- def beta(self, delta=1e-12, norm=1):
- beta_value = self.s * norm + torch.sqrt(
- 2 * torch.log(1. / delta + torch.log(torch.det(self.K) / self.s ** self.n)))
- return beta_value
-
- def execute(self, xtest):
- if self.fit == True:
- K_star = self.kernel(self.x, xtest)
- else:
- K_star = None
- K_star_star = self.kernel(xtest, xtest)
- return (K_star, K_star_star)
-
- # @check_numpy(1)
- def mean_var(self, xtest, full=False):
- """
- Return posterior mean and variance as tuple
- :param xtest: grid, numpy array (2D)
- :param full: Instead of just poinwise variance, full covariance can be outputed (bool)
- :return: (tensor,tensor)
- """
-
- (K_star, K_star_star) = self.execute(xtest)
-
- if self.fit == False:
- if full == False:
-
- x = torch.sum(xtest, dim=1)
- first = torch.diag(K_star_star).view(-1, 1)
- variance = first
- yvar = torch.sqrt(variance)
- else:
- first = K_star_star
- yvar = first
-
- return (0 * x.view(-1, 1), yvar)
-
- if self.back_prop == False:
- decomp = torch.btrifact(self.K.unsqueeze(0))
- A = torch.btrisolve(self.y.unsqueeze(0), *decomp)[0, :, :]
- self.B = torch.t(torch.btrisolve(torch.t(K_star).unsqueeze(0), *decomp)[0, :, :])
- else:
- A, _ = torch.gesv(self.y, self.K)
- self.B = torch.t(torch.gesv(torch.t(K_star), self.K)[0])
-
- ymean = torch.mm(K_star, A)
-
- if full == False:
- first = torch.diag(K_star_star).view(-1, 1)
- second = torch.einsum('ij,ji->i', (self.B, torch.t(K_star))).view(-1, 1)
- variance = first - second
- yvar = torch.sqrt(variance)
- else:
- first = K_star_star
- second = torch.mm(self.B, torch.t(K_star))
- yvar = first - second
-
- return (ymean, yvar)
-
- def sample(self, xtest, size=1):
- """
- Samples Path from GP, return a numpy array evaluated over grid
- :param xtest: grid
- :param size: number of samples
- :return: numpy array
- """
- nn = list(xtest.size())[0]
-
- if self.fit == True:
- (ymean, yvar) = self.mean_var(xtest, full=True)
- Cov = yvar + self.s * self.s * torch.eye(nn, dtype=torch.float64)
- L = torch.cholesky(Cov, upper=False)
- random_vector = torch.normal(mean=torch.zeros(nn, size, dtype=torch.float64), std=1.)
- f = ymean + torch.abs(torch.mm(L, random_vector))
- else:
- (K_star, K_star_star) = self.execute(xtest)
- L = torch.cholesky(K_star_star + (10e-10 + self.s * self.s) * torch.eye(nn, dtype=torch.float64),
- upper=False)
- random_vector = torch.normal(mean=torch.zeros(nn, size, dtype=torch.float64), std=1.)
- f = self.mu + torch.mm(L, random_vector)
- return f
-
- def sample_and_max(self, xtest, size=1):
- """
- Samples Path from GP and takes argmax
- :param xtest: grid
- :param size: number of samples
- :return: (argmax, max)
- """
- f = self.sample(xtest, size=size)
- self.temp = f
- val, index = torch.max(f, dim=0)
- return (xtest[index, :], val)
+ def __init__(
+ self,
+ gamma=1,
+ s=0.001,
+ kappa=1.0,
+ kernel="squared_exponential",
+ diameter=1.0,
+ groups=None,
+ bounds=None,
+ nu=2,
+ safe=False,
+ kernel_custom=None,
+ d=1,
+ ):
+ """
+
+ :param gamma: Smoothness parameter for squared exponential, laplace and matern kernel
+ :param s: level of noise
+ :param kernel: choose from a list
+ :param diameter: diameter of the set (deprecated)
+ :param groups: additive groups
+ :param bounds: bounds for the continuous optimization
+ :param nu: parameter for matern kernel
+ """
+
+ ## GP properties
+ self.s = s
+ self.d = d
+ self.x = None
+ self.K = np.array([1.0])
+ self.mu = 0.0
+ self.safe = False
+ self.fit = False
+ self.diameter = diameter
+ self.bounds = bounds
+ self.admits_first_order = False
+ self.back_prop = True
+
+ ## kernel hyperparameters
+ if kernel_custom is not None:
+ self.kernel_object = kernel_custom
+ self.kernel = kernel_custom.kernel
+ else:
+ self.kernel_object = KernelFunction(
+ kernel_name=kernel, gamma=gamma, nu=nu, groups=groups, kappa=kappa
+ )
+ self.kernel = self.kernel_object.kernel
+
+ self.gamma = gamma
+ self.v = nu
+ self.groups = groups
+ self.kappa = kappa
+ self.custom = kernel_custom
+ self.optkernel = kernel
+
+ def description(self):
+ """
+ Description of GP in text
+ :return: string with description
+ """
+ return self.kernel_object.description() + "\n noise: " + str(self.s)
+
+ def get_gamma(self, t):
+ """
+ ??
+ :param t:
+ :return:
+ """
+ if self.optkernel == "squared_exponential" and self.groups is None:
+ return (np.log(t)) ** self.d
+ elif self.optkernel == "linear":
+ return 10 * self.d
+ elif self.optkernel == "squared_exponential" and self.groups is not None:
+ return len(self.groups) * (np.log(t))
+ elif self.optkernel == "matern":
+ return (np.log(t)) ** self.d
+ elif self.optkernel == "modified_matern":
+ return (np.log(t)) ** self.d
+
+ def make_safe(self, x):
+ """
+ Placeholder for making the input dataset numerically stable (currently returns x unchanged)
+ :param x:
+ :return:
+ """
+ self.epsilon = 0.001
+ # remove vectors that are very close to each other
+ return x
+
+ def fit_gp(self, x, y, iterative=False, extrapoint=False):
+ """
+ Fits the Gaussian process, possible update is via iterative inverse
+ :param x: data x
+ :param y: values y
+ :param iterative: iterative inverse, where only last point of x is used
+ :param extrapoint: iterative inverse must be allowed, x is the only addition
+ :return:
+ """
+ # first fit
+ if self.fit == False or iterative == False:
+ if self.safe == True:
+ x = self.make_safe(x)
+
+ self.x = x
+ self.y = y
+ try:
+ self.n, self.d = list(x.size())
+ except:
+ self.n, self.d = x.shape
+ self.K = self.kernel(x, x) + self.s * self.s * torch.eye(
+ self.n, dtype=torch.float64
+ )
+
+ self.fit = True
+ else:
+ # iterative inverse
+ if iterative == True:
+ if extrapoint == False:
+ last_point = self.x[-1, :].view(1, -1)
+ else:
+ last_point = x
+ old_K = self.K
+ old_Kinv = self.Kinv
+ else:
+ pass
+
+ return None
+
+ def beta(self, delta=1e-12, norm=1):
+ beta_value = self.s * norm + torch.sqrt(
+ 2 * torch.log(1.0 / delta + torch.log(torch.det(self.K) / self.s**self.n))
+ )
+ return beta_value
+
+ def execute(self, xtest):
+ if self.fit == True:
+ K_star = self.kernel(self.x, xtest)
+ else:
+ K_star = None
+ K_star_star = self.kernel(xtest, xtest)
+ return (K_star, K_star_star)
+
+ # @check_numpy(1)
+ def mean_var(self, xtest, full=False):
+ """
+ Return posterior mean and variance as tuple
+ :param xtest: grid, numpy array (2D)
+ :param full: Instead of just pointwise variance, the full covariance can be returned (bool)
+ :return: (tensor,tensor)
+ """
+
+ (K_star, K_star_star) = self.execute(xtest)
+
+ if self.fit == False:
+ if full == False:
+
+ x = torch.sum(xtest, dim=1)
+ first = torch.diag(K_star_star).view(-1, 1)
+ variance = first
+ yvar = torch.sqrt(variance)
+ else:
+ first = K_star_star
+ yvar = first
+
+ return (0 * x.view(-1, 1), yvar)
+
+ if self.back_prop == False:
+ decomp = torch.btrifact(self.K.unsqueeze(0))
+ A = torch.btrisolve(self.y.unsqueeze(0), *decomp)[0, :, :]
+ self.B = torch.t(
+ torch.btrisolve(torch.t(K_star).unsqueeze(0), *decomp)[0, :, :]
+ )
+ else:
+ A, _ = torch.gesv(self.y, self.K)
+ self.B = torch.t(torch.gesv(torch.t(K_star), self.K)[0])
+
+ ymean = torch.mm(K_star, A)
+
+ if full == False:
+ first = torch.diag(K_star_star).view(-1, 1)
+ second = torch.einsum("ij,ji->i", (self.B, torch.t(K_star))).view(-1, 1)
+ variance = first - second
+ yvar = torch.sqrt(variance)
+ else:
+ first = K_star_star
+ second = torch.mm(self.B, torch.t(K_star))
+ yvar = first - second
+
+ return (ymean, yvar)
+
+ def sample(self, xtest, size=1):
+ """
+ Samples paths from the GP, returns a torch tensor evaluated over the grid
+ :param xtest: grid
+ :param size: number of samples
+ :return: torch tensor
+ """
+ nn = list(xtest.size())[0]
+
+ if self.fit == True:
+ (ymean, yvar) = self.mean_var(xtest, full=True)
+ Cov = yvar + self.s * self.s * torch.eye(nn, dtype=torch.float64)
+ L = torch.cholesky(Cov, upper=False)
+ random_vector = torch.normal(
+ mean=torch.zeros(nn, size, dtype=torch.float64), std=1.0
+ )
+ f = ymean + torch.abs(torch.mm(L, random_vector))
+ else:
+ (K_star, K_star_star) = self.execute(xtest)
+ L = torch.cholesky(
+ K_star_star
+ + (10e-10 + self.s * self.s) * torch.eye(nn, dtype=torch.float64),
+ upper=False,
+ )
+ random_vector = torch.normal(
+ mean=torch.zeros(nn, size, dtype=torch.float64), std=1.0
+ )
+ f = self.mu + torch.mm(L, random_vector)
+ return f
+
+ def sample_and_max(self, xtest, size=1):
+ """
+ Samples Path from GP and takes argmax
+ :param xtest: grid
+ :param size: number of samples
+ :return: (argmax, max)
+ """
+ f = self.sample(xtest, size=size)
+ self.temp = f
+ val, index = torch.max(f, dim=0)
+ return (xtest[index, :], val)
diff --git a/stpy/continuous_processes/gauss_procc.py b/stpy/continuous_processes/gauss_procc.py
index d7379ff..e8882e1 100755
--- a/stpy/continuous_processes/gauss_procc.py
+++ b/stpy/continuous_processes/gauss_procc.py
@@ -4,8 +4,6 @@
import scipy as scipy
import torch
from cvxpylayers.torch import CvxpyLayer
-#from functorch import hessian
-import functorch
from pymanopt.manifolds import Euclidean, Stiefel, PSDFixedRank
from torch.autograd import grad
from torchmin import minimize as minimize_torch
@@ -17,1132 +15,1524 @@
class GaussianProcess(Estimator):
- def __init__(self, gamma=1, s=0.001, kappa=1., kernel_name="squared_exponential", diameter=1.0,
- groups=None, bounds=None, nu=1.5, kernel=None, d=1, power=2, lam=1., loss = 'squared', huber_delta = 1.35,
- hyper = 'classical', B = 1., svr_eps = 0.1):
- """
-
- :param gamma: Smoothnes parameter for squared exponential, laplace and matern kernel
- :param s: level of noise
- :param kernel: choose from a list
- :param diameter: diameter of the set (deprecated)
- :param groups: additive groups
- :param bounds: bounds for the continuous optimization
- :param v: parameter for matern kernel
- """
-
- ## GP properties
- self.s = s
- self.d = d
- self.x = None
- self.K = np.array([1.0])
- self.mu = 0.0
- self.lam = lam
- self.total_bound = B
- self.prob = 0.5
- self.svr_eps = svr_eps
- self.safe = False
- self.fitted = False
- self.diameter = diameter
- self.bounds = bounds
- self.admits_first_order = False
- self.back_prop = True
- self.loss = loss
- self.huber_delta = huber_delta
- self.hyper = hyper
- self.prepared_log_marginal = False
- self.warm_start_solution = None
- self.max_size = 10000
- ## kernel hyperparameters
- if kernel is not None:
- self.kernel_object = kernel
- self.kernel = kernel.kernel
- self.d = kernel.d
- else:
- self.kernel_object = KernelFunction(kernel_name=kernel_name, gamma=gamma, nu=nu, groups=groups, kappa=kappa,
- power=power, d=d)
- self.kernel = self.kernel_object.kernel
-
- self.gamma = gamma
- self.v = nu
- self.groups = groups
- self.kappa = kappa
- self.custom = kernel
- self.optkernel = kernel_name
-
- def residuals(self,x,y):
- res = (self.mean(x) - y)
- return res
-
- def description(self):
- """
- Description of GP in text
- :return: string with description
- """
- return self.kernel_object.description() + "\nlambda=" + str(self.s)
-
- def embed(self, x):
- return self.kernel_object.embed(x)
-
- def get_basis_size(self):
- return self.kernel_object.get_basis_size()
-
- def make_safe(self, x):
- """
- Make the input dataset numerically stable by removing duplicates?
- :param x:
- :return:
- """
- self.epsilon = 0.001
- # remove vectors that are very close to each other
- return x
-
- def add_data_point(self, x, y, Sigma = None):
-
- if self.x is not None:
- self.x = torch.cat((self.x, x), dim=0)
- self.y = torch.cat((self.y, y), dim=0)
- if Sigma is None:
- self.Sigma = torch.block_diag(self.Sigma, torch.eye(x.size()[0],dtype = torch.double) * self.s)
- else:
- self.x = x
- self.y = y
- self.Sigma = Sigma
- self.fit_gp(self.x, self.y, Sigma = self.Sigma)
-
- def fit(self, x=None, y=None):
- if x is not None:
- self.fit_gp(x,y)
- else:
- self.fit_gp(self.x,self.y)
-
- def lcb(self, xtest):
- """
- Lower confidence bound
- :return:
- """
- mu, s = self.mean_std(xtest)
- return mu - 2 * s
-
- def ucb(self, xtest):
- """
- Upper confidence bound
- :param xtest:
- :return:
- """
- mu, s = self.mean_std(xtest)
- return mu + 2*s
-
- def fit_gp(self, x, y, Sigma = None, iterative=False, extrapoint=False):
- """
- Fits the Gaussian process, possible update is via iterative inverse
- :param x: data x
- :param y: values y
- :param iterative: iterative inverse, where only last point of x is used
- :param extrapoint: iterative inverse must be allowed, x is the only addition
- :return:
- """
- # first fit
- try:
- self.n, self.d = list(x.size())
- except:
- self.n, self.d = x.shape
-
- if Sigma is None:
- self.Sigma = (self.s) * torch.eye(self.n, dtype=torch.float64)
- else:
- self.Sigma = Sigma
-
- if (self.fitted == False or iterative == False):
-
- if self.safe == True:
- x = self.make_safe(x)
-
- self.x = x
- self.y = y
- self.K = self.kernel(x, x) + self.Sigma.T @ self.Sigma
- self.fitted = True
- else:
- # iterative inverse
- if (iterative == True):
- if extrapoint == False:
- last_point = self.x[-1, :].view(1, -1)
- else:
- last_point = x
- old_K = self.K
- old_Kinv = self.Kinv
- else:
- pass
- self.mean_std(x)
- return None
-
- def norm(self):
- if self.fitted:
- val = torch.sqrt(self.A.T @ self.kernel(self.x, self.x) @ self.A)
- return val
- else:
- return None
-
- def beta(self, delta=1e-3, norm=1):
- """
- return concentration parameter given the current estimates
-
- :param delta: failure probability
- :param norm: norm assumption
- :return:
- """
- beta_value = self.s * norm + \
- torch.sqrt(2 * torch.log(1. / delta + torch.log(torch.det(self.K) / self.s ** self.n)))
- return beta_value
-
- def execute(self, xtest):
- """
- Calculates the covariance between data and xtest
- :param xtest:
- :return:
- """
- if self.fitted == True:
- K_star = self.kernel(self.x, xtest)
- else:
- K_star = None
- K_star_star = self.kernel(xtest, xtest)
- return (K_star, K_star_star)
-
- def _huber_fit(self, K_star, newK = None):
- alpha = cp.Variable(self.n)
- self.jitter = 10e-5
- if newK is None:
- K = self.kernel(self.x, self.x) + self.jitter * torch.eye(self.n, dtype=torch.float64)
- else:
- K = newK.detach()
- K = cp.atoms.affine.wraps.psd_wrap(K)
- objective = cp.Minimize(cp.sum(cp.huber((K @ alpha - self.y.view(-1).numpy())/self.s,M = self.huber_delta)) + self.lam * cp.quad_form(alpha, K))
- prob = cp.Problem(objective)
- prob.solve(solver = cp.MOSEK, enforce_dpp = False)
- if K_star is not None:
- return K_star@torch.from_numpy(alpha.value).view(-1,1)
- else:
- return torch.from_numpy(alpha.value).view(-1,1)
-
- def _svr_fit(self, K_star, newK = None):
- alpha = cp.Variable(self.n)
- self.jitter = 10e-5
- if newK is None:
- K = self.kernel(self.x, self.x) + self.jitter * torch.eye(self.n, dtype=torch.float64)
- else:
- K = newK.detach()
-
- K = cp.atoms.affine.wraps.psd_wrap(K)
- objective = cp.Minimize(self.lam * cp.quad_form(alpha, K))
- constraints = [cp.abs(K @ alpha - self.y.view(-1).numpy()) <= self.svr_eps ]
- prob = cp.Problem(objective, constraints)
- prob.solve(solver = cp.MOSEK, enforce_dpp = False)
- if K_star is not None:
- return K_star@torch.from_numpy(alpha.value).view(-1,1)
- else:
- return torch.from_numpy(alpha.value).view(-1,1)
-
-
- def _unif_fit(self, K_star, newK = None):
- alpha = cp.Variable((self.n,1))
- self.jitter = 10e-5
- if newK is None:
- K = self.kernel(self.x, self.x) + self.jitter * torch.eye(self.n, dtype=torch.float64)
- else:
- K = newK.detach()
-
- K = cp.atoms.affine.wraps.psd_wrap(K)
- con = 2*self.total_bound*self.prob/((1-self.prob)*np.sqrt(2*np.pi*self.s**2))
- objective = cp.Minimize(cp.sum(cp.logistic(cp.square(
- (K @ alpha - self.y.view(-1, 1).numpy())/ (np.sqrt(2)*self.s)) + np.log(con) )) + self.lam * cp.quad_form(alpha, K))
- prob = cp.Problem(objective)
- prob.solve(solver = cp.MOSEK, enforce_dpp = False)
- if K_star is not None:
- return K_star@torch.from_numpy(alpha.value).view(-1,1)
- else:
- return torch.from_numpy(alpha.value).view(-1,1)
-
- def _unif_fit_torch(self, K_star, newK = None, warm_start = None):
- self.jitter = 10e-5
- if newK is None:
- K = self.kernel(self.x, self.x) + self.jitter * torch.eye(self.n, dtype=torch.float64)
- else:
- K = newK.detach()
-
- con = 2 * self.total_bound * self.prob / ((1 - self.prob) * np.sqrt(2 * np.pi * self.s ** 2))
- unif = lambda alpha: torch.sum(torch.log(torch.exp( ((K@alpha-self.y.view(-1))**2)/(2*self.s**2) + np.log(con) ) + 1 ) ) \
- + self.lam * alpha @ K@ alpha
- if warm_start is None:
- x_init = torch.zeros(size = (self.n,1)).view(-1).double()
- else:
- x_init = warm_start.view(-1)
-
- res = minimize_torch(unif, x_init, method='l-bfgs', tol=1e-3, disp=0,
- options={'max_iter': 200, 'gtol': 1e-3})
- alpha = res.x
-
- if K_star is not None:
- return K_star @ alpha.view(-1, 1)
- else:
- return alpha.view(-1, 1)
-
- def _huber_fit_torch(self, K_star, newK = None):
- self.jitter = 10e-5
- if newK is None:
- K = self.kernel(self.x, self.x) + self.jitter * torch.eye(self.n, dtype=torch.float64)
- else:
- K = newK
- L = torch.linalg.cholesky(K)
-
- huber = lambda beta: torch.nn.functional.huber_loss(L @ beta / self.s, self.y.view(-1) / self.s,
- reduction='sum',
- delta=self.huber_delta) + self.lam * beta @ beta
- #x_init = torch.linalg.solve(L.T@L+torch.eye(self.n).double()*self.s**2*self.lam, self.y)
- x_init = torch.zeros(size = (self.n,1)).view(-1).double()
- res = minimize_torch(huber, x_init, method='l-bfgs', tol=1e-4, disp=0,
- options={'max_iter': 10**3, 'gtol': 1e-4})
- alpha = torch.linalg.solve(L,res.x)
- if K_star is not None:
- return K_star @ alpha.view(-1, 1)
- else:
- return alpha.view(-1,1)
-
- def mean_std(self, xtest, full=False, reuse=False):
- if xtest.size()[0] 0:
- mu[xtest.size()[0] - xtest.size()[0] % stepby:], std[
- xtest.size()[0] - xtest.size()[0] % stepby:] = self.mean_std_sub(
- xtest[xtest.size()[0] - xtest.size()[0] % stepby:, :], reuse=True)
-
- return mu, std
-
- def mean_std_sub(self, xtest, full=False, reuse=False):
- """
- Return posterior mean and variance as tuple
- :param xtest: grid, numpy array (2D)
- :param full: Instead of just poinwise variance, full covariance can be outputed (bool)
- :return: (tensor,tensor)
- """
- if full:
- (K_star, K_star_star) = self.execute(xtest)
- else:
- K_star = self.kernel(self.x, xtest)
- diag_K_star_star = torch.hstack([self.kernel(xtest[i,:].view(1,-1),xtest[i,:].view(1,-1)).view(1) for i in range(xtest.size()[0])])
-
- if self.fitted == False:
- # the process is not fitted
-
- if full == False:
- x = torch.sum(xtest, dim=1)
- #first = torch.diag(K_star_star).view(-1, 1)
- first = diag_K_star_star.view(-1,1)
- variance = first
- yvar = torch.sqrt(variance)
- else:
- x = torch.sum(xtest, dim=1)
- first = K_star_star
- yvar = first
-
- return (0 * x.view(-1, 1), yvar)
-
- else:
-
- if self.back_prop == False:
- if reuse == False:
- #self.decomp = torch.lu(self.K.unsqueeze(0))
- self.LU, self.pivot = torch.linalg.lu_factor(self.K.unsqueeze(0))
- #self.A = torch.lu_solve(self.y.unsqueeze(0), *self.decomp)[0, :, :]
- self.A = torch.linalg.lu_solve(self.LU, self.pivot, self.y.unsqueeze(0))[0,:,:]
- self.B = torch.t(torch.linalg.lu_solve(self.LU, self.pivot ,torch.t(K_star).unsqueeze(0))[0, :, :])
- else:
- if reuse == False:
- self.A = torch.linalg.lstsq(self.K, self.y)[0]
- #self.B = torch.t(torch.linalg.solve(self.K, torch.t(K_star)))
- self.B = torch.t(torch.linalg.lstsq(self.K, torch.t(K_star))[0])
-
- if self.loss == "squared":
- ymean = torch.mm(K_star, self.A)
- elif self.loss == "huber":
- ymean = self._huber_fit(K_star)
- elif self.loss == "svr":
- ymean = self._svr_fit(K_star)
- elif self.loss == "unif" or self.loss == "unif_new":
- ymean = self._unif_fit_torch(K_star)
- else:
- raise AssertionError("Loss function not implemented.")
-
- if full == False:
- first = diag_K_star_star.view(-1,1)
- second = torch.einsum('ij,ji->i', (self.B, torch.t(K_star))).view(-1, 1)
- variance = first - second
- yvar = torch.sqrt(variance)
- else:
- first = K_star_star
- second = torch.mm(self.B, torch.t(K_star))
- yvar = first - second
-
- return (ymean, yvar)
-
- def mean(self, xtest):
- """
- Calculates the mean prediction over a specific input space
- :param xtest: input
- :return:
- """
- K_star = self.kernel(self.x, xtest)
-
- if self.loss == "squared":
- ymean = torch.mm(K_star, self.A)
- elif self.loss == "huber":
- ymean = self._huber_fit(K_star)
- else:
- raise AssertionError("Loss function not implemented.")
-
- return ymean
-
- def gradient_mean_var(self, point, hessian=True):
- """
- Can calculate gradient at single point atm.
-
- :param point:
- :return:
- """
-
- # mean
- point.requires_grad_(True)
- mu = self.mean_std(point)[0]
- nabla_mu = grad(mu, point, create_graph=True)[0][0]
-
- if hessian == True:
- # variance
- H = self.kernel_object.get_2_der(point)
- C = self.kernel_object.get_1_der(point, self.x)
-
- V = H - torch.t(C) @ self.K @ C
-
- return [nabla_mu, V]
- else:
- return nabla_mu
-
- def mean_gradient_hessian(self, xtest, hessian=False):
- hessian_mu = torch.zeros(size=(self.d, self.d), dtype=torch.float64)
- xtest.requires_grad_(True)
- # xtest.retain_grad()
- mu = self.mean_std(xtest)[0]
- # mu.backward(retain_graph=True)
-
- # nabla_mu = xtest.grad
- nabla_mu = grad(mu, xtest, create_graph=True)[0][0]
-
- if hessian == False:
- return nabla_mu
- else:
- for i in range(self.d):
- hessian_mu[i, :] = grad(nabla_mu[i], xtest, create_graph=True, retain_graph=True)[0][0]
- return [nabla_mu, hessian_mu]
-
- def sample(self, xtest, size=1, jitter=10e-8):
- """
- Samples Path from GP, return a numpy array evaluated over grid
- :param xtest: grid
- :param size: number of samples
- :return: numpy array
- """
- nn = list(xtest.size())[0]
-
- if self.fitted == True:
- (ymean, yvar) = self.mean_std(xtest, full=True)
- Cov = yvar + 10e-10 * torch.eye(nn, dtype=torch.float64)
- L = torch.linalg.cholesky(Cov)
- # L = torch.from_numpy(np.linalg.cholesky(Cov.numpy()))
- random_vector = torch.normal(mean=torch.zeros(nn, size, dtype=torch.float64), std=1.)
- f = ymean + torch.mm(L, random_vector)
- else:
- (K_star, K_star_star) = self.execute(xtest)
- L = torch.linalg.cholesky(K_star_star + jitter * torch.eye(nn, dtype=torch.float64))
- random_vector = torch.normal(mean=torch.zeros(nn, size, dtype=torch.float64), std=1.)
- f = self.mu + torch.mm(L, random_vector)
- return f
-
- def sample_and_max(self, xtest, size=1):
- """
- Samples Path from GP and takes argmax
- :param xtest: grid
- :param size: number of samples
- :return: (argmax, max)
- """
- f = self.sample(xtest, size=size)
- self.temp = f
- val, index = torch.max(f, dim=0)
- return (xtest[index, :], val)
-
-
- def log_marginal(self, kernel, X, weight):
-
- if self.loss == "squared":
- return self._log_marginal_squared(kernel, X, weight)
- elif self.loss == "unif_new":
- return self._log_marginal_unif(kernel, X, weight)
- else:
- return self._log_marginal_map(kernel, X, weight)
-
- def _log_marginal_unif(self,kernel,X,weight):
- if not self.prepared_log_marginal:
- self._prepare_log_marginal_unif()
-
- func = kernel.get_kernel()
- self.jitter = 10e-4
- K = func(self.x, self.x, **X) + torch.eye(self.n, dtype=torch.float64) * self.jitter
- #print ("Kernel")
- #print (K)
- L = torch.linalg.cholesky(K)
- self.L_unif.value = (L.data.numpy())
-
- self.prob_unif.solve(solver=cp.MOSEK, enforce_dpp=False, warm_start=True)
-
- solution = torch.zeros(size=(self.n, 1), requires_grad=True).reshape(-1).double()
- solution.data = torch.from_numpy(self.beta_unif.value)
- con = 2 * self.total_bound * self.prob / ((1 - self.prob) * np.sqrt(2 * np.pi * self.s ** 2))
-
- loglikelihood = lambda beta: torch.sum(torch.log(torch.exp( ((L@beta-self.y.view(-1))**2)/(2*self.s**2) + np.log(con) ) + 1 ) ) \
- + self.lam * beta.T @ beta
-
- H = hessian(loglikelihood)(solution)
- logdet = - 0.5* torch.slogdet(H)[1] * weight
- logprob = -0.5* loglikelihood(solution) + logdet
- logprob = -logprob
- return logprob
-
- def _prepare_log_marginal_unif(self):
-
- self.beta_unif = cp.Variable(self.n)
- self.L_unif = cp.Parameter((self.n, self.n))
-
- con = 2 * self.total_bound * self.prob / ((1 - self.prob) * np.sqrt(2 * np.pi * self.s ** 2))
- #self.objective_unif = cp.Minimize(cp.sum(cp.logistic(cp.square(
- # (self.K_unif @ self.alpha_unif - self.y.view(-1).numpy()) / (np.sqrt(2) * self.s)) + np.log(con))) + self.lam * cp.quad_form(
- # self.alpha_unif, self.L))
- self.objective_unif = cp.Minimize(cp.sum(cp.logistic(cp.square(
- (self.L_unif @ self.beta_unif - self.y.view(-1).numpy()) / (np.sqrt(2) * self.s)) + np.log(con))) + self.lam * cp.sum_squares(self.beta_unif))
- self.prob_unif = cp.Problem(self.objective_unif)
- self.prepared_log_marginal = True
-
- def _prepare_log_marginal_huber(self):
- beta = cp.Variable(self.n)
- L = cp.Parameter((self.n, self.n))
-
- objective = cp.Minimize(cp.sum(
- cp.huber((L @ beta - self.y.view(-1).numpy()) / self.s, M=self.huber_delta)) + self.lam * cp.sum_squares(
- beta))
-
- prob = cp.Problem(objective)
- cvxpylayer = CvxpyLayer(prob, parameters=[L], variables=[beta])
- self.prepared_log_marginal = True
- print ("cvxpy-layer has been initialized.")
- return cvxpylayer
-
- def _log_marginal_huber_cvxpy(self, kernel, X, weight):
- func = kernel.get_kernel()
- self.jitter = 10e-4
- L_tch = torch.linalg.cholesky(func(self.x, self.x, **X) + torch.eye(self.n, dtype=torch.float64) * self.jitter)
-
- if not self.prepared_log_marginal:
- self._cvxpylayer = self._prepare_log_marginal_huber()
- solution = self._cvxpylayer(L_tch)[0]
-
- huber = lambda beta: torch.nn.functional.huber_loss(L_tch@beta/self.s,self.y.view(-1)/self.s,reduction='sum',delta = self.huber_delta) + self.lam * beta.T @ beta
- H = torch.autograd.functional.hessian(huber, solution)
-
- logdet = - 0.5* torch.slogdet(H)[1]* weight
- logprob = -0.5* huber(solution) +logdet
- logprob = -logprob
- return logprob
-
-
- def _log_marginal_map(self, kernel, X, weight):
- # this implementation uses Danskin theorem to simplify gradient propagation
- func = kernel.get_kernel()
- self.jitter = 10e-4
- K_tch =func(self.x, self.x, **X) + torch.eye(self.n, dtype=torch.float64) * self.jitter
-
- # solve
- solution = torch.zeros(size=(self.n, 1), requires_grad=True).reshape(-1).double()
- if self.warm_start_solution is None:
- self.warm_start_solution = solution.clone()
-
- if self.loss == "huber":
- alpha = self._huber_fit(None, newK = K_tch).detach()
- loglikelihood = lambda alpha: torch.nn.functional.huber_loss(K_tch@alpha/self.s,self.y.view(-1)/self.s,
- reduction='sum',delta = self.huber_delta) + self.lam * alpha.T @K_tch@ alpha
-
- solution.data = alpha.reshape(-1).data
- self.warm_start_solution.data = solution.data
- mask = torch.abs(K_tch @ alpha - self.y)/self.s self.svr_eps).int()) \
- + self.lam * alpha.T @K_tch@ alpha
-
- solution.data = alpha.reshape(-1).data
- self.warm_start_solution.data = solution.data
- H = torch.autograd.functional.hessian(loglikelihood, solution)
-
- elif self.loss == "unif":
- alpha = self._unif_fit_torch(None, newK=K_tch).detach()
- con = 2 * self.total_bound * self.prob / ((1 - self.prob) * np.sqrt(2 * np.pi * self.s ** 2))
-
-
- loglikelihood = lambda alpha: torch.sum(torch.log(torch.exp( ((K_tch@alpha-self.y.view(-1))**2)/(2*self.s**2) + np.log(con) ) + 1 ) ) \
- + self.lam * alpha @ K_tch@ alpha
- #v = lambda alpha : torch.sum(torch.exp( ((K_tch@alpha-self.y.view(-1))**2)/(2*self.s**2) + np.log(con) ))
- solution.data = alpha.reshape(-1).data
- self.warm_start_solution.data = solution.data
- H = hessian(loglikelihood)(solution)
-
- logdet = - 0.5* torch.slogdet(H)[1] * weight
- logprob = -0.5* loglikelihood(solution) + logdet
- logprob = -logprob
- return logprob
-
-
-
- def _log_marginal_squared(self, kernel, X, weight):
- func = kernel.get_kernel()
- K = func(self.x, self.x, **X) + torch.eye(self.n, dtype=torch.float64) * self.s * self.s
- logdet = -0.5 * torch.slogdet(K)[1] * weight
- alpha = torch.linalg.solve(K, self.y)
- logprob = -0.5 * torch.mm(torch.t(self.y), alpha) + logdet
- logprob = -logprob
- return logprob
-
- def optimize_params(self, type='bandwidth', restarts=10, regularizer=None,
- maxiter=1000, mingradnorm=1e-4, verbose=False, optimizer="pymanopt", scale=1., weight=1., save = False,
- save_name = 'model.np', init_func = None, bounds = None, parallel = False, cores = None):
-
- # Spectral norm regularizer
- if regularizer is not None:
- if regularizer[0] == "spectral_norm":
- regularizer_func = lambda S: regularizer[1] * torch.norm(1/S[0], p='nuc')
- elif regularizer[0] == 'lasso':
- regularizer_func = lambda S: regularizer[1] * torch.norm(1/S[0], p=1)
- else:
- regularizer_func = None
- else:
- regularizer_func = None
-
- if type == "bandwidth":
- params = {}
- for key, dict2 in self.kernel_object.params_dict.items():
- if 'gamma' in dict2.keys():
- params[key] = {'gamma': (init_func, Euclidean(1), bounds)}
- elif 'ard_gamma' in dict2.keys():
- params[key] = {'ard_gamma': (init_func, Euclidean(len(dict2['group'])), bounds)}
-
- elif type == "bandwidth+noise":
- params = {}
- init_func_noise = lambda x: self.s
- for key, dict2 in self.kernel_object.params_dict.items():
-
- if 'gamma' in dict2.keys():
- params[key] = {'gamma': (init_func, Euclidean(1), bounds)}
-
- elif 'ard_gamma' in dict2.keys():
- params[key] = {'ard_gamma': (init_func, Euclidean(len(dict2['group'])), bounds)}
-
- params['likelihood'] = {'sigma':(init_func_noise, Euclidean(1), None )}
-
- elif type == "rots":
- params = {}
- d = int(self.kernel_object.d)
- for key, dict2 in self.kernel_object.params_dict.items():
- if 'rot' in dict2.keys():
- params[key] = {'rot': (None, Stiefel(d, d), None)}
- elif type == "groups":
- params = {}
- optimizer = "discrete"
- d = self.kernel_object.d
- for key, dict2 in self.kernel_object.params_dict.items():
- if 'groups' in dict2.keys():
- params[key] = {'groups': (None, helper.generate_groups(d), None)}
- pass
- elif type == "covariance":
- params = {}
- d = int(self.kernel_object.d)
- for key, dict2 in self.kernel_object.params_dict.items():
- if 'cov' in dict2.keys():
- params[key] = {'cov': (None, PSDFixedRank(d, d), None)}
- else:
- raise AttributeError("This quick-optimization is not implemented.")
-
- self.optimize_params_general(params=params, restarts=restarts,
- optimizer=optimizer, regularizer_func=regularizer_func,
- maxiter=maxiter, mingradnorm=mingradnorm, verbose=verbose, scale=scale,
- weight=weight, save = save, save_name = save_name, parallel = parallel, cores = cores)
-
- def log_probability(self, xtest, sample):
- from scipy.stats import multivariate_normal
- mu, covar = self.mean_std(xtest, full=True)
- p = np.log(multivariate_normal.pdf(sample.view(-1).numpy(), mean=mu.view(-1).numpy(), cov=covar.numpy()))
- return p
-
- def volume_mean_cvxpy(self, xtest, weights=None, eps=10e-2,
- tol=10e-14, max_weight=1, max_iter=1000,
- verbose=False, scale=10e-4, slope=1.,
- bisections=10, B='auto', optimal_scale=None,
- optimize_scale=False, relax='relu'):
-
- n = self.x.size()[0]
- K = self.get_kernel() # (self.x, self.x)
- Kinv = torch.pinverse(K + eps * torch.eye(K.size()[0]).double()).numpy()
- if weights is None:
- weights = torch.ones(self.x.size()[0]) / n
- if B == 'auto':
- alpha, _ = torch.lstsq(self.y, K)
- beta = K @ alpha
- B = beta.T @ Kinv @ beta
- print("Auto:B", B)
-
- def fun(scale_arg):
- beta = cp.Variable(n)
- if relax == 'relu':
- loss_fn_transformed = cp.sum(cp.pos(weights * slope * (
- cp.abs(beta - self.y.numpy().reshape(-1)) - eps))) + 0.5 * scale_arg * cp.quad_form(beta,
- Kinv)
- elif relax == 'log':
- loss_fn_transformed = cp.sum(cp.logistic(weights * slope * (
- cp.abs(beta - self.y.numpy().reshape(-1)) - eps))) + 0.5 * scale_arg * cp.quad_form(beta,
- Kinv)
-
- # loss_fn_transformed = cp.sum(weights*logit(slope*(cp.abs(beta - self.y.numpy().reshape(-1)) -eps))) + 0.5*scale_arg*cp.quad_form(beta, Kinv)-
-
- prob = cp.Problem(cp.Minimize(loss_fn_transformed))
- # prob.solve(solver=cp.MOSEK, feastol=tol, verbose=False)
- prob.solve(solver=cp.MOSEK, verbose=False)
- if verbose == True:
- print("scale:", scale_arg, "cond:", np.linalg.cond(Kinv), "sub.", beta.value.T @ Kinv @ beta.value - B,
- "B:", B)
- return beta.value.T @ Kinv @ beta.value - B
-
- if optimize_scale:
- return helper.bisection(fun, 0., max_weight, bisections)
-
- if optimal_scale is None:
- scale_star = helper.bisection(fun, 0., max_weight, bisections)
- else:
- scale_star = optimal_scale
-
- beta = cp.Variable(n)
- if relax == 'relu':
- loss_fn_transformed = cp.sum(weights * cp.pos(
- slope * (cp.abs(beta - self.y.numpy().reshape(-1)) - eps))) + 0.5 * scale_star * cp.quad_form(beta,
- Kinv)
- elif relax == 'log':
- loss_fn_transformed = cp.sum(weights * cp.logistic(
- slope * (cp.abs(beta - self.y.numpy().reshape(-1)) - eps))) + 0.5 * scale_star * cp.quad_form(beta,
- Kinv)
- prob = cp.Problem(cp.Minimize(loss_fn_transformed))
- # prob.solve(solver=cp.CVXOPT, feastol=tol, verbose=verbose)
- prob.solve(solver=cp.MOSEK, verbose=verbose)
- beta_torch = torch.from_numpy(beta.value).view(-1, 1)
- alpha = torch.from_numpy(Kinv) @ beta_torch
- ytest = self.kernel(self.x, xtest) @ alpha
- return ytest
-
- def volume_mean(self, xtest, weights=None, eps=10e-2, tol=10e-6, max_iter=1000, verbose=False, eta_start=0.01,
- eta_decrease=0.9, scale=1, slope=1., warm=True, relax='relu', norm=False, B='auto'):
- self.scale = scale
- self.relax = relax
-
- K = self.get_kernel() # (self.x, self.x)
- Kinv = torch.pinverse(K)
-
- if weights is None:
- weights = torch.ones(self.x.size()[0])
- else:
- weights[weights < 10e-6] = 0. # * self.x.size()[0]
- weights = weights.view(-1)
- if warm == True:
- # warm start with L2 fit
- alpha, _ = torch.lstsq(self.y, K)
- beta = K @ alpha
- else:
- beta = torch.randn(size=(self.n, 1)).double() # .requires_grad_(True)*0
-
- # loss_fn_original = lambda alpha: torch.sum(torch.relu(torch.abs(K @ alpha - self.y) -eps)) + 0.5*self.s * alpha.T @ K @ alpha
- if self.relax == "relu":
- loss_fn_transformed = lambda beta: torch.sum(
- torch.relu(torch.abs(beta - self.y) - eps)) + self.scale * 0.5 * self.s * beta.T @ Kinv @ beta
-
- elif self.relax == "tanh":
- self.slope = slope
- tanh = lambda x: (torch.tanh(self.slope * x) + 1) * 0.5
- loss_fn_transformed = lambda beta: torch.sum(weights * tanh(torch.abs(beta - self.y) - eps).view(
- -1)) + 0.5 * self.s * self.scale * beta.T @ Kinv @ beta
-
- elif self.relax == "elu":
- self.slope = slope
- elu = lambda x: torch.nn.elu(x, alpha=self.slope)
- loss_fn_transformed = lambda beta: torch.sum(
- elu(torch.abs(beta - self.y) - eps)) + 0.5 * self.s * self.scale * beta.T @ Kinv @ beta
-
- elif self.relax == "relu":
- return self.volume_mean_cvxpy(xtest, weights=weights, eps=eps, scale=scale, tol=tol)
- else:
- raise AssertionError("Unkown relaxation.")
-
- current_loss = 10e10
- eta = eta_start
- for i in range(max_iter):
- grad = self.s * (Kinv @ beta)
- beta = self.proximal(beta, grad, eta, eps, weights)
- past_loss = current_loss
- current_loss = loss_fn_transformed(beta)
- if current_loss > past_loss:
- eta = eta * eta_decrease
- elif np.abs(current_loss - past_loss) < tol:
- break
-
- # print (i, beta.T)
- if verbose == True:
- print(i, loss_fn_transformed(beta), eta)
-
- print("final norm:", beta.T @ Kinv @ beta)
-
- # alpha = torch.inverse(self.K) @ beta
- alpha = torch.pinverse(K) @ beta
- # alpha = torch.lstsq(K,beta)
- ytest = self.kernel(self.x, xtest) @ alpha
- # max = torch.max(torch.abs(beta - self.y))
- if norm == True:
- return beta.T @ Kinv @ beta
- # yz = self.kernel(self.x, self.x) @ alpha
- # approx_v = torch.sum(torch.relu(torch.abs(beta - self.y) -eps))/max
- # approx_p = approx_v/self.n
- # mask = (torch.abs(yz[:,0] - self.y[:,0])) > eps
- # approx_p = float(torch.sum(mask))/float(self.n)
- return ytest # ,approx_p
-
- def volume_mean_norm(self, xtest, weights=None, eps=10e-2, tol=10e-6, max_iter=1000, verbose=False, eta_start=0.01,
- eta_decrease=0.9, scale=1, slope=1., warm=True, relax='relu', B='auto'):
- K = self.kernel(self.x, self.x)
- Kinv = torch.pinverse(K)
- if B == 'auto':
- alpha, _ = torch.lstsq(self.y, self.K)
- beta = K @ alpha
- B = beta.T @ Kinv @ beta
-
- func = lambda s: self.volume_mean(xtest, weights=weights, eps=eps, tol=tol, max_iter=max_iter, verbose=verbose,
- eta_start=eta_start,
- eta_decrease=eta_decrease, scale=s, slope=slope, warm=warm, relax=relax,
- norm=True) - B
-
- s_star = stpy.optim.custom_optimizers.bisection(func, 0., 1000., 10)
-
- return self.volume_mean(xtest, weights=weights, eps=eps, tol=tol, max_iter=max_iter, verbose=verbose,
- eta_start=eta_start,
- eta_decrease=eta_decrease, scale=s_star, slope=slope, warm=warm, relax=relax,
- norm=False)
-
- def proximal(self, beta, nabla, eta, eps, weights):
- res = beta
- for i in range(self.n):
- from scipy.optimize import minimize
-
- b = float(beta[i, :])
- y = float(self.y[i, :])
- g = float(nabla[i, :])
- w = float(weights[i])
- # s = float(self.s)
-
- tanh = lambda x: (np.tanh(self.slope * x) + 1) * 0.5
- elu = lambda x: torch.elu(x, alpha=self.slope).numpy()
-
- if self.relax == "relu":
- loss_reg = lambda x: w * np.maximum(0, np.abs(x - y) - eps)
- elif self.relax == "tanh":
- loss_reg = lambda x: w * tanh(np.abs(x - y) - eps)
- elif self.relax == "elu":
- loss_reg = lambda x: w * elu(np.abs(x - y) - eps)
- else:
- raise AssertionError("Unkown relaxation.")
-
- loss_scalar = lambda x: ((1 / (2. * eta)) * (x - (b - eta * g)) ** 2) + loss_reg(x)
-
- x0 = np.array([0.])
- # print (minimize(loss_scalar,x0,method ='nelder-mead').x)
- res[i, :] = float(minimize(loss_scalar, x0, method='nelder-mead').x)
- return res
-
- def get_lambdas(self, beta, mean=False):
- """
- Gets lambda function to evaluate acquisiton function and its derivative
- :param beta: beta in GP-UCB
- :return: [lambda,lambda]
- """
- mean = lambda x: self.mean_std(x.reshape(1, -1), reuse=True)[0][0][0]
- sigma = lambda x: self.mean_std(x.reshape(1, -1), reuse=True)[1][0][0]
-
- if mean == True:
- return [mean, sigma]
- else:
- fun = lambda x: -(mean(x) + np.sqrt(beta) * sigma(x))
- grad = lambda x: -complex_step_derivative(fun, 1e-10, x.reshape(1, -1))
-
- return [fun, grad]
-
- def get_kernel(self):
- return self.K
-
- def ucb_optimize(self, beta, multistart=25, lcb=False):
- """
- Optimizes UCB acquisiton function and return next point and its value as output
- :param beta: beta from GP UCB
- :param multistart: number of starts
- :return: (next_point, value at next_point)
- """
-
- mean = lambda x: self.mean_std(x, reuse=True)[0][0][0]
- sigma = lambda x: self.mean_std(x, reuse=True)[1][0][0]
-
- ucb = lambda x: torch.dot(torch.Tensor([1.0, np.sqrt(beta)]), torch.Tensor(
- [self.mean_std(x, reuse=True)[0][0][0], self.mean_std(x, reuse=True)[1][0][0]]))
- lcb = lambda x: torch.dot(torch.Tensor([1.0, np.sqrt(beta)]), torch.Tensor(
- [self.mean_std(x, reuse=True)[0][0][0], -self.mean_std(x, reuse=True)[1][0][0]]))
-
- if lcb == False:
- fun2 = lambda x: -ucb(torch.from_numpy(x).view(1, -1)).numpy()
- else:
- fun2 = lambda x: -lcb(torch.from_numpy(x).view(1, -1)).numpy()
- fun = lambda x: -(
- mean(torch.from_numpy(x).view(1, -1)) + np.sqrt(beta) * sigma(torch.from_numpy(x).view(1, -1)))
-
- self.back_prop = False
- self.mean_std(self.x)
-
- mybounds = self.bounds
-
- results = []
-
- from scipy.optimize import minimize
-
- for i in range(multistart):
- x0 = np.random.randn(self.d)
- for i in range(self.d):
- x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1])
-
- res = minimize(fun2, x0, method="L-BFGS-B", jac=None, tol=0.000001, bounds=mybounds)
- solution = res.x
- results.append([solution, -fun(solution)])
-
- results = np.array(results)
- index = np.argmax(results[:, 1])
- solution = results[index, 0]
-
- return (torch.from_numpy(solution), -fun(solution))
-
- def isin(self, xnext):
- self.epsilon = 0.001
- for v in self.x:
- if torch.norm(v - xnext, p=2) < self.epsilon:
- return True
-
- def sample_and_condition(self, x):
- xprobe = x.view(1, -1)
- fprobe = self.sample(xprobe)
- if not self.isin(xprobe):
- self.x = torch.cat((self.x, xprobe), dim=0)
- self.y = torch.cat((self.y, fprobe), dim=0)
- self.fit_gp(self.x, self.y)
- return -fprobe
-
- def get_lambdas_TH(self):
- fun = lambda x: self.sample_and_condition(x)
- grad = None
- return [fun, grad]
-
- def sample_iteratively_max(self, xtest, multistart=20, minimizer="coordinate-wise", grid=100):
- """
- Samples Path from GP and takes the maximum iteratively
- :param xtest: grid
- :param size: number of samples
- :return: numpy array
- """
- # print ("Iterative:",multistart,minimizer,grid)
- from scipy.optimize import minimize
- # old stuff
- xold = self.x
- yold = self.y
-
- # with fixed grid
- if xtest is not None:
- # number of samples
- nn = xtest.shape[0]
-
- f = torch.zeros(nn, dtype=torch.float64)
-
- for j in range(nn):
- xprobe = xtest[j, :].view(1, -1)
- (K_star, K_star_star) = self.execute(xprobe)
- (ymean, yvar) = self.mean_std(xprobe)
- L = torch.sqrt(K_star_star + self.s * self.s * torch.eye(1, dtype=torch.float64) - yvar)
- fprobe = ymean + L * torch.randn(1, dtype=torch.float64)
- # add x and fprobe to the dataset and redo the whole
- f[j] = fprobe
- if not self.isin(xprobe):
- self.x = torch.cat((self.x, xprobe), dim=0)
- self.y = torch.cat((self.y, fprobe), dim=0)
-
- self.fit_gp(self.x, self.y)
-
- val, index = torch.max(f, dim=0)
- self.fit_gp(xold, yold)
- return (xtest[index, :], f[index])
-
- else:
- # Iterative without grid
-
- # get bounds
- if self.bounds == None:
- mybounds = tuple([(-self.diameter, self.diameter) for i in range(self.d)])
- else:
- mybounds = self.bounds
- [fun, grad] = self.get_lambdas_TH()
-
- results = []
- for j in range(multistart):
-
- # print ("Multistart:",j)
- x0 = torch.randn(self.d, dtype=torch.float64)
- for i in range(self.d):
- x0[i].uniform_(mybounds[i][0], mybounds[i][1])
-
- # simple coordnate-wise optimization
- if minimizer == "coordinate-wise":
- solution = x0
- for i in range(self.d):
- xtest = torch.from_numpy(np.tile(x0, (grid, 1)))
- xtest[:, i] = torch.linspace(mybounds[i][0], mybounds[i][1], grid)
- sample = self.sample(xtest)
-
- ## Add to the posterior
- self.x = torch.cat((self.x, xtest), dim=0)
- self.y = torch.cat((self.y, sample), dim=0)
-
- # argmax
- val, index = torch.max(sample, dim=0)
- out = xtest[index, :]
-
- # fit new GP
- self.fit_gp(self.x, self.y)
- solution[i] = out[0, i]
-
- elif minimizer == "L-BFGS-B":
- solution = np.random.randn(self.d)
- xmax = [b[1] for b in mybounds]
- xmin = [b[0] for b in mybounds]
- bounds = MyBounds(xmax=xmax, xmin=xmin)
- func = lambda x: fun(torch.from_numpy(x)).numpy()[0][0]
- res = scipy.optimize.basinhopping(func, solution, disp=False, niter=grid, accept_test=bounds)
- solution = torch.from_numpy(res.x)
-
- else:
- raise AssertionError("Wrong optimizer selected.")
-
- results.append(torch.cat((solution, -fun(solution)[0])))
- self.x = xold
- self.y = yold
- self.fit_gp(self.x, self.y)
-
- results = torch.stack(results)
- val, index = torch.max(results[:, -1], dim=0)
- solution = results[index, 0:self.d].view(1, self.d)
- self.x = xold
- self.y = yold
- self.fit_gp(self.x, self.y)
-
- return (solution, -fun(solution))
+ def __init__(
+ self,
+ gamma=1,
+ s=0.001,
+ kappa=1.0,
+ kernel_name="squared_exponential",
+ diameter=1.0,
+ groups=None,
+ bounds=None,
+ nu=1.5,
+ kernel=None,
+ d=1,
+ power=2,
+ lam=1.0,
+ loss="squared",
+ huber_delta=1.35,
+ hyper="classical",
+ B=1.0,
+ svr_eps=0.1,
+ ):
+ """
+
+ :param gamma: Smoothness parameter for the squared exponential, Laplace and Matern kernels
+ :param s: level of noise
+ :param kernel: choose from a list
+ :param diameter: diameter of the set (deprecated)
+ :param groups: additive groups
+ :param bounds: bounds for the continuous optimization
+ :param nu: parameter for the Matern kernel
+ """
+
+ ## GP properties
+ self.s = s
+ self.d = d
+ self.x = None
+ self.K = np.array([1.0])
+ self.mu = 0.0
+ self.lam = lam
+ self.total_bound = B
+ self.prob = 0.5
+ self.svr_eps = svr_eps
+ self.safe = False
+ self.fitted = False
+ self.diameter = diameter
+ self.bounds = bounds
+ self.admits_first_order = False
+ self.back_prop = True
+ self.loss = loss
+ self.huber_delta = huber_delta
+ self.hyper = hyper
+ self.prepared_log_marginal = False
+ self.warm_start_solution = None
+ self.max_size = 10000
+ ## kernel hyperparameters
+ if kernel is not None:
+ self.kernel_object = kernel
+ self.kernel = kernel.kernel
+ self.d = kernel.d
+ else:
+ self.kernel_object = KernelFunction(
+ kernel_name=kernel_name,
+ gamma=gamma,
+ nu=nu,
+ groups=groups,
+ kappa=kappa,
+ power=power,
+ d=d,
+ )
+ self.kernel = self.kernel_object.kernel
+
+ self.gamma = gamma
+ self.v = nu
+ self.groups = groups
+ self.kappa = kappa
+ self.custom = kernel
+ self.optkernel = kernel_name
+
+ def residuals(self, x, y):
+ res = self.mean(x) - y
+ return res
+
+ def description(self):
+ """
+ Description of GP in text
+ :return: string with description
+ """
+ return self.kernel_object.description() + "\nlambda=" + str(self.s)
+
+ def embed(self, x):
+ return self.kernel_object.embed(x)
+
+ def get_basis_size(self):
+ return self.kernel_object.get_basis_size()
+
+ def make_safe(self, x):
+ """
+ Make the input dataset numerically stable by removing duplicates?
+ :param x:
+ :return:
+ """
+ self.epsilon = 0.001
+ # remove vectors that are very close to each other
+ return x
+
+ def add_data_point(self, x, y, Sigma=None):
+
+ if self.x is not None:
+ self.x = torch.cat((self.x, x), dim=0)
+ self.y = torch.cat((self.y, y), dim=0)
+ if Sigma is None:
+ self.Sigma = torch.block_diag(
+ self.Sigma, torch.eye(x.size()[0], dtype=torch.double) * self.s
+ )
+ else:
+ self.x = x
+ self.y = y
+ self.Sigma = Sigma
+ self.fit_gp(self.x, self.y, Sigma=self.Sigma)
+
+ def fit(self, x=None, y=None):
+ if x is not None:
+ self.fit_gp(x, y)
+ else:
+ self.fit_gp(self.x, self.y)
+
+ def lcb(self, xtest):
+ """
+ Lower confidence bound
+ :return:
+ """
+ mu, s = self.mean_std(xtest)
+ return mu - 2 * s
+
+ def ucb(self, xtest):
+ """
+ Upper confidence bound
+ :param xtest:
+ :return:
+ """
+ mu, s = self.mean_std(xtest)
+ return mu + 2 * s
+
+ def fit_gp(self, x, y, Sigma=None, iterative=False, extrapoint=False):
+ """
+ Fits the Gaussian process, possible update is via iterative inverse
+ :param x: data x
+ :param y: values y
+ :param iterative: iterative inverse, where only last point of x is used
+ :param extrapoint: iterative inverse must be allowed, x is the only addition
+ :return:
+ """
+ # first fit
+ try:
+ self.n, self.d = list(x.size())
+ except:
+ self.n, self.d = x.shape
+
+ if Sigma is None:
+ self.Sigma = (self.s) * torch.eye(self.n, dtype=torch.float64)
+ else:
+ self.Sigma = Sigma
+
+ if self.fitted == False or iterative == False:
+
+ if self.safe == True:
+ x = self.make_safe(x)
+
+ self.x = x
+ self.y = y
+ self.K = self.kernel(x, x) + self.Sigma.T @ self.Sigma
+ self.fitted = True
+ else:
+ # iterative inverse
+ if iterative == True:
+ if extrapoint == False:
+ last_point = self.x[-1, :].view(1, -1)
+ else:
+ last_point = x
+ old_K = self.K
+ old_Kinv = self.Kinv
+ else:
+ pass
+ self.mean_std(x)
+ return None
+
+ def norm(self):
+ if self.fitted:
+ val = torch.sqrt(self.A.T @ self.kernel(self.x, self.x) @ self.A)
+ return val
+ else:
+ return None
+
+ def beta(self, delta=1e-3, norm=1):
+ """
+ return concentration parameter given the current estimates
+
+ :param delta: failure probability
+ :param norm: norm assumption
+ :return:
+ """
+ beta_value = self.s * norm + torch.sqrt(
+ 2 * torch.log(1.0 / delta + torch.log(torch.det(self.K) / self.s**self.n))
+ )
+ return beta_value
+
+ def execute(self, xtest):
+ """
+ Calculates the covariance between data and xtest
+ :param xtest:
+ :return:
+ """
+ if self.fitted == True:
+ K_star = self.kernel(self.x, xtest)
+ else:
+ K_star = None
+ K_star_star = self.kernel(xtest, xtest)
+ return (K_star, K_star_star)
+
+ def _huber_fit(self, K_star, newK=None):
+ alpha = cp.Variable(self.n)
+ self.jitter = 10e-5
+ if newK is None:
+ K = self.kernel(self.x, self.x) + self.jitter * torch.eye(
+ self.n, dtype=torch.float64
+ )
+ else:
+ K = newK.detach()
+ K = cp.atoms.affine.wraps.psd_wrap(K)
+ objective = cp.Minimize(
+ cp.sum(
+ cp.huber(
+ (K @ alpha - self.y.view(-1).numpy()) / self.s, M=self.huber_delta
+ )
+ )
+ + self.lam * cp.quad_form(alpha, K)
+ )
+ prob = cp.Problem(objective)
+ prob.solve(solver=cp.MOSEK, enforce_dpp=False)
+ if K_star is not None:
+ return K_star @ torch.from_numpy(alpha.value).view(-1, 1)
+ else:
+ return torch.from_numpy(alpha.value).view(-1, 1)
+
+ def _svr_fit(self, K_star, newK=None):
+ alpha = cp.Variable(self.n)
+ self.jitter = 10e-5
+ if newK is None:
+ K = self.kernel(self.x, self.x) + self.jitter * torch.eye(
+ self.n, dtype=torch.float64
+ )
+ else:
+ K = newK.detach()
+
+ K = cp.atoms.affine.wraps.psd_wrap(K)
+ objective = cp.Minimize(self.lam * cp.quad_form(alpha, K))
+ constraints = [cp.abs(K @ alpha - self.y.view(-1).numpy()) <= self.svr_eps]
+ prob = cp.Problem(objective, constraints)
+ prob.solve(solver=cp.MOSEK, enforce_dpp=False)
+ if K_star is not None:
+ return K_star @ torch.from_numpy(alpha.value).view(-1, 1)
+ else:
+ return torch.from_numpy(alpha.value).view(-1, 1)
+
+ def _unif_fit(self, K_star, newK=None):
+ alpha = cp.Variable((self.n, 1))
+ self.jitter = 10e-5
+ if newK is None:
+ K = self.kernel(self.x, self.x) + self.jitter * torch.eye(
+ self.n, dtype=torch.float64
+ )
+ else:
+ K = newK.detach()
+
+ K = cp.atoms.affine.wraps.psd_wrap(K)
+ con = (
+ 2
+ * self.total_bound
+ * self.prob
+ / ((1 - self.prob) * np.sqrt(2 * np.pi * self.s**2))
+ )
+ objective = cp.Minimize(
+ cp.sum(
+ cp.logistic(
+ cp.square(
+ (K @ alpha - self.y.view(-1, 1).numpy()) / (np.sqrt(2) * self.s)
+ )
+ + np.log(con)
+ )
+ )
+ + self.lam * cp.quad_form(alpha, K)
+ )
+ prob = cp.Problem(objective)
+ prob.solve(solver=cp.MOSEK, enforce_dpp=False)
+ if K_star is not None:
+ return K_star @ torch.from_numpy(alpha.value).view(-1, 1)
+ else:
+ return torch.from_numpy(alpha.value).view(-1, 1)
+
+ def _unif_fit_torch(self, K_star, newK=None, warm_start=None):
+ self.jitter = 10e-5
+ if newK is None:
+ K = self.kernel(self.x, self.x) + self.jitter * torch.eye(
+ self.n, dtype=torch.float64
+ )
+ else:
+ K = newK.detach()
+
+ con = (
+ 2
+ * self.total_bound
+ * self.prob
+ / ((1 - self.prob) * np.sqrt(2 * np.pi * self.s**2))
+ )
+ unif = (
+ lambda alpha: torch.sum(
+ torch.log(
+ torch.exp(
+ ((K @ alpha - self.y.view(-1)) ** 2) / (2 * self.s**2)
+ + np.log(con)
+ )
+ + 1
+ )
+ )
+ + self.lam * alpha @ K @ alpha
+ )
+ if warm_start is None:
+ x_init = torch.zeros(size=(self.n, 1)).view(-1).double()
+ else:
+ x_init = warm_start.view(-1)
+
+ res = minimize_torch(
+ unif,
+ x_init,
+ method="l-bfgs",
+ tol=1e-3,
+ disp=0,
+ options={"max_iter": 200, "gtol": 1e-3},
+ )
+ alpha = res.x
+
+ if K_star is not None:
+ return K_star @ alpha.view(-1, 1)
+ else:
+ return alpha.view(-1, 1)
+
+ def _huber_fit_torch(self, K_star, newK=None):
+ self.jitter = 10e-5
+ if newK is None:
+ K = self.kernel(self.x, self.x) + self.jitter * torch.eye(
+ self.n, dtype=torch.float64
+ )
+ else:
+ K = newK
+ L = torch.linalg.cholesky(K)
+
+ huber = (
+ lambda beta: torch.nn.functional.huber_loss(
+ L @ beta / self.s,
+ self.y.view(-1) / self.s,
+ reduction="sum",
+ delta=self.huber_delta,
+ )
+ + self.lam * beta @ beta
+ )
+ # x_init = torch.linalg.solve(L.T@L+torch.eye(self.n).double()*self.s**2*self.lam, self.y)
+ x_init = torch.zeros(size=(self.n, 1)).view(-1).double()
+ res = minimize_torch(
+ huber,
+ x_init,
+ method="l-bfgs",
+ tol=1e-4,
+ disp=0,
+ options={"max_iter": 10**3, "gtol": 1e-4},
+ )
+ alpha = torch.linalg.solve(L, res.x)
+ if K_star is not None:
+ return K_star @ alpha.view(-1, 1)
+ else:
+ return alpha.view(-1, 1)
+
+ def mean_std(self, xtest, full=False, reuse=False):
+ if xtest.size()[0] < self.max_size:
+ return self.mean_std_sub(xtest, full=full, reuse=reuse)
+ else:
+ stepby = self.max_size
+ mu = torch.zeros(size=(xtest.size()[0], 1)).double()
+ std = torch.zeros(size=(xtest.size()[0], 1)).double()
+
+ # first
+ i = 0
+ mu[i * stepby : (i + 1) * stepby], std[i * stepby : (i + 1) * stepby] = (
+ self.mean_std_sub(xtest[i * stepby : (i + 1) * stepby, :], reuse=False)
+ )
+
+ for i in np.arange(1, xtest.size()[0] // stepby, 1):
+ print(i, "/", xtest.size()[0] // stepby)
+ (
+ mu[i * stepby : (i + 1) * stepby],
+ std[i * stepby : (i + 1) * stepby],
+ ) = self.mean_std_sub(
+ xtest[i * stepby : (i + 1) * stepby, :], reuse=True
+ )
+
+ # last
+ if xtest.size()[0] % stepby > 0:
+ (
+ mu[xtest.size()[0] - xtest.size()[0] % stepby :],
+ std[xtest.size()[0] - xtest.size()[0] % stepby :],
+ ) = self.mean_std_sub(
+ xtest[xtest.size()[0] - xtest.size()[0] % stepby :, :], reuse=True
+ )
+
+ return mu, std
+
+ def mean_std_sub(self, xtest, full=False, reuse=False):
+ """
+ Return posterior mean and variance as tuple
+ :param xtest: grid, numpy array (2D)
+ :param full: Instead of just pointwise variance, the full covariance can be output (bool)
+ :return: (tensor,tensor)
+ """
+ if full:
+ (K_star, K_star_star) = self.execute(xtest)
+ else:
+ K_star = self.kernel(self.x, xtest)
+ diag_K_star_star = torch.hstack(
+ [
+ self.kernel(xtest[i, :].view(1, -1), xtest[i, :].view(1, -1)).view(
+ 1
+ )
+ for i in range(xtest.size()[0])
+ ]
+ )
+
+ if self.fitted == False:
+ # the process is not fitted
+
+ if full == False:
+ x = torch.sum(xtest, dim=1)
+ # first = torch.diag(K_star_star).view(-1, 1)
+ first = diag_K_star_star.view(-1, 1)
+ variance = first
+ yvar = torch.sqrt(variance)
+ else:
+ x = torch.sum(xtest, dim=1)
+ first = K_star_star
+ yvar = first
+
+ return (0 * x.view(-1, 1), yvar)
+
+ else:
+
+ if self.back_prop == False:
+ if reuse == False:
+ # self.decomp = torch.lu(self.K.unsqueeze(0))
+ self.LU, self.pivot = torch.linalg.lu_factor(self.K.unsqueeze(0))
+ # self.A = torch.lu_solve(self.y.unsqueeze(0), *self.decomp)[0, :, :]
+ self.A = torch.linalg.lu_solve(
+ self.LU, self.pivot, self.y.unsqueeze(0)
+ )[0, :, :]
+ self.B = torch.t(
+ torch.linalg.lu_solve(
+ self.LU, self.pivot, torch.t(K_star).unsqueeze(0)
+ )[0, :, :]
+ )
+ else:
+ if reuse == False:
+ self.A = torch.linalg.lstsq(self.K, self.y)[0]
+ # self.B = torch.t(torch.linalg.solve(self.K, torch.t(K_star)))
+ self.B = torch.t(torch.linalg.lstsq(self.K, torch.t(K_star))[0])
+
+ if self.loss == "squared":
+ ymean = torch.mm(K_star, self.A)
+ elif self.loss == "huber":
+ ymean = self._huber_fit(K_star)
+ elif self.loss == "svr":
+ ymean = self._svr_fit(K_star)
+ elif self.loss == "unif" or self.loss == "unif_new":
+ ymean = self._unif_fit_torch(K_star)
+ else:
+ raise AssertionError("Loss function not implemented.")
+
+ if full == False:
+ first = diag_K_star_star.view(-1, 1)
+ second = torch.einsum("ij,ji->i", (self.B, torch.t(K_star))).view(-1, 1)
+ variance = first - second
+ yvar = torch.sqrt(variance)
+ else:
+ first = K_star_star
+ second = torch.mm(self.B, torch.t(K_star))
+ yvar = first - second
+
+ return (ymean, yvar)
+
+ def mean(self, xtest):
+ """
+ Calculates the mean prediction over a specific input space
+ :param xtest: input
+ :return:
+ """
+ K_star = self.kernel(self.x, xtest)
+
+ if self.loss == "squared":
+ ymean = torch.mm(K_star, self.A)
+ elif self.loss == "huber":
+ ymean = self._huber_fit(K_star)
+ else:
+ raise AssertionError("Loss function not implemented.")
+
+ return ymean
+
+ def gradient_mean_var(self, point, hessian=True):
+ """
+ Can calculate gradient at single point atm.
+
+ :param point:
+ :return:
+ """
+
+ # mean
+ point.requires_grad_(True)
+ mu = self.mean_std(point)[0]
+ nabla_mu = grad(mu, point, create_graph=True)[0][0]
+
+ if hessian == True:
+ # variance
+ H = self.kernel_object.get_2_der(point)
+ C = self.kernel_object.get_1_der(point, self.x)
+
+ V = H - torch.t(C) @ self.K @ C
+
+ return [nabla_mu, V]
+ else:
+ return nabla_mu
+
+ def mean_gradient_hessian(self, xtest, hessian=False):
+ hessian_mu = torch.zeros(size=(self.d, self.d), dtype=torch.float64)
+ xtest.requires_grad_(True)
+ # xtest.retain_grad()
+ mu = self.mean_std(xtest)[0]
+ # mu.backward(retain_graph=True)
+
+ # nabla_mu = xtest.grad
+ nabla_mu = grad(mu, xtest, create_graph=True)[0][0]
+
+ if hessian == False:
+ return nabla_mu
+ else:
+ for i in range(self.d):
+ hessian_mu[i, :] = grad(
+ nabla_mu[i], xtest, create_graph=True, retain_graph=True
+ )[0][0]
+ return [nabla_mu, hessian_mu]
+
+ def sample(self, xtest, size=1, jitter=10e-8):
+ """
+ Samples a path from the GP and returns a torch tensor evaluated over the grid
+ :param xtest: grid
+ :param size: number of samples
+ :return: torch tensor
+ """
+ nn = list(xtest.size())[0]
+
+ if self.fitted == True:
+ (ymean, yvar) = self.mean_std(xtest, full=True)
+ Cov = yvar + 10e-10 * torch.eye(nn, dtype=torch.float64)
+ L = torch.linalg.cholesky(Cov)
+ # L = torch.from_numpy(np.linalg.cholesky(Cov.numpy()))
+ random_vector = torch.normal(
+ mean=torch.zeros(nn, size, dtype=torch.float64), std=1.0
+ )
+ f = ymean + torch.mm(L, random_vector)
+ else:
+ (K_star, K_star_star) = self.execute(xtest)
+ L = torch.linalg.cholesky(
+ K_star_star + jitter * torch.eye(nn, dtype=torch.float64)
+ )
+ random_vector = torch.normal(
+ mean=torch.zeros(nn, size, dtype=torch.float64), std=1.0
+ )
+ f = self.mu + torch.mm(L, random_vector)
+ return f
+
+ def sample_and_max(self, xtest, size=1):
+ """
+ Samples Path from GP and takes argmax
+ :param xtest: grid
+ :param size: number of samples
+ :return: (argmax, max)
+ """
+ f = self.sample(xtest, size=size)
+ self.temp = f
+ val, index = torch.max(f, dim=0)
+ return (xtest[index, :], val)
+
+ def log_marginal(self, kernel, X, weight):
+
+ if self.loss == "squared":
+ return self._log_marginal_squared(kernel, X, weight)
+ elif self.loss == "unif_new":
+ return self._log_marginal_unif(kernel, X, weight)
+ else:
+ return self._log_marginal_map(kernel, X, weight)
+
+ def _log_marginal_unif(self, kernel, X, weight):
+ if not self.prepared_log_marginal:
+ self._prepare_log_marginal_unif()
+
+ func = kernel.get_kernel()
+ self.jitter = 10e-4
+ K = (
+ func(self.x, self.x, **X)
+ + torch.eye(self.n, dtype=torch.float64) * self.jitter
+ )
+ # print ("Kernel")
+ # print (K)
+ L = torch.linalg.cholesky(K)
+ self.L_unif.value = L.data.numpy()
+
+ self.prob_unif.solve(solver=cp.MOSEK, enforce_dpp=False, warm_start=True)
+
+ solution = (
+ torch.zeros(size=(self.n, 1), requires_grad=True).reshape(-1).double()
+ )
+ solution.data = torch.from_numpy(self.beta_unif.value)
+ con = (
+ 2
+ * self.total_bound
+ * self.prob
+ / ((1 - self.prob) * np.sqrt(2 * np.pi * self.s**2))
+ )
+
+ loglikelihood = (
+ lambda beta: torch.sum(
+ torch.log(
+ torch.exp(
+ ((L @ beta - self.y.view(-1)) ** 2) / (2 * self.s**2)
+ + np.log(con)
+ )
+ + 1
+ )
+ )
+ + self.lam * beta.T @ beta
+ )
+
+ H = hessian(loglikelihood)(solution)
+ logdet = -0.5 * torch.slogdet(H)[1] * weight
+ logprob = -0.5 * loglikelihood(solution) + logdet
+ logprob = -logprob
+ return logprob
+
+ def _prepare_log_marginal_unif(self):
+
+ self.beta_unif = cp.Variable(self.n)
+ self.L_unif = cp.Parameter((self.n, self.n))
+
+ con = (
+ 2
+ * self.total_bound
+ * self.prob
+ / ((1 - self.prob) * np.sqrt(2 * np.pi * self.s**2))
+ )
+ # self.objective_unif = cp.Minimize(cp.sum(cp.logistic(cp.square(
+ # (self.K_unif @ self.alpha_unif - self.y.view(-1).numpy()) / (np.sqrt(2) * self.s)) + np.log(con))) + self.lam * cp.quad_form(
+ # self.alpha_unif, self.L))
+ self.objective_unif = cp.Minimize(
+ cp.sum(
+ cp.logistic(
+ cp.square(
+ (self.L_unif @ self.beta_unif - self.y.view(-1).numpy())
+ / (np.sqrt(2) * self.s)
+ )
+ + np.log(con)
+ )
+ )
+ + self.lam * cp.sum_squares(self.beta_unif)
+ )
+ self.prob_unif = cp.Problem(self.objective_unif)
+ self.prepared_log_marginal = True
+
+ def _prepare_log_marginal_huber(self):
+ beta = cp.Variable(self.n)
+ L = cp.Parameter((self.n, self.n))
+
+ objective = cp.Minimize(
+ cp.sum(
+ cp.huber(
+ (L @ beta - self.y.view(-1).numpy()) / self.s, M=self.huber_delta
+ )
+ )
+ + self.lam * cp.sum_squares(beta)
+ )
+
+ prob = cp.Problem(objective)
+ cvxpylayer = CvxpyLayer(prob, parameters=[L], variables=[beta])
+ self.prepared_log_marginal = True
+ print("cvxpy-layer has been initialized.")
+ return cvxpylayer
+
+ def _log_marginal_huber_cvxpy(self, kernel, X, weight):
+ func = kernel.get_kernel()
+ self.jitter = 10e-4
+ L_tch = torch.linalg.cholesky(
+ func(self.x, self.x, **X)
+ + torch.eye(self.n, dtype=torch.float64) * self.jitter
+ )
+
+ if not self.prepared_log_marginal:
+ self._cvxpylayer = self._prepare_log_marginal_huber()
+ solution = self._cvxpylayer(L_tch)[0]
+
+ huber = (
+ lambda beta: torch.nn.functional.huber_loss(
+ L_tch @ beta / self.s,
+ self.y.view(-1) / self.s,
+ reduction="sum",
+ delta=self.huber_delta,
+ )
+ + self.lam * beta.T @ beta
+ )
+ H = torch.autograd.functional.hessian(huber, solution)
+
+ logdet = -0.5 * torch.slogdet(H)[1] * weight
+ logprob = -0.5 * huber(solution) + logdet
+ logprob = -logprob
+ return logprob
+
+ def _log_marginal_map(self, kernel, X, weight):
+ # This implementation uses Danskin's theorem to simplify gradient propagation.
+ func = kernel.get_kernel()
+ self.jitter = 10e-4
+ K_tch = (
+ func(self.x, self.x, **X)
+ + torch.eye(self.n, dtype=torch.float64) * self.jitter
+ )
+
+ # solve
+ solution = (
+ torch.zeros(size=(self.n, 1), requires_grad=True).reshape(-1).double()
+ )
+ if self.warm_start_solution is None:
+ self.warm_start_solution = solution.clone()
+
+ if self.loss == "huber":
+ alpha = self._huber_fit(None, newK=K_tch).detach()
+ loglikelihood = (
+ lambda alpha: torch.nn.functional.huber_loss(
+ K_tch @ alpha / self.s,
+ self.y.view(-1) / self.s,
+ reduction="sum",
+ delta=self.huber_delta,
+ )
+ + self.lam * alpha.T @ K_tch @ alpha
+ )
+
+ solution.data = alpha.reshape(-1).data
+ self.warm_start_solution.data = solution.data
+ mask = torch.abs(K_tch @ alpha - self.y) / self.s < self.huber_delta
+ mask = mask.view(-1).double()
+ D = torch.diag(mask)
+ H = K_tch @ D @ K_tch + 2 * self.lam * K_tch
+
+ elif self.loss == "svr":
+ alpha = self._svr_fit(None, newK=K_tch).detach()
+
+ loglikelihood = (
+ lambda alpha: torch.sum(
+ torch.abs(K_tch @ alpha - self.y.view(-1))
+ * (K_tch @ alpha - self.y.view(-1) > self.svr_eps).int()
+ )
+ + self.lam * alpha.T @ K_tch @ alpha
+ )
+
+ solution.data = alpha.reshape(-1).data
+ self.warm_start_solution.data = solution.data
+ H = torch.autograd.functional.hessian(loglikelihood, solution)
+
+ elif self.loss == "unif":
+ alpha = self._unif_fit_torch(None, newK=K_tch).detach()
+ con = (
+ 2
+ * self.total_bound
+ * self.prob
+ / ((1 - self.prob) * np.sqrt(2 * np.pi * self.s**2))
+ )
+
+ loglikelihood = (
+ lambda alpha: torch.sum(
+ torch.log(
+ torch.exp(
+ ((K_tch @ alpha - self.y.view(-1)) ** 2) / (2 * self.s**2)
+ + np.log(con)
+ )
+ + 1
+ )
+ )
+ + self.lam * alpha @ K_tch @ alpha
+ )
+ # v = lambda alpha : torch.sum(torch.exp( ((K_tch@alpha-self.y.view(-1))**2)/(2*self.s**2) + np.log(con) ))
+ solution.data = alpha.reshape(-1).data
+ self.warm_start_solution.data = solution.data
+ H = hessian(loglikelihood)(solution)
+
+ logdet = -0.5 * torch.slogdet(H)[1] * weight
+ logprob = -0.5 * loglikelihood(solution) + logdet
+ logprob = -logprob
+ return logprob
+
+    def _log_marginal_squared(self, kernel, X, weight):
+        """
+        Negative log-marginal likelihood (up to constants) for the squared loss.
+
+        :param kernel: object whose ``get_kernel()`` returns the kernel callable
+        :param X: dict of kernel hyper-parameters forwarded as keyword arguments
+        :param weight: multiplier applied to the log-determinant term
+        :return: ``0.5 * y^T K^{-1} y + 0.5 * weight * log|det K|`` with
+            ``K = k(x, x) + s^2 I``
+        """
+        kernel_fn = kernel.get_kernel()
+        noise = torch.eye(self.n, dtype=torch.float64) * self.s * self.s
+        K = kernel_fn(self.x, self.x, **X) + noise
+
+        _, log_abs_det = torch.slogdet(K)
+        complexity = -0.5 * log_abs_det * weight
+
+        alpha = torch.linalg.solve(K, self.y)
+        data_fit = -0.5 * torch.mm(torch.t(self.y), alpha)
+        return -(data_fit + complexity)
+
+ def optimize_params(
+ self,
+ type="bandwidth",
+ restarts=10,
+ regularizer=None,
+ maxiter=1000,
+ mingradnorm=1e-4,
+ verbose=False,
+ optimizer="pymanopt",
+ scale=1.0,
+ weight=1.0,
+ save=False,
+ save_name="model.np",
+ init_func=None,
+ bounds=None,
+ parallel=False,
+ cores=None,
+ ):
+ """
+ Convenience front-end that builds the ``params`` specification for a
+ named optimisation preset and forwards it to ``optimize_params_general``.
+
+ :param type: preset name; one of ``"bandwidth"``, ``"bandwidth+noise"``,
+ ``"rots"``, ``"groups"`` or ``"covariance"``
+ :param regularizer: ``None`` or a pair ``(name, strength)`` with name
+ ``"spectral_norm"`` or ``"lasso"``; other names mean no regulariser
+ :param init_func: initialiser placed in the bandwidth entries
+ :param bounds: bounds placed in the bandwidth entries
+ :raises AttributeError: if ``type`` is not one of the presets above
+ :return: None; all remaining arguments are passed through unchanged
+ """
+
+ # Spectral norm regularizer
+ if regularizer is not None:
+ if regularizer[0] == "spectral_norm":
+ # nuclear norm of the element-wise inverse of the first parameter
+ regularizer_func = lambda S: regularizer[1] * torch.norm(
+ 1 / S[0], p="nuc"
+ )
+ elif regularizer[0] == "lasso":
+ regularizer_func = lambda S: regularizer[1] * torch.norm(1 / S[0], p=1)
+ else:
+ regularizer_func = None
+ else:
+ regularizer_func = None
+
+ # Each params entry is a triple (initialiser, manifold, bounds) keyed by the
+ # kernel name and then by the hyper-parameter name.
+ if type == "bandwidth":
+ params = {}
+ for key, dict2 in self.kernel_object.params_dict.items():
+ if "gamma" in dict2.keys():
+ params[key] = {"gamma": (init_func, Euclidean(1), bounds)}
+ elif "ard_gamma" in dict2.keys():
+ # one bandwidth per entry of dict2["group"]
+ params[key] = {
+ "ard_gamma": (init_func, Euclidean(len(dict2["group"])), bounds)
+ }
+
+ elif type == "bandwidth+noise":
+ params = {}
+ # the noise level starts from the current self.s
+ init_func_noise = lambda x: self.s
+ for key, dict2 in self.kernel_object.params_dict.items():
+
+ if "gamma" in dict2.keys():
+ params[key] = {"gamma": (init_func, Euclidean(1), bounds)}
+
+ elif "ard_gamma" in dict2.keys():
+ params[key] = {
+ "ard_gamma": (init_func, Euclidean(len(dict2["group"])), bounds)
+ }
+
+ params["likelihood"] = {"sigma": (init_func_noise, Euclidean(1), None)}
+
+ elif type == "rots":
+ params = {}
+ d = int(self.kernel_object.d)
+ for key, dict2 in self.kernel_object.params_dict.items():
+ if "rot" in dict2.keys():
+ params[key] = {"rot": (None, Stiefel(d, d), None)}
+ elif type == "groups":
+ params = {}
+ # this preset overrides the caller's optimizer choice
+ optimizer = "discrete"
+ d = self.kernel_object.d
+ for key, dict2 in self.kernel_object.params_dict.items():
+ # NOTE(review): tests for the key "groups" whereas the bandwidth presets
+ # read dict2["group"] - confirm which spelling params_dict uses
+ if "groups" in dict2.keys():
+ params[key] = {"groups": (None, helper.generate_groups(d), None)}
+ pass
+ elif type == "covariance":
+ params = {}
+ d = int(self.kernel_object.d)
+ for key, dict2 in self.kernel_object.params_dict.items():
+ if "cov" in dict2.keys():
+ params[key] = {"cov": (None, PSDFixedRank(d, d), None)}
+ else:
+ raise AttributeError("This quick-optimization is not implemented.")
+
+ self.optimize_params_general(
+ params=params,
+ restarts=restarts,
+ optimizer=optimizer,
+ regularizer_func=regularizer_func,
+ maxiter=maxiter,
+ mingradnorm=mingradnorm,
+ verbose=verbose,
+ scale=scale,
+ weight=weight,
+ save=save,
+ save_name=save_name,
+ parallel=parallel,
+ cores=cores,
+ )
+
+    def log_probability(self, xtest, sample):
+        """
+        Log-density of ``sample`` under the Gaussian returned by ``mean_std``.
+
+        The mean and full covariance come from
+        ``self.mean_std(xtest, full=True)``.  The value is computed with
+        ``logpdf`` rather than ``log(pdf(...))``: the plain density underflows
+        to ``0`` for samples far from the mean, which turned the result into
+        ``-inf`` together with a numpy warning.
+
+        :param xtest: points at which the model is evaluated
+        :param sample: tensor with one value per row of ``xtest``
+        :return: log-density as a numpy float
+        """
+        from scipy.stats import multivariate_normal
+
+        mu, covar = self.mean_std(xtest, full=True)
+        return multivariate_normal.logpdf(
+            sample.view(-1).numpy(), mean=mu.view(-1).numpy(), cov=covar.numpy()
+        )
+
+    def volume_mean_cvxpy(
+        self,
+        xtest,
+        weights=None,
+        eps=10e-2,
+        tol=10e-14,
+        max_weight=1,
+        max_iter=1000,
+        verbose=False,
+        scale=10e-4,
+        slope=1.0,
+        bisections=10,
+        B="auto",
+        optimal_scale=None,
+        optimize_scale=False,
+        relax="relu",
+    ):
+        """
+        Least-volume style mean estimate solved with cvxpy (MOSEK).
+
+        A relaxed epsilon-insensitive loss on the fitted values ``beta`` is
+        minimised together with ``0.5 * scale * beta^T Kinv beta``.  The scale
+        is either given (``optimal_scale``) or found by bisection so that
+        ``beta^T Kinv beta`` matches the budget ``B``.
+
+        :param xtest: points at which the estimate is returned
+        :param weights: per-point weights; uniform ``1/n`` when ``None``
+        :param eps: half-width of the insensitive zone, also used as the
+            ridge added to the kernel matrix before the pseudo-inverse
+        :param max_weight: upper end of the bisection interval for the scale
+        :param bisections: number of bisection steps
+        :param B: norm budget, or ``"auto"`` to use the norm of the
+            least-squares fit
+        :param optimal_scale: skip the bisection and use this scale
+        :param optimize_scale: if true, return only the bisection result
+        :param relax: ``"relu"`` or ``"log"``
+        :param tol, max_iter, scale: accepted for interface compatibility; not
+            used by this method
+        :raises AssertionError: if ``relax`` is not a supported relaxation
+        :return: predictions at ``xtest`` (or the scale if ``optimize_scale``)
+        """
+        # Previously an unsupported value surfaced later as an unbound
+        # variable error inside the solver closure.
+        if relax not in ("relu", "log"):
+            raise AssertionError("Unknown relaxation.")
+
+        n = self.x.size()[0]
+        K = self.get_kernel()  # (self.x, self.x)
+        Kinv = torch.pinverse(K + eps * torch.eye(K.size()[0]).double()).numpy()
+        targets = self.y.numpy().reshape(-1)
+        if weights is None:
+            weights = torch.ones(self.x.size()[0]) / n
+        if B == "auto":
+            # torch.lstsq was removed from PyTorch; torch.linalg.lstsq takes
+            # the matrix first and exposes the solution as an attribute.
+            alpha = torch.linalg.lstsq(K, self.y).solution
+            beta = K @ alpha
+            B = beta.T @ Kinv @ beta
+            print("Auto:B", B)
+
+        def fun(scale_arg):
+            # Norm excess over the budget B for a given regularisation scale.
+            beta = cp.Variable(n)
+            excess = cp.abs(beta - targets) - eps
+            # NOTE(review): here the weights sit inside the relaxation, while
+            # the final solve below applies them outside - kept as found.
+            if relax == "relu":
+                data_term = cp.sum(cp.pos(weights * slope * excess))
+            else:
+                data_term = cp.sum(cp.logistic(weights * slope * excess))
+            loss_fn_transformed = data_term + 0.5 * scale_arg * cp.quad_form(
+                beta, Kinv
+            )
+
+            prob = cp.Problem(cp.Minimize(loss_fn_transformed))
+            prob.solve(solver=cp.MOSEK, verbose=False)
+            gap = beta.value.T @ Kinv @ beta.value - B
+            if verbose:
+                print(
+                    "scale:",
+                    scale_arg,
+                    "cond:",
+                    np.linalg.cond(Kinv),
+                    "sub.",
+                    gap,
+                    "B:",
+                    B,
+                )
+            return gap
+
+        if optimize_scale:
+            return helper.bisection(fun, 0.0, max_weight, bisections)
+
+        if optimal_scale is None:
+            scale_star = helper.bisection(fun, 0.0, max_weight, bisections)
+        else:
+            scale_star = optimal_scale
+
+        beta = cp.Variable(n)
+        excess = slope * (cp.abs(beta - targets) - eps)
+        if relax == "relu":
+            data_term = cp.sum(weights * cp.pos(excess))
+        else:
+            data_term = cp.sum(weights * cp.logistic(excess))
+        loss_fn_transformed = data_term + 0.5 * scale_star * cp.quad_form(beta, Kinv)
+
+        prob = cp.Problem(cp.Minimize(loss_fn_transformed))
+        prob.solve(solver=cp.MOSEK, verbose=verbose)
+        beta_torch = torch.from_numpy(beta.value).view(-1, 1)
+        alpha = torch.from_numpy(Kinv) @ beta_torch
+        ytest = self.kernel(self.x, xtest) @ alpha
+        return ytest
+
+    def volume_mean(
+        self,
+        xtest,
+        weights=None,
+        eps=10e-2,
+        tol=10e-6,
+        max_iter=1000,
+        verbose=False,
+        eta_start=0.01,
+        eta_decrease=0.9,
+        scale=1,
+        slope=1.0,
+        warm=True,
+        relax="relu",
+        norm=False,
+        B="auto",
+    ):
+        """
+        Least-volume style mean estimate computed by proximal gradient steps.
+
+        Minimises a relaxed epsilon-insensitive loss on the fitted values
+        ``beta`` plus ``0.5 * s * scale * beta^T Kinv beta``.  Each iteration
+        takes a gradient step on the quadratic term and applies
+        ``self.proximal`` for the loss term; the step size shrinks by
+        ``eta_decrease`` whenever the objective increases.
+
+        Side effects: stores ``self.scale``, ``self.relax`` and (for the tanh
+        and elu relaxations) ``self.slope``; prints the final norm.
+
+        :param xtest: points at which the estimate is returned
+        :param weights: per-point weights (ones when ``None``); entries below
+            ``10e-6`` are treated as zero.  The caller's tensor is not modified.
+        :param eps: half-width of the insensitive zone
+        :param tol: stop when the objective changes by less than this
+        :param warm: start from the least-squares fit instead of random values
+        :param relax: ``"relu"``, ``"tanh"`` or ``"elu"``
+        :param norm: return ``beta^T Kinv beta`` instead of predictions
+        :param B: accepted for interface compatibility; not used here
+        :raises AssertionError: for an unsupported ``relax``
+        :return: predictions at ``xtest``, or the norm if ``norm`` is true
+        """
+        self.scale = scale
+        self.relax = relax
+
+        K = self.get_kernel()  # (self.x, self.x)
+        Kinv = torch.pinverse(K)
+
+        if weights is None:
+            weights = torch.ones(self.x.size()[0])
+        else:
+            # Work on a copy so thresholding does not alter the caller's data.
+            weights = weights.clone()
+            weights[weights < 10e-6] = 0.0
+        weights = weights.view(-1)
+
+        if warm:
+            # warm start with L2 fit (torch.lstsq was removed from PyTorch)
+            alpha = torch.linalg.lstsq(K, self.y).solution
+            beta = K @ alpha
+        else:
+            beta = torch.randn(size=(self.n, 1)).double()
+
+        # A second, unreachable `elif self.relax == "relu"` branch that
+        # delegated to volume_mean_cvxpy was dropped; it could never execute.
+        if self.relax == "relu":
+
+            def data_term(beta):
+                return torch.sum(torch.relu(torch.abs(beta - self.y) - eps))
+
+        elif self.relax == "tanh":
+            self.slope = slope
+
+            def data_term(beta):
+                shifted = torch.abs(beta - self.y) - eps
+                squashed = (torch.tanh(self.slope * shifted) + 1) * 0.5
+                return torch.sum(weights * squashed.view(-1))
+
+        elif self.relax == "elu":
+            self.slope = slope
+
+            def data_term(beta):
+                # torch.nn has no `elu` function; the functional form is used.
+                shifted = torch.abs(beta - self.y) - eps
+                return torch.sum(
+                    torch.nn.functional.elu(shifted, alpha=self.slope)
+                )
+
+        else:
+            raise AssertionError("Unkown relaxation.")
+
+        def loss_fn_transformed(beta):
+            return data_term(beta) + 0.5 * self.s * self.scale * beta.T @ Kinv @ beta
+
+        current_loss = 10e10
+        eta = eta_start
+        for i in range(max_iter):
+            grad = self.s * (Kinv @ beta)
+            beta = self.proximal(beta, grad, eta, eps, weights)
+            past_loss = current_loss
+            current_loss = loss_fn_transformed(beta)
+            if current_loss > past_loss:
+                eta = eta * eta_decrease
+            elif torch.abs(current_loss - past_loss) < tol:
+                break
+
+            if verbose:
+                print(i, current_loss, eta)
+
+        print("final norm:", beta.T @ Kinv @ beta)
+
+        alpha = Kinv @ beta
+        ytest = self.kernel(self.x, xtest) @ alpha
+        if norm:
+            return beta.T @ Kinv @ beta
+        return ytest
+
+    def volume_mean_norm(
+        self,
+        xtest,
+        weights=None,
+        eps=10e-2,
+        tol=10e-6,
+        max_iter=1000,
+        verbose=False,
+        eta_start=0.01,
+        eta_decrease=0.9,
+        scale=1,
+        slope=1.0,
+        warm=True,
+        relax="relu",
+        B="auto",
+    ):
+        """
+        Run ``volume_mean`` with the scale chosen so the norm meets a budget.
+
+        The scale is found by bisection on ``[0, 1000]`` (10 steps) such that
+        ``beta^T Kinv beta`` returned by ``volume_mean(..., norm=True)`` equals
+        ``B``; the final estimate is then computed with that scale.
+
+        :param B: norm budget, or ``"auto"`` to use the norm of the
+            least-squares fit
+        :param scale: accepted for interface compatibility; the scale is
+            determined by the bisection
+        :return: predictions at ``xtest`` from ``volume_mean``
+        """
+        K = self.kernel(self.x, self.x)
+        Kinv = torch.pinverse(K)
+        if B == "auto":
+            # torch.lstsq was removed from PyTorch; torch.linalg.lstsq takes
+            # the matrix first and exposes the solution as an attribute.
+            # NOTE(review): the fit uses self.K while the norm uses the freshly
+            # evaluated K - kept as found, confirm this is intended.
+            alpha = torch.linalg.lstsq(self.K, self.y).solution
+            beta = K @ alpha
+            B = beta.T @ Kinv @ beta
+
+        # Arguments shared by the bisection probes and the final call.
+        shared = dict(
+            weights=weights,
+            eps=eps,
+            tol=tol,
+            max_iter=max_iter,
+            verbose=verbose,
+            eta_start=eta_start,
+            eta_decrease=eta_decrease,
+            slope=slope,
+            warm=warm,
+            relax=relax,
+        )
+
+        def norm_gap(s):
+            return self.volume_mean(xtest, scale=s, norm=True, **shared) - B
+
+        s_star = stpy.optim.custom_optimizers.bisection(norm_gap, 0.0, 1000.0, 10)
+
+        return self.volume_mean(xtest, scale=s_star, norm=False, **shared)
+
+    def proximal(self, beta, nabla, eta, eps, weights):
+        """
+        Coordinate-wise proximal step for the relaxed epsilon-insensitive loss.
+
+        For every coordinate ``i`` the scalar problem
+        ``(1 / (2 eta)) * (x - (beta_i - eta * nabla_i))^2
+        + w_i * relax(|x - y_i| - eps)`` is minimised with Nelder-Mead started
+        at ``0``.  The relaxation is chosen by ``self.relax``.
+
+        The elu relaxation is evaluated in numpy; the earlier version passed a
+        numpy array to a torch call and could not run.
+
+        :param beta: ``(n, 1)`` tensor of current values; overwritten in place
+        :param nabla: ``(n, 1)`` tensor with the gradient of the smooth part
+        :param eta: step size
+        :param eps: half-width of the insensitive zone
+        :param weights: per-coordinate weights
+        :raises AssertionError: for an unsupported ``self.relax``
+        :return: ``beta`` (the same tensor object) with updated entries
+        """
+        from scipy.optimize import minimize
+
+        # Resolve the relaxation once instead of on every coordinate.
+        if self.relax == "relu":
+
+            def relaxation(r):
+                return np.maximum(0, r)
+
+        elif self.relax == "tanh":
+
+            def relaxation(r):
+                return (np.tanh(self.slope * r) + 1) * 0.5
+
+        elif self.relax == "elu":
+
+            def relaxation(r):
+                # ELU with alpha = slope; expm1 only sees the non-positive part
+                negative_part = self.slope * np.expm1(np.minimum(r, 0.0))
+                return np.where(r > 0, r, negative_part)
+
+        else:
+            raise AssertionError("Unkown relaxation.")
+
+        res = beta
+        x0 = np.array([0.0])
+        for i in range(self.n):
+            b = float(beta[i, :])
+            y = float(self.y[i, :])
+            g = float(nabla[i, :])
+            w = float(weights[i])
+            target = b - eta * g
+
+            def loss_scalar(x, y=y, w=w, target=target):
+                z = x[0]
+                quadratic = (1 / (2.0 * eta)) * (z - target) ** 2
+                return float(quadratic + w * relaxation(np.abs(z - y) - eps))
+
+            res[i, :] = float(minimize(loss_scalar, x0, method="nelder-mead").x[0])
+        return res
+
+    def get_lambdas(self, beta, mean=False):
+        """
+        Build callables for the acquisition function and its derivative.
+
+        The ``mean`` flag used to be overwritten by a local lambda of the same
+        name, so ``mean=True`` silently behaved like ``mean=False``.  The
+        helpers now have their own names and the flag is honoured.
+
+        :param beta: beta in GP-UCB
+        :param mean: if true, return the posterior mean and the second output
+            of ``mean_std`` as separate callables
+        :return: ``[mean_fn, sigma_fn]`` when ``mean`` is true, otherwise
+            ``[fun, grad]`` where ``fun(x) = -(mean(x) + sqrt(beta) * sigma(x))``
+            and ``grad`` is built from ``complex_step_derivative``
+        """
+
+        def mean_fn(x):
+            return self.mean_std(x.reshape(1, -1), reuse=True)[0][0][0]
+
+        def sigma_fn(x):
+            return self.mean_std(x.reshape(1, -1), reuse=True)[1][0][0]
+
+        if mean:
+            return [mean_fn, sigma_fn]
+
+        def fun(x):
+            return -(mean_fn(x) + np.sqrt(beta) * sigma_fn(x))
+
+        def grad(x):
+            return -complex_step_derivative(fun, 1e-10, x.reshape(1, -1))
+
+        return [fun, grad]
+
+    def get_kernel(self):
+        """Return the kernel matrix currently stored in ``self.K``."""
+        stored = self.K
+        return stored
+
+    def ucb_optimize(self, beta, multistart=25, lcb=False):
+        """
+        Maximise the UCB (or LCB) acquisition function by multi-start L-BFGS-B.
+
+        Fixes over the previous version: the ``lcb`` flag was overwritten by a
+        local lambda of the same name, so the LCB objective was always the one
+        optimised; candidates were collected in a ragged ``np.array``; and the
+        inner coordinate loop reused the outer loop variable.  The best
+        candidate is now tracked directly and ranked by the acquisition that
+        was actually optimised.
+
+        :param beta: beta from GP UCB
+        :param multistart: number of random restarts (at least 1)
+        :param lcb: optimise ``mean - sqrt(beta) * sigma`` instead of
+            ``mean + sqrt(beta) * sigma``
+        :raises ValueError: if ``multistart`` is smaller than 1
+        :return: ``(next_point, acquisition value at next_point)``
+        """
+        from scipy.optimize import minimize
+
+        if multistart < 1:
+            raise ValueError("multistart must be at least 1")
+
+        sign = -1.0 if lcb else 1.0
+        width = np.sqrt(beta)
+
+        def acquisition(x):
+            # One posterior evaluation provides both terms.
+            point = torch.from_numpy(x).view(1, -1)
+            mu, sigma = self.mean_std(point, reuse=True)
+            return mu[0][0] + sign * width * sigma[0][0]
+
+        def objective(x):
+            return -float(acquisition(x))
+
+        self.back_prop = False
+        # evaluated once up front; the acquisition then calls with reuse=True
+        self.mean_std(self.x)
+
+        mybounds = self.bounds
+
+        best_point = None
+        best_value = None
+        for _ in range(multistart):
+            x0 = np.array(
+                [
+                    np.random.uniform(mybounds[j][0], mybounds[j][1])
+                    for j in range(self.d)
+                ]
+            )
+            res = minimize(
+                objective,
+                x0,
+                method="L-BFGS-B",
+                jac=None,
+                tol=0.000001,
+                bounds=mybounds,
+            )
+            value = acquisition(res.x)
+            if best_value is None or value > best_value:
+                best_point = res.x
+                best_value = value
+
+        return (torch.from_numpy(best_point), best_value)
+
+    def isin(self, xnext):
+        """
+        Tell whether ``xnext`` coincides with a stored input point.
+
+        A point counts as present when its Euclidean distance to some row of
+        ``self.x`` is below ``self.epsilon`` (set to ``0.001`` on every call).
+        The method now returns ``False`` explicitly instead of falling through
+        and returning ``None`` when nothing matches.
+
+        :param xnext: candidate point
+        :return: ``True`` if a stored point is within the tolerance, else
+            ``False``
+        """
+        self.epsilon = 0.001
+        return any(torch.norm(v - xnext, p=2) < self.epsilon for v in self.x)
+
+ def sample_and_condition(self, x):
+ """
+ Draw a sample at ``x``, condition the model on it and return its negation.
+
+ The point is reshaped to a single row and sampled with ``self.sample``.
+ If ``self.isin`` does not report the point as already stored, the point
+ and its sampled value are appended to ``self.x`` / ``self.y``; the model
+ is refitted with ``self.fit_gp``.
+
+ :param x: a single input point (any shape that flattens to one row)
+ :return: ``-fprobe``, the negated sampled value
+ """
+ xprobe = x.view(1, -1)
+ fprobe = self.sample(xprobe)
+ # skip points already in the data set so rows are not duplicated
+ if not self.isin(xprobe):
+ self.x = torch.cat((self.x, xprobe), dim=0)
+ self.y = torch.cat((self.y, fprobe), dim=0)
+ self.fit_gp(self.x, self.y)
+ # negated so that a minimiser applied to this function maximises the sample
+ return -fprobe
+
+    def get_lambdas_TH(self):
+        """
+        Return the sampling objective and its (absent) gradient.
+
+        :return: ``[fun, None]`` where ``fun(x)`` forwards to
+            ``self.sample_and_condition(x)``
+        """
+
+        def fun(x):
+            return self.sample_and_condition(x)
+
+        return [fun, None]
+
+ def sample_iteratively_max(
+ self, xtest, multistart=20, minimizer="coordinate-wise", grid=100
+ ):
+ """
+ Samples a path from the GP point by point, conditioning on each draw,
+ and returns the maximiser of the sampled path.
+
+ The stored data ``self.x`` / ``self.y`` are temporarily extended with the
+ sampled values and restored (with a refit) before returning.
+
+ :param xtest: grid of candidate points, or ``None`` to search the domain
+ with the chosen ``minimizer``
+ :param multistart: number of random restarts when ``xtest`` is ``None``
+ :param minimizer: ``"coordinate-wise"`` or ``"L-BFGS-B"``
+ :param grid: points per coordinate sweep, or ``niter`` for basin hopping
+ :raises AssertionError: for an unsupported ``minimizer``
+ :return: pair ``(point, value)``
+ """
+ # print ("Iterative:",multistart,minimizer,grid)
+ # NOTE(review): `minimize` is imported but the body below calls
+ # scipy.optimize.basinhopping through the module-level name `scipy`.
+ from scipy.optimize import minimize
+
+ # old stuff
+ xold = self.x
+ yold = self.y
+
+ # with fixed grid
+ if xtest is not None:
+ # number of samples
+ nn = xtest.shape[0]
+
+ f = torch.zeros(nn, dtype=torch.float64)
+
+ for j in range(nn):
+ xprobe = xtest[j, :].view(1, -1)
+ (K_star, K_star_star) = self.execute(xprobe)
+ (ymean, yvar) = self.mean_std(xprobe)
+ # scale of the draw at this single point, given the data gathered so far
+ L = torch.sqrt(
+ K_star_star
+ + self.s * self.s * torch.eye(1, dtype=torch.float64)
+ - yvar
+ )
+ fprobe = ymean + L * torch.randn(1, dtype=torch.float64)
+ # add x and fprobe to the dataset and redo the whole
+ f[j] = fprobe
+ if not self.isin(xprobe):
+ self.x = torch.cat((self.x, xprobe), dim=0)
+ self.y = torch.cat((self.y, fprobe), dim=0)
+
+ self.fit_gp(self.x, self.y)
+
+ val, index = torch.max(f, dim=0)
+ # restore the original data before returning
+ self.fit_gp(xold, yold)
+ return (xtest[index, :], f[index])
+
+ else:
+ # Iterative without grid
+
+ # get bounds
+ if self.bounds == None:
+ mybounds = tuple(
+ [(-self.diameter, self.diameter) for i in range(self.d)]
+ )
+ else:
+ mybounds = self.bounds
+ # fun draws a sample at a point, conditions on it and returns its negation
+ [fun, grad] = self.get_lambdas_TH()
+
+ results = []
+ for j in range(multistart):
+
+ # print ("Multistart:",j)
+ # random start inside the bounds (the randn values are overwritten)
+ x0 = torch.randn(self.d, dtype=torch.float64)
+ for i in range(self.d):
+ x0[i].uniform_(mybounds[i][0], mybounds[i][1])
+
+ # simple coordnate-wise optimization
+ if minimizer == "coordinate-wise":
+ # solution aliases x0, so each sweep starts from the updated point
+ solution = x0
+ for i in range(self.d):
+ xtest = torch.from_numpy(np.tile(x0, (grid, 1)))
+ xtest[:, i] = torch.linspace(
+ mybounds[i][0], mybounds[i][1], grid
+ )
+ sample = self.sample(xtest)
+
+ ## Add to the posterior
+ self.x = torch.cat((self.x, xtest), dim=0)
+ self.y = torch.cat((self.y, sample), dim=0)
+
+ # argmax
+ val, index = torch.max(sample, dim=0)
+ out = xtest[index, :]
+
+ # fit new GP
+ self.fit_gp(self.x, self.y)
+ solution[i] = out[0, i]
+
+ elif minimizer == "L-BFGS-B":
+ # NOTE(review): despite the option name this branch runs basin hopping
+ solution = np.random.randn(self.d)
+ xmax = [b[1] for b in mybounds]
+ xmin = [b[0] for b in mybounds]
+ bounds = MyBounds(xmax=xmax, xmin=xmin)
+ func = lambda x: fun(torch.from_numpy(x)).numpy()[0][0]
+ res = scipy.optimize.basinhopping(
+ func, solution, disp=False, niter=grid, accept_test=bounds
+ )
+ solution = torch.from_numpy(res.x)
+
+ else:
+ raise AssertionError("Wrong optimizer selected.")
+
+ # each row stores the point followed by a freshly sampled value at it
+ results.append(torch.cat((solution, -fun(solution)[0])))
+ # drop everything sampled during this restart
+ self.x = xold
+ self.y = yold
+ self.fit_gp(self.x, self.y)
+
+ results = torch.stack(results)
+ val, index = torch.max(results[:, -1], dim=0)
+ solution = results[index, 0 : self.d].view(1, self.d)
+ self.x = xold
+ self.y = yold
+ self.fit_gp(self.x, self.y)
+
+ # NOTE(review): fun(solution) draws a new sample (and conditions on it) after
+ # the data was restored, so the returned value is not the one used for the
+ # ranking above - confirm this is intended.
+ return (solution, -fun(solution))
if __name__ == "__main__":
- from stpy.helpers.helper import interval
- # domain size
- L_infinity_ball = 1
- # dimension
- d = 1
- # error variance
- s = torch.from_numpy(np.array(1.0, dtype=np.float64))
-
- # grid density
- n = 1024
- # number of intial points
- N = 32
- # smoothness
- gamma = 0.1
- # test problem
-
- xtest = torch.from_numpy(interval(n, d))
- # x = torch.from_numpy(np.random.uniform(-L_infinity_ball,L_infinity_ball, size = (N,d)))
- x = torch.from_numpy(interval(N, 1))
- f_no_noise = lambda q: torch.sin(torch.sum(q * 4, dim=1)).view(-1, 1)
- f = lambda q: f_no_noise(q) + torch.normal(mean=torch.zeros(q.size()[0], 1, dtype=torch.float64), std=1.,
- out=None) * s * s
- # targets
- y = f(x)
-
- # GP model with squared exponential
- kernel = KernelFunction(kernel_name = "ard", gamma = torch.ones(d, dtype = torch.float64)*gamma , groups = [[0],[1]])
- # kernel = KernelFunction(kernel_name="ard", gamma=torch.ones(1, dtype=torch.float64) * gamma, groups=[[0]])
- GP = GaussianProcess(s=s, d=1)
-
- # fit GP
- # x = x.numpy()
- GP.fit_gp(x, y)
- # get mean and variance of GP
- [mu, std] = GP.mean_std(xtest)
-
- # print ("Log probability:", GP.log_marginal_likelihood() )
- # mu_inf = GP.chebyshev_mean(xtest)
- eps = 0.1
-
- mu_vol = GP.volume_mean_cvxpy(xtest, eps=eps, verbose=True, scale=1., slope=1., tol=10e-9)
-
- GP.visualize(xtest, f_true=f_no_noise, show=False)
- plt.plot(xtest.numpy(), mu_vol.detach().numpy(), label="Least-Volume-ReLu", lw=2)
- for slope in [0.001, 0.01, 0.1, 1., 10., 100., 1000., 10000.]:
- # mu_vol_log = GP.volume_mean_cvxpy(xtest, eps=eps, verbose=True, scale=1., slope=slope, tol=10e-9, relax = 'log', B = 1000)
- # plt.plot(xtest.numpy(),mu_vol_log.detach().numpy(), '--',label = "Least-Volume-Log" + str(slope), lw = 2)
- mu_vol_tanh = GP.volume_mean(xtest, eps=eps, verbose=True, eta_start=0.1, eta_decrease=0.1, scale=1.,
- slope=slope,
- tol=0.01, warm=True, relax='tanh')
- plt.plot(xtest.numpy(), mu_vol_tanh.detach().numpy(), '-.', label="Least-Volume-Tanh" + str(slope), lw=2)
- # print (slope, np.sum(np.abs(mu_vol_log) 0:
- # something to add via low rank update
- for i in range(len(self.to_add)):
- newx = self.to_add[i][0]
- newy = self.to_add[i][1]
-
- # rank one update
- emb = self.embed(newx)
-
- if self.dual: # via Shur complements
- newKinv = torch.zeros(size=(self.n + 1, self.n + 1)).double()
- newK = torch.zeros(size=(self.n + 1, self.n + 1)).double()
-
- M = self.invK @ self.Q
- c = 1. / ((self.s ** 2 * self.lam + emb @ emb.T) - emb @ self.Q.T @ M @ emb.T)
-
- newKinv[0:self.n, 0:self.n] = self.invK + c * M @ emb.T @ emb @ M.T
- newKinv[0:self.n, self.n] = (- M @ emb.T * c).view(-1)
- newKinv[self.n, 0:self.n] = (- emb @ M.T * c).view(-1)
- newKinv[self.n, self.n] = c.view(-1)
-
- newK[0:self.n, 0:self.n] = self.K
- newK[0:self.n, self.n] = emb @ self.Q.T
- newK[self.n, 0:self.n] = emb @ self.Q.T
- newK[self.n, self.n] = self.s ** 2 * self.lam + emb @ emb.T
- self.K = newK
-
- self.invK = newKinv
-
- self.add_points(newx, newy)
- self.n = self.n + 1
- self.Q = self.embed(self.x)
- self.invK_V = (1. / self.lam) * (-self.Q.T @ self.invK @ self.Q + torch.eye(int(self.m)))
-
- else: # via Woodbury
- c = 1 + emb @ self.invV @ emb.T
- self.invV = self.invV - (self.invV @ emb.T @ emb @ self.invV) / c
- self.add_points(newx, newy)
- self.n = self.n + 1
- self.Q = self.embed(self.x)
- # add point
-
- self.check_conversion()
-
- self.fitted = True
- self.to_add = []
-
-
- elif self.data == True: # just compute the
- self.Q = self.embed(self.x)
- if self.dual:
- I = torch.eye(self.n).double()
- Z_ = self.Q @ self.Q.T
- self.K = Z_ + self.s * self.s * self.lam * I
- # self.invK, _ = torch.solve(I, self.K)
- self.invK = torch.pinverse(self.K)
- self.invK_V = (1. / self.lam) * (-self.Q.T @ self.invK @ self.Q + torch.eye(int(self.m)))
- else:
- I = torch.eye(int(self.m)).double()
- Z_ = self.Q.T @ self.Q
- self.V = Z_ + self.s ** 2 * self.lam * I
- self.invV = torch.pinverse(self.V)
-
- self.fitted = True
- else:
- pass
- else:
- pass
-
- def theta_mean(self, var=False, prior=False):
- self.precompute()
-
- if self.fitted == True and prior == False:
- if self.dual:
- theta_mean = self.Q.T @ self.invK @ self.y
- Z = self.invK_V
- else:
- theta_mean = self.invV @ self.Q.T @ self.y
- Z = self.s ** 2 * self.invV
- else:
- theta_mean = 0 * torch.ones(size=(self.m, 1)).double()
-
- if var is False:
- return theta_mean
- else:
- return (theta_mean, Z)
-
- def mean(self, xtest):
- return self.mean_std(xtest)[0]
-
- def mean_std(self, xtest):
- '''
- Calculate mean and variance for GP at xtest points
- '''
- self.precompute()
- embeding = self.embed(xtest)
-
- # mean
- theta_mean = self.theta_mean()
- # print(torch.norm(theta_mean))
- ymean = embeding @ theta_mean
-
- # std
- if not self.dual or self.primal:
- diagonal = self.s ** 2 * torch.einsum('ij,jk,ik->i', (embeding, self.invV, embeding)).view(-1, 1)
- else:
- diagonal = torch.einsum('ij,jk,ik->i', (embeding, self.invK_V, embeding)).view(-1, 1)
-
- ystd = torch.sqrt(diagonal)
- return (ymean, ystd)
-
- def ucb(self, xtest, delta=0.1):
- mu, std = self.mean_std(xtest)
- res = mu + np.sqrt(self.beta(delta=delta)) * std
- return res
-
- def lcb(self, xtest, delta=0.1):
- mu, std = self.mean_std(xtest)
- res = mu - np.sqrt(self.beta(delta=delta)) * std
- return res
-
- def sample_matheron(self, xtest, kernel_object, size=1):
- basis = self.get_basis_size()
- zeros = torch.zeros(size=(basis, size), dtype=torch.float64)
- random_vector = torch.normal(mean=zeros, std=1.)
-
- Z = self.lam * torch.eye(basis, dtype=torch.float64)
- L = torch.linalg.cholesky(Z.transpose(-2, -1).conj()).transpose(-2, -1).conj()
- theta = torch.mm(L, random_vector) + self.prior_mean
-
- f_prior_xtest = torch.mm(self.embed(xtest), theta)
- f_prior_x = torch.mm(self.embed(self.x), theta)
-
- K_star = kernel_object.kernel(self.x, xtest)
- N = self.x.size()[0]
- K = kernel_object.kernel(self.x, self.x) + self.s ** 2 * self.lam * torch.eye(N)
-
- f = f_prior_xtest + K_star @ torch.pinverse(K) @ (self.y - f_prior_x)
- return f
-
- def sample_theta(self, size=1, prior=False):
-
- basis = self.get_basis_size()
-
- zeros = torch.zeros(size=(basis, size), dtype=torch.float64)
- random_vector = torch.normal(mean=zeros, std=1.)
- self.precompute()
-
- if self.fitted == True and prior == False:
- self.L = torch.linalg.cholesky(self.get_invV()) * self.s
- theta = self.theta_mean()
- theta = theta + torch.mm(self.L, random_vector)
- else:
- Z = self.lam * torch.eye(basis, dtype=torch.float64)
- L = torch.linalg.cholesky(Z.transpose(-2, -1).conj()).transpose(-2, -1).conj()
- theta = torch.mm(L, random_vector) + self.prior_mean
-
- return theta
-
- def theta_mean_constrained(self, weights=None, B=1):
- if weights is None:
- weights = torch.ones(self.n).double() / self.n
-
- Q = self.embed(self.x)
- theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy()))
- objective = cp.Minimize(
- cp.sum(weights @ cp.square(Q.detach().numpy() @ theta - self.y.view(-1).detach().numpy())))
- zero = np.zeros(int(torch.sum(self.m)))
- constraints = [cp.SOC(theta @ zero + B, theta)]
- prob = cp.Problem(objective, constraints)
- prob.solve(solver=cp.MOSEK)
- return torch.from_numpy(theta.value).view(-1, 1)
-
- def theta_absolute_deviation(self, weights=None, reg=None):
- if weights is None:
- weights = torch.ones(self.x.size()[0])
-
- if reg is None: # standard regularization
- Q = self.embed(self.x)
- theta = cp.Variable((int(torch.sum(self.m)), 1))
- objective = cp.Minimize(
- cp.sum(weights @ cp.abs(Q.numpy() @ theta - self.y.numpy())) + self.s * self.lam * cp.norm2(theta))
- prob = cp.Problem(objective)
- prob.solve()
- return torch.from_numpy(theta.value)
- else: # custom regularization
- Q = self.embed(self.x)
- theta = cp.Variable((int(torch.sum(self.m)), 1))
- objective = cp.Minimize(
- cp.sum(weights @ cp.abs(Q.numpy() @ theta - self.y.numpy())) + reg * cp.norm2(theta))
- prob = cp.Problem(objective)
- prob.solve(solver=cp.MOSEK)
- return torch.from_numpy(theta.value)
-
- def theta_absolute_deviation_constrained(self, weights=None, B=1):
- if weights is None:
- weights = torch.ones(self.x.size()[0])
- Q = self.embed(self.x)
- theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy()))
-
- objective = cp.Minimize(cp.sum(weights @ cp.abs(Q.detach().numpy() @ theta - self.y.view(-1).detach().numpy())))
- zero = np.zeros(int(torch.sum(self.m)))
- constraints = [cp.SOC(theta @ zero + B, theta)]
- prob = cp.Problem(objective, constraints)
- prob.solve(solver=cp.MOSEK)
- return torch.from_numpy(theta.value).view(-1, 1)
-
- def theta_chebyschev_approximation(self, eps=1.):
- Q = self.embed(self.x).detach().numpy()
- y = self.y.view(-1).detach().numpy()
-
- theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy()))
- objective = cp.Minimize(cp.sum_squares(theta))
- constraints = [cp.abs(Q @ theta - y) <= eps]
-
- prob = cp.Problem(objective, constraints)
- prob.solve(solver=cp.MOSEK)
- res = torch.from_numpy(theta.value).view(-1, 1)
- return res
-
- def interpolation(self, eps=0.):
- Q = self.embed(self.x).detach().numpy()
- y = self.y.view(-1).detach().numpy()
- theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy()))
- objective = cp.Minimize(cp.sum_squares(theta))
- constraints = [Q @ theta == y]
-
- prob = cp.Problem(objective, constraints)
- prob.solve()
- res = torch.from_numpy(theta.value).view(-1, 1)
-
- return res
-
- def mean_squared(self, xtest, weights=None, B=None, theta=False, reg=None):
- embeding = self.embed(xtest)
-
- if B is not None:
- theta_mean = self.theta_mean_constrained(weights=weights, B=B)
- else:
- theta_mean = self.theta_mean(weights=weights, reg=reg)
- ymean = torch.mm(embeding, theta_mean)
- if theta == True:
- return ymean, theta_mean
- else:
- return ymean
-
- def mean_aboslute_deviation(self, xtest, weights=None, B=None, theta=False):
- embeding = self.embed(xtest)
- if B is not None:
- theta_mean = self.theta_absolute_deviation_constrained(weights=weights, B=B)
- else:
- theta_mean = self.theta_absolute_deviation(weights=weights)
- ymean = torch.mm(embeding, theta_mean)
- if theta == True:
- return ymean, theta_mean
- else:
- return ymean
-
- """
+ """
+ Random Fourier Features for Gaussian Kernel
+ """
+
+    def __init__(
+        self,
+        embedding,
+        m,
+        s=0.001,
+        lam=1.0,
+        d=1,
+        diameter=1.0,
+        theta_norm=1.0,
+        verbose=True,
+        groups=None,
+        bounds=None,
+        scale=1.0,
+        kappa=1.0,
+        poly=2,
+        primal=True,
+        beta_fun=None,
+        bound=1,
+    ):
+        """
+        Create an un-fitted model over a user-supplied embedding.
+
+        :param embedding: object providing ``embed(x)``
+        :param m: basis size(s); stored as a tensor
+        :param s: noise level
+        :param lam: regularisation strength
+        :param primal: if true, the primal form is always used
+        :param beta_fun: ``None``, ``"theory"`` or a callable used by ``beta``
+        :param bound: constant entering the ``"theory"`` confidence parameter
+        """
+        # Hyper-parameters.
+        self.s = s
+        self.lam = lam
+        self.kappa = kappa
+        self.scale = scale
+        self.poly = poly
+        self.bound = bound
+        self.beta_fun = beta_fun
+        self.theta_norm = theta_norm
+        self.primal = primal
+        self.dual = False
+
+        # Domain description.
+        self.d = d
+        self.bounds = bounds
+        self.groups = groups
+        self.diameter = diameter
+
+        # Feature map and the linear kernel applied to embedded inputs.
+        self.embedding = embedding
+        self.embedding_map = embedding
+        self.m = torch.from_numpy(np.array(m))
+        self.linear_kernel = KernelFunction(kernel_name="linear").linear_kernel
+
+        # Data and fit state; nothing is fitted yet.
+        self.x = None
+        self.n = 0
+        self.K = torch.ones(size=(1, 1)).double()
+        self.mu = 0.0
+        self.prior_mean = 0
+        self.to_add = []
+        self.data = False
+        self.fitted = False
+
+        self.verbose = verbose
+        self.admits_first_order = True
+
+    def beta(self, delta=0.1, norm=None):
+        """
+        Confidence parameter used by the UCB / LCB bounds.
+
+        * ``beta_fun is None``: the constant ``2.0``.
+        * ``beta_fun == "theory"``:
+          ``bound * lam + logdet(V) - logdet(lam * I) + 2 * log(1 / delta)``
+          with ``V = Q^T Q / s^2 + lam * I`` and ``Q`` the embedded data.
+        * otherwise: ``beta_fun(self.K, delta=delta, norm=norm)``.
+
+        The "theory" branch used to evaluate a kernel-matrix version of the
+        same quantity first and then overwrite it with the feature-space
+        version; only the value that was actually returned is computed now.
+
+        :param delta: failure probability
+        :param norm: norm bound handed to a custom ``beta_fun``; defaults to
+            ``self.theta_norm``
+        :return: the confidence parameter
+        """
+        if norm is None:
+            norm = self.theta_norm
+
+        if self.beta_fun is None:
+            return 2.0
+
+        if self.beta_fun == "theory":
+            Q = self.embed(self.x)
+            Lam = self.lam * torch.eye(self.get_basis_size()).double()
+            V = Q.T @ Q / (self.s**2) + Lam
+            return (
+                self.bound * self.lam
+                + torch.logdet(V)
+                - torch.logdet(Lam)
+                + 2 * np.log(1 / delta)
+            )
+
+        return self.beta_fun(self.K, delta=delta, norm=norm)
+
+    def description(self):
+        """Return a short human-readable label for this model."""
+        label = "Custom Features object"
+        return label
+
+    def embed(self, x):
+        """Map ``x`` through ``self.embedding.embed`` and return the result."""
+        feature_map = self.embedding
+        return feature_map.embed(x)
+
+    def set_embedding(self, embed):
+        """
+        Replace the embedding used by this model.
+
+        ``embed()`` reads ``self.embedding``, while this setter used to update
+        only ``self.embedding_map``, so the new embedding was never used.
+        Both attributes are now kept in sync, as the constructor does.
+
+        :param embed: object providing ``embed(x)``
+        """
+        self.embedding = embed
+        self.embedding_map = embed
+
+    def get_basis_size(self):
+        """Return the total number of basis functions, ``sum(self.m)``, as int."""
+        total = torch.sum(self.m)
+        return int(total)
+
+    def set_basis_size(self, m):
+        """
+        Set the basis size(s).
+
+        The value is stored as a tensor, matching the constructor;
+        ``get_basis_size`` calls ``torch.sum`` on it, which rejects a plain
+        Python number.
+
+        :param m: int, sequence of ints, numpy array or tensor
+        """
+        self.m = torch.as_tensor(m)
+
+    def kernel(self, x, y):
+        """
+        Kernel between ``x`` and ``y`` induced by the embedding.
+
+        :return: ``self.linear_kernel`` applied to the embedded inputs
+        """
+        return self.linear_kernel(self.embed(x), self.embed(y))
+
+    def logdet_ratio(self):
+        """
+        Return ``logdet(self.K) - logdet(s^2 * lam * I)``.
+
+        The identity has ``sum(self.m)`` rows.
+        """
+        basis = int(torch.sum(self.m))
+        reference = self.s**2 * self.lam * torch.eye(basis).double()
+        return torch.logdet(self.K) - torch.logdet(reference)
+
+    def effective_dim(self, xtest):
+        """
+        Effective dimension ``trace((Phi^T Phi + lam I)^{-1} Phi^T Phi)``.
+
+        Uses ``torch.linalg.solve`` (matrix first, right-hand side second);
+        the previously used ``torch.solve`` has been removed from PyTorch.
+
+        :param xtest: points whose embedding ``Phi`` defines the dimension
+        :return: scalar tensor
+        """
+        Phi = self.embed(xtest)
+        gram = Phi.T @ Phi
+        regularised = gram + torch.eye(self.get_basis_size()).double() * self.lam
+        return torch.trace(torch.linalg.solve(regularised, gram))
+
+ def add_data_point(self, x, y):
+ """
+ Register a new observation.
+
+ With no data yet (``self.n == 0``) the model is fitted directly on
+ ``(x, y)``; otherwise the pair is queued in ``self.to_add`` and the model
+ is marked as not fitted.
+
+ :param x: input point(s)
+ :param y: observed value(s)
+ """
+ if self.n == 0:
+ self.fit_gp(x, y)
+ else:
+ # queued pairs are presumably absorbed by precompute() - verify there
+ self.to_add.append([x, y])
+ self.fitted = False
+
+    def fit(self, x=None, y=None):
+        """
+        Fit the model, by default on the data already stored.
+
+        The ``x`` and ``y`` arguments used to be ignored.  When given they are
+        now passed to ``fit_gp``; an omitted argument falls back to the stored
+        ``self.x`` / ``self.y``, so ``fit()`` behaves as before.
+
+        :param x: inputs, or ``None`` to reuse ``self.x``
+        :param y: targets, or ``None`` to reuse ``self.y``
+        """
+        inputs = self.x if x is None else x
+        targets = self.y if y is None else y
+        self.fit_gp(inputs, targets)
+
+    def fit_gp(self, x, y):
+        """
+        Store the data, choose dual or primal form and run ``precompute``.
+
+        The dual form is selected when there are fewer points than
+        ``self.m``, unless ``self.primal`` forces the primal form.
+
+        :param x: inputs, one row per point
+        :param y: targets
+        :return: None
+        """
+        self.x, self.y = x, y
+        dims = list(self.x.size())
+        self.n = dims[0]
+        self.d = dims[1]
+
+        self.dual = True if self.n < self.m else False
+        if self.primal == True:
+            self.dual = False
+
+        # Mark data as present but not yet processed, then process it.
+        self.data = True
+        self.fitted = False
+        self.precompute()
+        return None
+
+    def add_points(self, d, y=None):
+        """
+        Append observations to the stored data.
+
+        Two call forms are accepted: ``add_points((x, y))`` with a single
+        pair, and ``add_points(x, y)`` with separate arguments.  The second
+        form is the one ``precompute`` uses; it previously failed because the
+        method took a single argument.
+
+        :param d: an ``(x, y)`` pair, or the inputs ``x`` when ``y`` is given
+        :param y: targets, when inputs and targets are passed separately
+        """
+        if y is None:
+            x, y = d
+        else:
+            x = d
+
+        if self.x is not None:
+            self.x = torch.cat((self.x, x), dim=0)
+            self.y = torch.cat((self.y, y), dim=0)
+        else:
+            self.x = x
+            self.y = y
+
+    def check_conversion(self):
+        """
+        Convert between dual and primal form.
+
+        When the primal form is not forced and the number of points equals
+        ``self.m``, the model switches to the primal form and ``V`` and its
+        inverse are computed from the embedded data ``self.Q``.
+
+        The inverse is obtained with ``torch.linalg.solve`` as in
+        ``get_invV``; the previously used ``torch.solve`` has been removed
+        from PyTorch.
+
+        :return: None
+        """
+        if self.primal == False:
+            if self.n == self.m:  # convert to primal mode
+                print("Switching mode to primal.")
+                self.dual = False
+
+                size = int(self.m)
+                identity = torch.eye(size).double()
+                Z_ = self.linear_kernel(torch.t(self.Q), torch.t(self.Q))
+                self.V = Z_ + self.s * self.s * self.lam * torch.eye(
+                    size, dtype=torch.float64
+                )
+                self.invV = torch.linalg.solve(self.V, identity)
+
+    def get_invV(self):
+        """Return ``self.invV``, rebuilding it from ``Q`` first when in dual mode.
+
+        ``precompute()`` is always run first. In dual mode ``V`` is formed as
+        ``linear_kernel(Q^T, Q^T) + s^2 * lam * I`` and inverted with
+        ``torch.linalg.solve``; in primal mode the cached ``self.invV`` is
+        returned unchanged.
+        """
+        self.precompute()
+
+        if not self.dual:
+            return self.invV
+
+        identity = torch.eye(self.m).double()
+        gram = self.linear_kernel(torch.t(self.Q), torch.t(self.Q))
+        ridge = self.s * self.s * self.lam * torch.eye(self.m, dtype=torch.float64)
+        self.V = gram + ridge
+        self.invV = torch.linalg.solve(self.V, identity)
+        return self.invV
+
+ def precompute(self):
+ """
+ Bring the cached matrices up to date with the stored data.
+
+ Does nothing when ``self.fitted`` is already true. If points are queued
+ in ``self.to_add`` they are folded in one at a time: in dual mode ``K``
+ and ``invK`` are enlarged by one row and column, otherwise ``invV``
+ receives a rank-one update. If nothing is queued but ``self.data`` is
+ set, everything is rebuilt from ``self.x`` with ``torch.pinverse``
+ (``K``, ``invK``, ``invK_V`` in dual mode; ``V``, ``invV`` in primal mode).
+ """
+
+ if self.fitted == False:
+ if len(self.to_add) > 0:
+ # something to add via low rank update
+ for i in range(len(self.to_add)):
+ newx = self.to_add[i][0]
+ newy = self.to_add[i][1]
+
+ # rank one update
+ emb = self.embed(newx)
+
+ if self.dual: # via Schur complements
+ newKinv = torch.zeros(size=(self.n + 1, self.n + 1)).double()
+ newK = torch.zeros(size=(self.n + 1, self.n + 1)).double()
+
+ M = self.invK @ self.Q
+ # c: reciprocal of the Schur complement of the old K in the enlarged K
+ c = 1.0 / (
+ (self.s**2 * self.lam + emb @ emb.T)
+ - emb @ self.Q.T @ M @ emb.T
+ )
+
+ # block formula for the inverse of the enlarged matrix
+ newKinv[0 : self.n, 0 : self.n] = (
+ self.invK + c * M @ emb.T @ emb @ M.T
+ )
+ newKinv[0 : self.n, self.n] = (-M @ emb.T * c).view(-1)
+ newKinv[self.n, 0 : self.n] = (-emb @ M.T * c).view(-1)
+ newKinv[self.n, self.n] = c.view(-1)
+
+ # enlarged K: old block plus the new point's cross terms and diagonal
+ newK[0 : self.n, 0 : self.n] = self.K
+ newK[0 : self.n, self.n] = emb @ self.Q.T
+ newK[self.n, 0 : self.n] = emb @ self.Q.T
+ newK[self.n, self.n] = self.s**2 * self.lam + emb @ emb.T
+ self.K = newK
+
+ self.invK = newKinv
+
+ # NOTE(review): called with two positional arguments -- confirm add_points accepts (x, y)
+ self.add_points(newx, newy)
+ self.n = self.n + 1
+ self.Q = self.embed(self.x)
+ self.invK_V = (1.0 / self.lam) * (
+ -self.Q.T @ self.invK @ self.Q + torch.eye(int(self.m))
+ )
+
+ else: # via Woodbury
+ # rank-one update of invV for adding emb^T emb to V
+ c = 1 + emb @ self.invV @ emb.T
+ self.invV = (
+ self.invV - (self.invV @ emb.T @ emb @ self.invV) / c
+ )
+ # NOTE(review): called with two positional arguments -- confirm add_points accepts (x, y)
+ self.add_points(newx, newy)
+ self.n = self.n + 1
+ self.Q = self.embed(self.x)
+ # add point
+
+ self.check_conversion()
+
+ self.fitted = True
+ self.to_add = []
+
+ elif self.data == True: # nothing queued: rebuild everything from the stored data
+ self.Q = self.embed(self.x)
+ if self.dual:
+ I = torch.eye(self.n).double()
+ Z_ = self.Q @ self.Q.T
+ self.K = Z_ + self.s * self.s * self.lam * I
+ # self.invK, _ = torch.solve(I, self.K)
+ self.invK = torch.pinverse(self.K)
+ self.invK_V = (1.0 / self.lam) * (
+ -self.Q.T @ self.invK @ self.Q + torch.eye(int(self.m))
+ )
+ else:
+ I = torch.eye(int(self.m)).double()
+ Z_ = self.Q.T @ self.Q
+ self.V = Z_ + self.s**2 * self.lam * I
+ self.invV = torch.pinverse(self.V)
+
+ self.fitted = True
+ else:
+ pass
+ else:
+ pass
+
+    def theta_mean(self, var=False, prior=False):
+        """Return the mean of the weight vector, optionally with its covariance.
+
+        When the model is fitted and ``prior`` is false, the mean is
+        ``Q^T invK y`` (dual) or ``invV Q^T y`` (primal), with covariance
+        ``invK_V`` or ``s^2 * invV`` respectively. Otherwise the mean is an
+        ``(m, 1)`` tensor of zeros.
+
+        NOTE(review): on the zero-mean path no covariance is assigned, so
+        ``var=True`` fails there on the unbound local ``Z``.
+        """
+        self.precompute()
+
+        use_posterior = self.fitted == True and prior == False
+        if use_posterior and self.dual:
+            theta_mean = self.Q.T @ self.invK @ self.y
+            Z = self.invK_V
+        elif use_posterior:
+            theta_mean = self.invV @ self.Q.T @ self.y
+            Z = self.s**2 * self.invV
+        else:
+            theta_mean = 0 * torch.ones(size=(self.m, 1)).double()
+
+        if var is False:
+            return theta_mean
+        return (theta_mean, Z)
+
+    def mean(self, xtest):
+        """Return only the predictive mean at ``xtest`` (item 0 of ``mean_std``)."""
+        prediction = self.mean_std(xtest)
+        return prediction[0]
+
+    def mean_std(self, xtest):
+        """
+        Return the predictive mean and standard deviation at ``xtest``.
+
+        The mean is ``embed(xtest) @ theta_mean()``. The variance is the
+        diagonal of ``Phi C Phi^T`` with ``C = s^2 * invV`` in primal mode and
+        ``C = invK_V`` in dual mode; both outputs are returned as a tuple
+        ``(ymean, ystd)``, the std reshaped to a column.
+        """
+        self.precompute()
+        features = self.embed(xtest)
+
+        # mean
+        ymean = features @ self.theta_mean()
+
+        # variance: per-row quadratic form phi_i^T C phi_i
+        if not self.dual or self.primal:
+            quad = torch.einsum("ij,jk,ik->i", features, self.invV, features)
+            variance = self.s**2 * quad.view(-1, 1)
+        else:
+            quad = torch.einsum("ij,jk,ik->i", features, self.invK_V, features)
+            variance = quad.view(-1, 1)
+
+        return (ymean, torch.sqrt(variance))
+
+    def ucb(self, xtest, delta=0.1):
+        """Upper confidence bound ``mean + sqrt(beta(delta)) * std`` at ``xtest``."""
+        mu, std = self.mean_std(xtest)
+        width = np.sqrt(self.beta(delta=delta)) * std
+        return mu + width
+
+    def lcb(self, xtest, delta=0.1):
+        """Lower confidence bound ``mean - sqrt(beta(delta)) * std`` at ``xtest``."""
+        mu, std = self.mean_std(xtest)
+        width = np.sqrt(self.beta(delta=delta)) * std
+        return mu - width
+
+    def sample_matheron(self, xtest, kernel_object, size=1):
+        """Draw ``size`` samples at ``xtest`` as a prior draw plus a data correction.
+
+        A weight matrix is drawn with covariance ``lam * I`` and shifted by
+        ``self.prior_mean``; it is pushed through the embedding at ``xtest`` and
+        at the training inputs. The prior draw at ``xtest`` is then corrected by
+        ``K_star @ pinv(K) @ (y - f_prior(x))``, where both kernel matrices come
+        from ``kernel_object.kernel`` and ``K`` carries ``s^2 * lam`` on its
+        diagonal.
+        """
+        basis = self.get_basis_size()
+        noise = torch.normal(
+            mean=torch.zeros(size=(basis, size), dtype=torch.float64), std=1.0
+        )
+
+        prior_cov = self.lam * torch.eye(basis, dtype=torch.float64)
+        # conjugate transpose of the Cholesky factor of prior_cov^H
+        factor = torch.linalg.cholesky(prior_cov.transpose(-2, -1).conj())
+        theta = torch.mm(factor.transpose(-2, -1).conj(), noise) + self.prior_mean
+
+        f_prior_xtest = torch.mm(self.embed(xtest), theta)
+        f_prior_x = torch.mm(self.embed(self.x), theta)
+
+        cross = kernel_object.kernel(self.x, xtest)
+        n_train = self.x.size()[0]
+        gram = kernel_object.kernel(self.x, self.x)
+        gram = gram + self.s**2 * self.lam * torch.eye(n_train)
+
+        return f_prior_xtest + cross @ torch.pinverse(gram) @ (self.y - f_prior_x)
+
+    def sample_theta(self, size=1, prior=False):
+        """Draw ``size`` weight vectors, from the posterior if fitted, else the prior.
+
+        Posterior draws are ``theta_mean() + L @ noise`` with
+        ``L = cholesky(get_invV()) * s`` (also stored on ``self.L``). Prior draws,
+        used when the model is not fitted or ``prior`` is true, have covariance
+        ``lam * I`` and are shifted by ``self.prior_mean``.
+        """
+        basis = self.get_basis_size()
+        noise = torch.normal(
+            mean=torch.zeros(size=(basis, size), dtype=torch.float64), std=1.0
+        )
+        self.precompute()
+
+        if not (self.fitted == True and prior == False):
+            prior_cov = self.lam * torch.eye(basis, dtype=torch.float64)
+            factor = torch.linalg.cholesky(prior_cov.transpose(-2, -1).conj())
+            return torch.mm(factor.transpose(-2, -1).conj(), noise) + self.prior_mean
+
+        self.L = torch.linalg.cholesky(self.get_invV()) * self.s
+        return self.theta_mean() + torch.mm(self.L, noise)
+
+    def theta_mean_constrained(self, weights=None, B=1):
+        """Weighted least-squares estimate of theta subject to ``||theta||_2 <= B``.
+
+        Solved with cvxpy using the MOSEK solver. ``weights`` defaults to the
+        uniform weights ``1 / n``. Returns the solution as a column tensor.
+        """
+        if weights is None:
+            weights = torch.ones(self.n).double() / self.n
+
+        features = self.embed(self.x).detach().numpy()
+        theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy()))
+        targets = self.y.view(-1).detach().numpy()
+
+        squared_errors = cp.square(features @ theta - targets)
+        objective = cp.Minimize(cp.sum(weights @ squared_errors))
+
+        # SOC(t, x) encodes ||x||_2 <= t; theta @ zero adds nothing, so t equals B
+        zero = np.zeros(int(torch.sum(self.m)))
+        constraints = [cp.SOC(theta @ zero + B, theta)]
+
+        prob = cp.Problem(objective, constraints)
+        prob.solve(solver=cp.MOSEK)
+        return torch.from_numpy(theta.value).view(-1, 1)
+
+    def theta_absolute_deviation(self, weights=None, reg=None):
+        """Weighted least-absolute-deviation estimate of theta with a 2-norm penalty.
+
+        With ``reg=None`` the penalty weight is ``s * lam`` and cvxpy's default
+        solver is used; with an explicit ``reg`` that value is the penalty
+        weight and MOSEK is used. ``weights`` defaults to all ones. Returns
+        ``theta.value`` as a tensor.
+        """
+        if weights is None:
+            weights = torch.ones(self.x.size()[0])
+
+        if reg is None:  # standard regularization
+            penalty, solver_args = self.s * self.lam, {}
+        else:  # custom regularization
+            penalty, solver_args = reg, {"solver": cp.MOSEK}
+
+        Q = self.embed(self.x)
+        theta = cp.Variable((int(torch.sum(self.m)), 1))
+        absolute_errors = cp.abs(Q.numpy() @ theta - self.y.numpy())
+        objective = cp.Minimize(
+            cp.sum(weights @ absolute_errors) + penalty * cp.norm2(theta)
+        )
+        prob = cp.Problem(objective)
+        prob.solve(**solver_args)
+        return torch.from_numpy(theta.value)
+
+    def theta_absolute_deviation_constrained(self, weights=None, B=1):
+        """Weighted least-absolute-deviation estimate subject to ``||theta||_2 <= B``.
+
+        Solved with cvxpy using the MOSEK solver. ``weights`` defaults to all
+        ones. Returns the solution as a column tensor.
+        """
+        if weights is None:
+            weights = torch.ones(self.x.size()[0])
+
+        features = self.embed(self.x).detach().numpy()
+        theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy()))
+        targets = self.y.view(-1).detach().numpy()
+
+        absolute_errors = cp.abs(features @ theta - targets)
+        objective = cp.Minimize(cp.sum(weights @ absolute_errors))
+
+        # SOC(t, x) encodes ||x||_2 <= t; theta @ zero adds nothing, so t equals B
+        zero = np.zeros(int(torch.sum(self.m)))
+        constraints = [cp.SOC(theta @ zero + B, theta)]
+
+        prob = cp.Problem(objective, constraints)
+        prob.solve(solver=cp.MOSEK)
+        return torch.from_numpy(theta.value).view(-1, 1)
+
+    def theta_chebyschev_approximation(self, eps=1.0):
+        """Minimum-norm theta whose residuals all satisfy ``|Q theta - y| <= eps``.
+
+        Minimises ``sum_squares(theta)`` with cvxpy (MOSEK solver) and returns
+        the solution as a column tensor.
+        """
+        features = self.embed(self.x).detach().numpy()
+        targets = self.y.view(-1).detach().numpy()
+
+        theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy()))
+        within_tolerance = cp.abs(features @ theta - targets) <= eps
+
+        prob = cp.Problem(cp.Minimize(cp.sum_squares(theta)), [within_tolerance])
+        prob.solve(solver=cp.MOSEK)
+        return torch.from_numpy(theta.value).view(-1, 1)
+
+    def interpolation(self, eps=0.0):
+        """Minimum-norm theta that reproduces the data exactly (``Q theta == y``).
+
+        Minimises ``sum_squares(theta)`` with cvxpy's default solver and returns
+        the solution as a column tensor.
+
+        NOTE(review): ``eps`` is accepted but not used.
+        """
+        features = self.embed(self.x).detach().numpy()
+        targets = self.y.view(-1).detach().numpy()
+        theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy()))
+
+        exact_fit = features @ theta == targets
+        prob = cp.Problem(cp.Minimize(cp.sum_squares(theta)), [exact_fit])
+        prob.solve()
+
+        return torch.from_numpy(theta.value).view(-1, 1)
+
+    def mean_squared(self, xtest, weights=None, B=None, theta=False, reg=None):
+        """Predict at ``xtest`` from a squared-loss estimate of theta.
+
+        With ``B`` given, theta comes from ``theta_mean_constrained``; otherwise
+        from ``theta_mean``. Returns the prediction, or ``(prediction, theta)``
+        when ``theta`` is true.
+
+        NOTE(review): the unconstrained branch passes ``weights`` and ``reg`` to
+        ``theta_mean`` -- confirm that method accepts those keywords.
+        """
+        embeding = self.embed(xtest)
+
+        if B is None:
+            theta_mean = self.theta_mean(weights=weights, reg=reg)
+        else:
+            theta_mean = self.theta_mean_constrained(weights=weights, B=B)
+
+        ymean = torch.mm(embeding, theta_mean)
+        return (ymean, theta_mean) if theta == True else ymean
+
+    def mean_aboslute_deviation(self, xtest, weights=None, B=None, theta=False):
+        """Predict at ``xtest`` from an absolute-deviation estimate of theta.
+
+        With ``B`` given, theta comes from
+        ``theta_absolute_deviation_constrained``; otherwise from
+        ``theta_absolute_deviation``. Returns the prediction, or
+        ``(prediction, theta)`` when ``theta`` is true.
+        """
+        embeding = self.embed(xtest)
+
+        if B is None:
+            theta_mean = self.theta_absolute_deviation(weights=weights)
+        else:
+            theta_mean = self.theta_absolute_deviation_constrained(weights=weights, B=B)
+
+        ymean = torch.mm(embeding, theta_mean)
+        return (ymean, theta_mean) if theta == True else ymean
+
+ """
Hessian
"""
- def mean_gradient_hessian(self, xtest, hessian=False):
- hessian_mu = torch.zeros(size=(self.d, self.d), dtype=torch.float64)
- xtest.requires_grad_(True)
- # xtest.retain_grad()
- mu = self.mean_std(xtest)[0]
- # mu.backward(retain_graph=True)
-
- # nabla_mu = xtest.grad
- nabla_mu = grad(mu, xtest, create_graph=True)[0][0]
-
- if hessian == False:
- return nabla_mu
- else:
- for i in range(self.d):
- hessian_mu[i, :] = grad(nabla_mu[i], xtest, create_graph=True, retain_graph=True)[0][0]
- return [nabla_mu, hessian_mu]
-
- """
+    def mean_gradient_hessian(self, xtest, hessian=False):
+        """Differentiate the predictive mean with respect to ``xtest`` via autograd.
+
+        Gradient tracking is switched on for ``xtest`` in place. Returns the
+        first row of the gradient, or ``[gradient, hessian]`` when ``hessian``
+        is true; the Hessian is a ``(d, d)`` float64 tensor filled row by row.
+        """
+        hessian_mu = torch.zeros(size=(self.d, self.d), dtype=torch.float64)
+        xtest.requires_grad_(True)
+        mu = self.mean_std(xtest)[0]
+
+        # create_graph keeps the gradient differentiable for the second pass
+        nabla_mu = grad(mu, xtest, create_graph=True)[0][0]
+
+        if hessian == False:
+            return nabla_mu
+
+        for i in range(self.d):
+            second = grad(nabla_mu[i], xtest, create_graph=True, retain_graph=True)
+            hessian_mu[i, :] = second[0][0]
+        return [nabla_mu, hessian_mu]
+
+ """
Optimization
"""
- def ucb_optimize(self, beta, multistart=25, lcb=False, minimizer="L-BFGS-B"):
-
- # precompute important (theta)
- theta_mean, K = self.theta_mean(var=True)
-
- if lcb == False:
- fun = lambda x: - (self.embed(torch.from_numpy(x).view(1, -1)) @ theta_mean + \
- beta * torch.sqrt(self.embed(torch.from_numpy(x).view(1, -1)) @ K @ self.embed(
- torch.from_numpy(x).view(1, -1)).T)).detach().numpy()[0]
- else:
- fun = lambda x: - (self.embed(torch.from_numpy(x).view(1, -1)) @ theta_mean - \
- beta * torch.sqrt(self.embed(torch.from_numpy(x).view(1, -1)) @ K @ self.embed(
- torch.from_numpy(x).view(1, -1)).T).detach().numpy()[0]).numpy()[0]
-
- if self.bounds == None:
- mybounds = tuple([(-self.diameter, self.diameter) for _ in range(self.d)])
- else:
- mybounds = self.bounds
-
- results = []
- for j in range(multistart):
-
- x0 = np.random.randn(self.d)
- for i in range(self.d):
- x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1])
-
- if minimizer == "L-BFGS-B":
- res = minimize(fun, x0, method="L-BFGS-B", jac=None, tol=0.0001, bounds=mybounds)
- solution = res.x
- else:
- raise AssertionError("Wrong optimizer selected.")
-
- results.append([solution, -fun(solution)])
-
- results = np.array(results)
- index = np.argmax(results[:, 1])
- solution = results[index, 0]
- return (torch.from_numpy(solution).view(1, -1), -torch.from_numpy(fun(solution)))
-
- def sample_and_optimize(self, xtest=None, multistart=25, minimizer="L-BFGS-B", grid=100, verbose=0):
- '''
- Sample functions from Gaussian Process and take Maximum using
- first order maximization
- '''
-
- # sample linear approximating
- theta = self.sample_theta()
-
- # get bounds
- if self.bounds == None:
- mybounds = tuple([(-self.diameter, self.diameter) for _ in range(self.d)])
- else:
- mybounds = self.bounds
-
- fun = lambda x: -torch.mm(torch.t(theta), torch.t(self.embed(torch.from_numpy(x).view(1, -1)))).numpy()[0]
-
- results = []
- for j in range(multistart):
- x0 = np.random.randn(self.d)
- for i in range(self.d):
- x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1])
-
- if minimizer == "L-BFGS-B":
- res = minimize(fun, x0, method="L-BFGS-B", jac=None, tol=0.0001, bounds=mybounds)
- solution = res.x
- else:
- raise AssertionError("Wrong optimizer selected.")
-
- results.append([solution, -fun(solution)])
- results = np.array(results)
- index = np.argmax(results[:, 1])
- solution = results[index, 0]
-
- return (torch.from_numpy(solution), -torch.from_numpy(fun(solution)))
-
- def sample(self, xtest, size=1, prior=False):
- '''
- Sample functions from Gaussian Process
- '''
- theta = self.sample_theta(size=size, prior=prior)
- f = torch.mm(self.embed(xtest), theta)
- return f
-
- def sample_and_max(self, xtest, size=1):
- '''
- Sample functions from Gaussian Process and take Maximum
- '''
- f = self.sample(xtest, size=size)
- index = np.argmax(f, axis=0)
- return (xtest[index, :], f[index, :])
-
- def get_kernel(self):
- embeding = self.embed(self.x)
- Z_ = self.linear_kernel(embeding, embeding)
- K = (Z_ + self.s * self.s * self.lam * torch.eye(int(self.n), dtype=torch.float64))
- return K
-
- def residuals(self):
- mu, _ = self.mean_std(self.x)
- out = torch.sum((mu - self.y) ** 2)
- return out
-if __name__ == "__main__":
- N = 10
- s = 0.1
- n = 256
- L_infinity_ball = 0.5
-
- d = 1
- m = 128
+    def ucb_optimize(self, beta, multistart=25, lcb=False, minimizer="L-BFGS-B"):
+        """Maximise the upper (or lower) confidence bound with multistart L-BFGS-B.
+
+        The acquisition is ``phi(x) @ theta_mean + beta * sqrt(phi(x) K phi(x)^T)``
+        (``-`` instead of ``+`` when ``lcb`` is true), where ``theta_mean, K``
+        come from ``self.theta_mean(var=True)``. Each of the ``multistart`` runs
+        starts from a uniform draw inside the box bounds (``self.bounds``, or
+        ``[-diameter, diameter]`` per dimension when no bounds are set).
+
+        Returns ``(x_best, value)``: the best point as a ``(1, d)`` tensor and
+        the acquisition value there.
+
+        Raises ``AssertionError`` for any ``minimizer`` other than
+        ``"L-BFGS-B"`` and ``ValueError`` when ``multistart`` is below one.
+
+        The best run is tracked directly instead of stacking
+        ``[x, value]`` pairs with ``np.array``: those pairs are ragged for
+        ``d > 1``, which NumPy 1.24+ rejects with ``ValueError``.
+        """
+        if minimizer != "L-BFGS-B":
+            raise AssertionError("Wrong optimizer selected.")
+        if multistart < 1:
+            raise ValueError("multistart must be at least 1")
+
+        # precompute important (theta)
+        theta_mean, K = self.theta_mean(var=True)
+        sign = -1.0 if lcb else 1.0
+
+        def fun(x):
+            # negated acquisition, because scipy minimises
+            phi = self.embed(torch.from_numpy(x).view(1, -1))
+            width = torch.sqrt(phi @ K @ phi.T)
+            return -(phi @ theta_mean + sign * beta * width).detach().numpy()[0]
+
+        if self.bounds is None:
+            mybounds = tuple((-self.diameter, self.diameter) for _ in range(self.d))
+        else:
+            mybounds = self.bounds
+
+        best_x, best_value = None, None
+        for _ in range(multistart):
+            # same RNG call sequence as before, so seeded runs stay reproducible
+            x0 = np.random.randn(self.d)
+            for i in range(self.d):
+                x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1])
+
+            res = minimize(
+                fun, x0, method="L-BFGS-B", jac=None, tol=0.0001, bounds=mybounds
+            )
+            value = -fun(res.x)
+            # strict '>' keeps the first of tied maxima, as np.argmax did
+            if best_value is None or float(value[0]) > float(best_value[0]):
+                best_x, best_value = res.x, value
+
+        return (
+            torch.from_numpy(best_x).view(1, -1),
+            -torch.from_numpy(fun(best_x)),
+        )
+
+    def sample_and_optimize(
+        self, xtest=None, multistart=25, minimizer="L-BFGS-B", grid=100, verbose=0
+    ):
+        """
+        Sample one function from the model and maximise it with multistart L-BFGS-B.
+
+        A weight vector is drawn with ``sample_theta()`` and the sampled function
+        ``x -> theta^T phi(x)`` is maximised from ``multistart`` uniform starting
+        points inside the box bounds (``self.bounds``, or
+        ``[-diameter, diameter]`` per dimension when no bounds are set).
+
+        Returns ``(x_best, value)``: the best point as a length-``d`` tensor and
+        the sampled function value there. ``xtest``, ``grid`` and ``verbose`` are
+        accepted but not used.
+
+        Raises ``AssertionError`` for any ``minimizer`` other than
+        ``"L-BFGS-B"`` and ``ValueError`` when ``multistart`` is below one.
+
+        The best run is tracked directly instead of stacking
+        ``[x, value]`` pairs with ``np.array``: those pairs are ragged for
+        ``d > 1``, which NumPy 1.24+ rejects with ``ValueError``.
+        """
+        if minimizer != "L-BFGS-B":
+            raise AssertionError("Wrong optimizer selected.")
+        if multistart < 1:
+            raise ValueError("multistart must be at least 1")
+
+        # sample linear approximating
+        theta = self.sample_theta()
+
+        # get bounds
+        if self.bounds is None:
+            mybounds = tuple((-self.diameter, self.diameter) for _ in range(self.d))
+        else:
+            mybounds = self.bounds
+
+        def fun(x):
+            # negated sample value, because scipy minimises
+            phi = self.embed(torch.from_numpy(x).view(1, -1))
+            return -torch.mm(torch.t(theta), torch.t(phi)).numpy()[0]
+
+        best_x, best_value = None, None
+        for _ in range(multistart):
+            # same RNG call sequence as before, so seeded runs stay reproducible
+            x0 = np.random.randn(self.d)
+            for i in range(self.d):
+                x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1])
+
+            res = minimize(
+                fun, x0, method="L-BFGS-B", jac=None, tol=0.0001, bounds=mybounds
+            )
+            value = -fun(res.x)
+            # strict '>' keeps the first of tied maxima, as np.argmax did
+            if best_value is None or float(value[0]) > float(best_value[0]):
+                best_x, best_value = res.x, value
+
+        return (torch.from_numpy(best_x), -torch.from_numpy(fun(best_x)))
+
+    def sample(self, xtest, size=1, prior=False):
+        """
+        Draw ``size`` function samples at ``xtest``: sample theta, then apply the embedding.
+        """
+        weights = self.sample_theta(size=size, prior=prior)
+        features = self.embed(xtest)
+        return torch.mm(features, weights)
+
+    def sample_and_max(self, xtest, size=1):
+        """
+        Sample functions at ``xtest`` and return the maximiser and maximum of each.
+
+        The arg-max is taken along axis 0 of the sampled values; the rows of
+        ``xtest`` and of the samples at those indices are returned as a tuple.
+        """
+        values = self.sample(xtest, size=size)
+        best = np.argmax(values, axis=0)
+        return (xtest[best, :], values[best, :])
+
+    def get_kernel(self):
+        """Return the regularised kernel matrix of the stored inputs.
+
+        This is ``linear_kernel(Phi, Phi) + s^2 * lam * I`` with
+        ``Phi = embed(self.x)`` and ``I`` a float64 identity of size ``n``.
+        """
+        features = self.embed(self.x)
+        gram = self.linear_kernel(features, features)
+        ridge = self.s * self.s * self.lam * torch.eye(int(self.n), dtype=torch.float64)
+        return gram + ridge
+
+    def residuals(self):
+        """Return the sum of squared differences between the fitted mean and ``self.y``."""
+        mu, _std = self.mean_std(self.x)
+        squared_errors = (mu - self.y) ** 2
+        return torch.sum(squared_errors)
- xtest = torch.from_numpy(interval(n, d, L_infinity_ball=L_infinity_ball))
- x = torch.from_numpy(np.random.uniform(-L_infinity_ball, L_infinity_ball, N)).view(-1, 1)
- F_true = lambda x: torch.sin(x * 4) ** 2 - 0.1
- F = lambda x: F_true(x) + s * torch.randn(x.size()[0]).view(-1, 1).double()
- y = F(x)
-
- emb = RFFEmbedding(m=m, gamma=0.1)
- Reggr = KernelizedFeatures(embedding=emb, m=m, d=1)
- Reggr.fit_gp(x, y)
- Reggr.visualize(xtest, f_true=F_true)
+if __name__ == "__main__":
+    # Demo: fit the feature-based regressor to noisy 1-D samples and plot the result.
+    N = 10  # number of training points
+    s = 0.1  # scale of the observation noise added below
+    n = 256  # passed to interval() when building the test inputs
+    L_infinity_ball = 0.5  # half-width of the sampling interval
+
+    d = 1
+    m = 128  # passed as ``m`` to both the embedding and the regressor
+
+    xtest = torch.from_numpy(interval(n, d, L_infinity_ball=L_infinity_ball))
+    x_samples = np.random.uniform(-L_infinity_ball, L_infinity_ball, N)
+    x = torch.from_numpy(x_samples).view(-1, 1)
+
+    def F_true(x):
+        # noiseless target function
+        return torch.sin(x * 4) ** 2 - 0.1
+
+    def F(x):
+        # target plus Gaussian noise of scale s
+        return F_true(x) + s * torch.randn(x.size()[0]).view(-1, 1).double()
+
+    y = F(x)
+
+    emb = RFFEmbedding(m=m, gamma=0.1)
+    Reggr = KernelizedFeatures(embedding=emb, m=m, d=1)
+    Reggr.fit_gp(x, y)
+    Reggr.visualize(xtest, f_true=F_true)
diff --git a/stpy/continuous_processes/kernelized_features_old.py b/stpy/continuous_processes/kernelized_features_old.py
index 32877f8..62f802c 100755
--- a/stpy/continuous_processes/kernelized_features_old.py
+++ b/stpy/continuous_processes/kernelized_features_old.py
@@ -10,649 +10,757 @@
class KernelizedFeatures(GaussianProcess):
- '''
- Random Fourier Features for Gaussian Kernel
- '''
-
- def __init__(self, embedding, m, s=0.001, lam=1., d=1, diameter=1.0, verbose=True, groups=None,
- bounds=None, scale=1.0, kappa=1.0, poly=2, primal=True, beta_fun = None ):
-
- self.s = s
- self.lam = lam
- self.primal = primal
- self.x = None
-
- self.K = 0
- self.mu = 0.0
-
- self.m = torch.from_numpy(np.array(m))
- self.fitted = False
- self.data = False
-
- self.d = d
- self.n = 0
- self.bounds = bounds
- self.groups = groups
- self.diameter = diameter
-
- self.verbose = verbose
- self.admits_first_order = True
-
- self.embedding = embedding
- self.embedding_map = embedding
-
- self.kappa = kappa
- self.scale = scale
- self.poly = poly
-
- self.to_add = []
- self.prior_mean = 0
- self.linear_kernel = KernelFunction(kernel_name="linear").linear_kernel
- self.dual = False
-
- def beta(self, delta=1e-2, norm=1, theory=False, variance_only=False):
- if not theory:
- beta_value = 2.
- else:
- embeding = self.embed(self.x)
- n = self.x.size()[0]
- Z_ = self.linear_kernel(embeding, embeding)
- K = (Z_ + self.lam * torch.eye(int(self.n), dtype=torch.float64))
- if not variance_only:
- beta_value = norm * np.sqrt(self.lam) + self.s * np.sqrt(
- torch.logdet(K) - n * np.log(self.lam) + 2. * np.log(1. / delta))
- else:
- beta_value = self.s * np.sqrt(torch.logdet(K) - n * np.log(self.lam) + 2. * np.log(1. / delta))
- return beta_value
-
- def description(self):
- return "Custom Features object"
-
- def norm(self):
- if self.fitted:
- norm = torch.linalg.norm(self.theta_mean())
- return norm
- else:
- return None
-
- def embed(self, x):
- return self.embedding.embed(x)
-
- def set_embedding(self, embed):
- self.embedding_map = embed
-
- def get_basis_size(self):
- return int(torch.sum(self.m))
-
- def set_basis_size(self, m):
- self.m = m
-
- def kernel(self, x, y):
- embedding = self.embed(x)
- embedding2 = self.embed(y)
- K = self.linear_kernel(embedding, embedding2)
- return K
-
- def logdet_ratio(self):
- I = torch.eye(int(torch.sum(self.m))).double()
- return torch.logdet(self.K) - torch.logdet(self.s ** 2 * self.lam * I)
-
- def effective_dim(self, xtest):
- Phi = self.embed(xtest)
- d = torch.trace(torch.solve(Phi.T @ Phi, Phi.T @ Phi + torch.eye(self.get_basis_size()).double() * self.lam)[0])
- return d
-
- def add_data_point(self, x, y):
- if self.n == 0:
- self.fit_gp(x, y)
- else:
- self.to_add.append([x, y])
- self.fitted = False
-
- def fit(self,x= None, y=None):
- self.fit_gp(self.x,self.y)
-
- def fit_gp_soft(self, x, y, A, b, std=None):
- self.fit_gp(x, y)
- Q = self.embed(self.x)
- theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy()))
- if std is not None:
- P = np.diag(1 / (std ** 2))
- else:
- P = np.diag(np.ones(A.shape[0]))
-
- objective = cp.Minimize(
- cp.sum(cp.square(Q.detach().numpy() @ theta - self.y.view(-1).detach().numpy()))
- + self.s ** 2 * cp.quad_form(A @ theta - b, P) + self.lam * self.s ** 2 * cp.sum_squares(theta))
- prob = cp.Problem(objective)
- prob.solve(solver=cp.MOSEK, verbose=False)
- return torch.from_numpy(theta.value).view(-1, 1)
-
- def fit_gp_equality_fast(self, x, y, A, b, rcond=1e-2):
- self.fit_gp(x, y)
- Q = self.embed(self.x)
- I = torch.zeros(Q.size()[1]).double()
-
- V = Q.T @ Q - self.lam * self.s ** 2 * I
- e = Q.T @ self.y
-
- R = torch.from_numpy(orth(A.detach().numpy().T)).T
- b = torch.zeros(size=(R.size()[0], 1)).double()
- M = torch.vstack([V, R])
- v = torch.vstack([e, b.view(-1, 1)])
- theta = torch.linalg.lstsq(M, v.view(-1))[0].view(-1, 1)
- return theta
-
- def fit_gp_equality(self, x, y, A, b, eps=1e-6, rcond=1e-6):
- self.fit_gp(x, y)
- Q = self.embed(self.x)
-
- if eps is not None:
- theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy()))
- objective = cp.Minimize(
- cp.sum_squares(Q.detach().numpy() @ theta - self.y.view(
- -1).detach().numpy()) + self.lam * self.s ** 2 * cp.sum_squares(theta))
-
- constraints = [A.detach().numpy() @ theta - b.detach().view(-1).numpy() <= np.ones(A.size()[0]) * eps ** 2]
- constraints += [
- A.detach().numpy() @ theta - b.detach().view(-1).numpy() >= -np.ones(A.size()[0]) * eps ** 2]
-
- prob = cp.Problem(objective, constraints)
- prob.solve(solver=cp.MOSEK, verbose=True)
- return torch.from_numpy(theta.value).view(-1, 1)
- else:
- r = torch.linalg.lstsq(A, b)[0]
- N = null_space(A.detach().numpy(), rcond=rcond)
- theta = cp.Variable(N.shape[1])
-
- objective = cp.Minimize(
- cp.sum_squares(Q.detach().numpy() @ N @ theta - self.y.view(
- -1).detach().numpy()) + self.lam * self.s ** 2 * cp.sum_squares(theta))
-
- prob = cp.Problem(objective)
- prob.solve(solver=cp.MOSEK, verbose=True)
- return torch.from_numpy(N @ theta.value + r.numpy()).view(-1, 1)
-
- def fit_gp(self, x, y):
- '''
- Function to Fit GP
- '''
- self.x = x
- self.y = y
- self.n = list(self.x.size())[0]
- self.d = list(self.x.size())[1]
-
- if self.n < self.m:
- self.dual = True
- else:
- self.dual = False
-
- if self.primal == True:
- self.dual = False
-
- self.data = True
- self.fitted = False
- return None
-
- def add_points(self, x, y):
- if self.x is not None:
- self.x = torch.cat((self.x, x), dim=0)
- self.y = torch.cat((self.y, y), dim=0)
- else:
- self.x = x
- self.y = y
-
- def check_conversion(self):
- """
- Convert between dual and primal form
- :return:
- """
- if self.primal == False:
- if self.n == self.m: # convert do d mode
- print("Switching mode to primal.")
- self.dual = False
-
- I = torch.eye(int(self.m)).double()
- Z_ = self.linear_kernel(torch.t(self.Q), torch.t(self.Q))
- self.V = (Z_ + self.s * self.s * self.lam * torch.eye(int(self.m), dtype=torch.float64))
- self.invV, _ = torch.solve(I, self.V)
-
- def get_invV(self):
- self.precompute()
-
- if self.dual:
- I = torch.eye(self.m).double()
- Z_ = self.linear_kernel(torch.t(self.Q), torch.t(self.Q))
- self.V = (Z_ + self.s * self.s * self.lam * torch.eye(self.m, dtype=torch.float64))
- self.invV, _ = torch.solve(I, self.V)
- return self.invV
- else:
- return self.invV
-
- def precompute(self):
- if self.fitted == False:
- if len(self.to_add) > 0:
- # something to add via low rank update
- for i in range(len(self.to_add)):
- newx = self.to_add[i][0]
- newy = self.to_add[i][1]
-
- # rank one update
- emb = self.embed(newx)
-
- if self.dual: # via Shur complements
- newKinv = torch.zeros(size=(self.n + 1, self.n + 1)).double()
- M = self.invK @ self.Q
- c = 1. / ((self.s ** 2 * self.lam + emb @ emb.T) - emb @ self.Q.T @ M @ emb.T)
-
- newKinv[0:self.n, 0:self.n] = self.invK + c * M @ emb.T @ emb @ M.T
- newKinv[0:self.n, self.n] = (- M @ emb.T * c).view(-1)
- newKinv[self.n, 0:self.n] = (- emb @ M.T * c).view(-1)
- newKinv[self.n, self.n] = c.view(-1)
-
- self.invK = newKinv
-
- self.add_points(newx, newy)
- self.n = self.n + 1
- self.Q = self.embed(self.x)
-
- self.invK_V = (1. / self.lam) * (-self.Q.T @ self.invK @ self.Q + torch.eye(int(self.m)))
-
- else: # via Woodbury
- c = 1 + emb @ self.invV @ emb.T
- self.invV = self.invV - (self.invV @ emb.T @ emb @ self.invV) / c
- self.add_points(newx, newy)
- self.n = self.n + 1
- self.Q = self.embed(self.x)
- # add point
-
- self.check_conversion()
-
- self.fitted = True
- self.to_add = []
-
-
- elif self.data == True: # just compute the
- self.Q = self.embed(self.x)
- if not self.dual:
- I = torch.eye(int(self.m)).double()
- Z_ = self.Q.T @ self.Q
- self.V = Z_ + self.s ** 2 * self.lam * I
- self.invV = torch.pinverse(self.V, rcond=1e-10)
- else:
- I = torch.eye(self.n).double()
- Z_ = self.Q @ self.Q.T
- self.K = Z_ + self.s * self.s * self.lam * I
- # self.invK, _ = torch.solve(I, self.K)
- self.invK = torch.pinverse(self.K)
- self.invK_V = (1. / self.lam) * (-self.Q.T @ self.invK @ self.Q + torch.eye(int(self.m)))
- self.fitted = True
- else:
- I = torch.eye(int(self.m)).double()
- self.V = self.s ** 2 * self.lam * I
- self.invV = torch.pinverse(self.V, rcond=1e-10)
- else:
- pass
-
- def theta_mean(self, var=False, prior=False):
-
- self.precompute()
- if self.fitted == True and prior == False:
- if self.dual:
- theta_mean = self.Q.T @ self.invK @ self.y
- Z = self.invK_V
- else:
- theta_mean = self.invV @ self.Q.T @ self.y
- Z = self.s ** 2 * self.invV
- else:
- theta_mean = 0 * torch.ones(size=(self.m, 1)).double()
-
- if var is False:
- return theta_mean
- else:
- return (theta_mean, Z)
-
- def mean_std(self, xtest):
- '''
- Calculate mean and variance for GP at xtest points
- '''
- # self.precompute()
- embeding = self.embed(xtest)
-
- # mean
- theta_mean = self.theta_mean()
- ymean = embeding @ theta_mean
-
- # std
- if not self.dual:
- diagonal = self.s ** 2 * torch.einsum('ij,jk,ik->i', (embeding, self.invV, embeding)).view(-1, 1)
- else:
- diagonal = torch.einsum('ij,jk,ik->i', (embeding, self.invK_V, embeding)).view(-1, 1)
-
- ystd = torch.sqrt(diagonal)
- return (ymean, ystd)
-
- def sample_matheron(self, xtest, kernel_object, size=1):
- basis = self.get_basis_size()
- zeros = torch.zeros(size=(basis, size), dtype=torch.float64)
- random_vector = torch.normal(mean=zeros, std=1.)
-
- Z = self.lam * torch.eye(basis, dtype=torch.float64)
- L = torch.linalg.cholesky(Z.transpose(-2, -1).conj()).transpose(-2, -1).conj()
- theta = torch.mm(L, random_vector) + self.prior_mean
-
- f_prior_xtest = torch.mm(self.embed(xtest), theta)
- f_prior_x = torch.mm(self.embed(self.x), theta)
-
- K_star = kernel_object.kernel(self.x, xtest)
- N = self.x.size()[0]
- K = kernel_object.kernel(self.x, self.x) + self.s ** 2 * self.lam * torch.eye(N)
-
- f = f_prior_xtest + K_star @ torch.pinverse(K) @ (self.y - f_prior_x)
- return f
-
- def sample_theta(self, size=1, prior=False):
-
- basis = self.get_basis_size()
-
- zeros = torch.zeros(size=(basis, size), dtype=torch.float64)
- random_vector = torch.normal(mean=zeros, std=1.).double()
- self.precompute()
-
- if self.fitted == True and prior == False:
- self.L = torch.linalg.cholesky(self.get_invV()) * self.s
- theta = self.theta_mean().view(-1, 1)
- print(theta.size())
- print(self.L.size())
- print(random_vector.size())
- theta = theta + torch.mm(self.L, random_vector)
- else:
- Z = (self.lam) * torch.eye(basis, dtype=torch.float64)
- L = torch.linalg.cholesky(Z.transpose(-2, -1).conj()).transpose(-2, -1).conj()
- theta = torch.mm(L, random_vector) + self.prior_mean
-
- return theta
-
- def theta_mean_constrained(self, weights=None, B=1):
- if weights is None:
- weights = torch.ones(self.n).double() / self.n
-
- Q = self.embed(self.x)
- theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy()))
- objective = cp.Minimize(
- cp.sum(weights @ cp.square(Q.detach().numpy() @ theta - self.y.view(-1).detach().numpy())))
- zero = np.zeros(int(torch.sum(self.m)))
- constraints = [cp.SOC(theta @ zero + B, theta)]
- prob = cp.Problem(objective, constraints)
- prob.solve(solver=cp.MOSEK)
- return torch.from_numpy(theta.value).view(-1, 1)
-
- def theta_absolute_deviation(self, weights=None, reg=None):
- if weights is None:
- weights = torch.ones(self.x.size()[0])
-
- if reg is None: # standard regularization
- Q = self.embed(self.x)
- theta = cp.Variable((int(torch.sum(self.m)), 1))
- objective = cp.Minimize(
- cp.sum(weights @ cp.abs(Q.numpy() @ theta - self.y.numpy())) + self.s * self.lam * cp.norm2(theta))
- prob = cp.Problem(objective)
- prob.solve()
- return torch.from_numpy(theta.value)
- else: # custom regularization
- Q = self.embed(self.x)
- theta = cp.Variable((int(torch.sum(self.m)), 1))
- objective = cp.Minimize(
- cp.sum(weights @ cp.abs(Q.numpy() @ theta - self.y.numpy())) + reg * cp.norm2(theta))
- prob = cp.Problem(objective)
- prob.solve(solver=cp.MOSEK)
- return torch.from_numpy(theta.value)
-
- def theta_absolute_deviation_constrained(self, weights=None, B=1):
- if weights is None:
- weights = torch.ones(self.x.size()[0])
- Q = self.embed(self.x)
- theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy()))
-
- objective = cp.Minimize(cp.sum(weights @ cp.abs(Q.detach().numpy() @ theta - self.y.view(-1).detach().numpy())))
- zero = np.zeros(int(torch.sum(self.m)))
- constraints = [cp.SOC(theta @ zero + B, theta)]
- prob = cp.Problem(objective, constraints)
- prob.solve(solver=cp.MOSEK)
- return torch.from_numpy(theta.value).view(-1, 1)
-
- def theta_chebyschev_approximation(self, eps=1.):
- Q = self.embed(self.x).detach().numpy()
- y = self.y.view(-1).detach().numpy()
-
- theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy()))
- objective = cp.Minimize(cp.sum_squares(theta))
- constraints = [cp.abs(Q @ theta - y) <= eps]
-
- prob = cp.Problem(objective, constraints)
- prob.solve(solver=cp.MOSEK)
- res = torch.from_numpy(theta.value).view(-1, 1)
- return res
-
- def interpolation(self, eps=0.):
- Q = self.embed(self.x).detach().numpy()
- y = self.y.view(-1).detach().numpy()
- theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy()))
- objective = cp.Minimize(cp.sum_squares(theta))
- constraints = [Q @ theta == y]
-
- prob = cp.Problem(objective, constraints)
- prob.solve()
- res = torch.from_numpy(theta.value).view(-1, 1)
-
- return res
-
- def mean_squared(self, xtest, weights=None, B=None, theta=False, reg=None):
- embeding = self.embed(xtest)
-
- if B is not None:
- theta_mean = self.theta_mean_constrained(weights=weights, B=B)
- else:
- theta_mean = self.theta_mean(weights=weights, reg=reg)
- ymean = torch.mm(embeding, theta_mean)
- if theta == True:
- return ymean, theta_mean
- else:
- return ymean
-
- def mean_aboslute_deviation(self, xtest, weights=None, B=None, theta=False):
- embeding = self.embed(xtest)
- if B is not None:
- theta_mean = self.theta_absolute_deviation_constrained(weights=weights, B=B)
- else:
- theta_mean = self.theta_absolute_deviation(weights=weights)
- ymean = torch.mm(embeding, theta_mean)
- if theta == True:
- return ymean, theta_mean
- else:
- return ymean
-
- """
+ """
+ Random Fourier Features for Gaussian Kernel
+ """
+
+    def __init__(
+        self, embedding, m, s=0.001, lam=1.0, d=1, diameter=1.0, verbose=True,
+        groups=None, bounds=None, scale=1.0, kappa=1.0, poly=2, primal=True,
+        beta_fun=None,
+    ):
+        """Store the configuration and reset all fit state.
+
+        ``embedding`` must provide ``embed(x)``; it is kept on both
+        ``self.embedding`` and ``self.embedding_map``. ``m`` is converted to a
+        tensor via ``np.array``. The remaining arguments are stored unchanged
+        under the same names.
+
+        NOTE(review): ``beta_fun`` is accepted but not stored or used here.
+        """
+        # model parameters
+        self.s = s
+        self.lam = lam
+        self.kappa = kappa
+        self.scale = scale
+        self.poly = poly
+        self.primal = primal
+
+        # feature map
+        self.embedding = self.embedding_map = embedding
+        self.m = torch.from_numpy(np.array(m))
+        self.linear_kernel = KernelFunction(kernel_name="linear").linear_kernel
+
+        # input domain
+        self.d = d
+        self.bounds = bounds
+        self.groups = groups
+        self.diameter = diameter
+
+        # data and fit state: nothing stored, nothing fitted
+        self.x = None
+        self.n = 0
+        self.K = 0
+        self.mu = 0.0
+        self.fitted = False
+        self.data = False
+        self.dual = False
+        self.to_add = []
+        self.prior_mean = 0
+
+        self.verbose = verbose
+        self.admits_first_order = True
+
+ def beta(self, delta=1e-2, norm=1, theory=False, variance_only=False):
+ if not theory:
+ beta_value = 2.0
+ else:
+ embeding = self.embed(self.x)
+ n = self.x.size()[0]
+ Z_ = self.linear_kernel(embeding, embeding)
+ K = Z_ + self.lam * torch.eye(int(self.n), dtype=torch.float64)
+ if not variance_only:
+ beta_value = norm * np.sqrt(self.lam) + self.s * np.sqrt(
+ torch.logdet(K) - n * np.log(self.lam) + 2.0 * np.log(1.0 / delta)
+ )
+ else:
+ beta_value = self.s * np.sqrt(
+ torch.logdet(K) - n * np.log(self.lam) + 2.0 * np.log(1.0 / delta)
+ )
+ return beta_value
+
+ def description(self):
+ return "Custom Features object"
+
+ def norm(self):
+ if self.fitted:
+ norm = torch.linalg.norm(self.theta_mean())
+ return norm
+ else:
+ return None
+
+ def embed(self, x):
+ return self.embedding.embed(x)
+
+ def set_embedding(self, embed):
+ self.embedding_map = embed
+
+ def get_basis_size(self):
+ return int(torch.sum(self.m))
+
+ def set_basis_size(self, m):
+ self.m = m
+
+ def kernel(self, x, y):
+ embedding = self.embed(x)
+ embedding2 = self.embed(y)
+ K = self.linear_kernel(embedding, embedding2)
+ return K
+
+ def logdet_ratio(self):
+ I = torch.eye(int(torch.sum(self.m))).double()
+ return torch.logdet(self.K) - torch.logdet(self.s**2 * self.lam * I)
+
+ def effective_dim(self, xtest):
+ Phi = self.embed(xtest)
+ d = torch.trace(
+ torch.solve(
+ Phi.T @ Phi,
+ Phi.T @ Phi + torch.eye(self.get_basis_size()).double() * self.lam,
+ )[0]
+ )
+ return d
+
+ def add_data_point(self, x, y):
+ if self.n == 0:
+ self.fit_gp(x, y)
+ else:
+ self.to_add.append([x, y])
+ self.fitted = False
+
+ def fit(self, x=None, y=None):
+ self.fit_gp(self.x, self.y)
+
+ def fit_gp_soft(self, x, y, A, b, std=None):
+ self.fit_gp(x, y)
+ Q = self.embed(self.x)
+ theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy()))
+ if std is not None:
+ P = np.diag(1 / (std**2))
+ else:
+ P = np.diag(np.ones(A.shape[0]))
+
+ objective = cp.Minimize(
+ cp.sum(
+ cp.square(Q.detach().numpy() @ theta - self.y.view(-1).detach().numpy())
+ )
+ + self.s**2 * cp.quad_form(A @ theta - b, P)
+ + self.lam * self.s**2 * cp.sum_squares(theta)
+ )
+ prob = cp.Problem(objective)
+ prob.solve(solver=cp.MOSEK, verbose=False)
+ return torch.from_numpy(theta.value).view(-1, 1)
+
+ def fit_gp_equality_fast(self, x, y, A, b, rcond=1e-2):
+ self.fit_gp(x, y)
+ Q = self.embed(self.x)
+ I = torch.zeros(Q.size()[1]).double()
+
+ V = Q.T @ Q - self.lam * self.s**2 * I
+ e = Q.T @ self.y
+
+ R = torch.from_numpy(orth(A.detach().numpy().T)).T
+ b = torch.zeros(size=(R.size()[0], 1)).double()
+ M = torch.vstack([V, R])
+ v = torch.vstack([e, b.view(-1, 1)])
+ theta = torch.linalg.lstsq(M, v.view(-1))[0].view(-1, 1)
+ return theta
+
+ def fit_gp_equality(self, x, y, A, b, eps=1e-6, rcond=1e-6):
+ self.fit_gp(x, y)
+ Q = self.embed(self.x)
+
+ if eps is not None:
+ theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy()))
+ objective = cp.Minimize(
+ cp.sum_squares(
+ Q.detach().numpy() @ theta - self.y.view(-1).detach().numpy()
+ )
+ + self.lam * self.s**2 * cp.sum_squares(theta)
+ )
+
+ constraints = [
+ A.detach().numpy() @ theta - b.detach().view(-1).numpy()
+ <= np.ones(A.size()[0]) * eps**2
+ ]
+ constraints += [
+ A.detach().numpy() @ theta - b.detach().view(-1).numpy()
+ >= -np.ones(A.size()[0]) * eps**2
+ ]
+
+ prob = cp.Problem(objective, constraints)
+ prob.solve(solver=cp.MOSEK, verbose=True)
+ return torch.from_numpy(theta.value).view(-1, 1)
+ else:
+ r = torch.linalg.lstsq(A, b)[0]
+ N = null_space(A.detach().numpy(), rcond=rcond)
+ theta = cp.Variable(N.shape[1])
+
+ objective = cp.Minimize(
+ cp.sum_squares(
+ Q.detach().numpy() @ N @ theta - self.y.view(-1).detach().numpy()
+ )
+ + self.lam * self.s**2 * cp.sum_squares(theta)
+ )
+
+ prob = cp.Problem(objective)
+ prob.solve(solver=cp.MOSEK, verbose=True)
+ return torch.from_numpy(N @ theta.value + r.numpy()).view(-1, 1)
+
+ def fit_gp(self, x, y):
+ """
+ Function to Fit GP
+ """
+ self.x = x
+ self.y = y
+ self.n = list(self.x.size())[0]
+ self.d = list(self.x.size())[1]
+
+ if self.n < self.m:
+ self.dual = True
+ else:
+ self.dual = False
+
+ if self.primal == True:
+ self.dual = False
+
+ self.data = True
+ self.fitted = False
+ return None
+
+ def add_points(self, x, y):
+ if self.x is not None:
+ self.x = torch.cat((self.x, x), dim=0)
+ self.y = torch.cat((self.y, y), dim=0)
+ else:
+ self.x = x
+ self.y = y
+
+ def check_conversion(self):
+ """
+ Convert between dual and primal form
+ :return:
+ """
+ if self.primal == False:
+ if self.n == self.m: # convert do d mode
+ print("Switching mode to primal.")
+ self.dual = False
+
+ I = torch.eye(int(self.m)).double()
+ Z_ = self.linear_kernel(torch.t(self.Q), torch.t(self.Q))
+ self.V = Z_ + self.s * self.s * self.lam * torch.eye(
+ int(self.m), dtype=torch.float64
+ )
+ self.invV, _ = torch.solve(I, self.V)
+
+ def get_invV(self):
+ self.precompute()
+
+ if self.dual:
+ I = torch.eye(self.m).double()
+ Z_ = self.linear_kernel(torch.t(self.Q), torch.t(self.Q))
+ self.V = Z_ + self.s * self.s * self.lam * torch.eye(
+ self.m, dtype=torch.float64
+ )
+ self.invV, _ = torch.solve(I, self.V)
+ return self.invV
+ else:
+ return self.invV
+
+ def precompute(self):
+ if self.fitted == False:
+ if len(self.to_add) > 0:
+ # something to add via low rank update
+ for i in range(len(self.to_add)):
+ newx = self.to_add[i][0]
+ newy = self.to_add[i][1]
+
+ # rank one update
+ emb = self.embed(newx)
+
+ if self.dual: # via Shur complements
+ newKinv = torch.zeros(size=(self.n + 1, self.n + 1)).double()
+ M = self.invK @ self.Q
+ c = 1.0 / (
+ (self.s**2 * self.lam + emb @ emb.T)
+ - emb @ self.Q.T @ M @ emb.T
+ )
+
+ newKinv[0 : self.n, 0 : self.n] = (
+ self.invK + c * M @ emb.T @ emb @ M.T
+ )
+ newKinv[0 : self.n, self.n] = (-M @ emb.T * c).view(-1)
+ newKinv[self.n, 0 : self.n] = (-emb @ M.T * c).view(-1)
+ newKinv[self.n, self.n] = c.view(-1)
+
+ self.invK = newKinv
+
+ self.add_points(newx, newy)
+ self.n = self.n + 1
+ self.Q = self.embed(self.x)
+
+ self.invK_V = (1.0 / self.lam) * (
+ -self.Q.T @ self.invK @ self.Q + torch.eye(int(self.m))
+ )
+
+ else: # via Woodbury
+ c = 1 + emb @ self.invV @ emb.T
+ self.invV = (
+ self.invV - (self.invV @ emb.T @ emb @ self.invV) / c
+ )
+ self.add_points(newx, newy)
+ self.n = self.n + 1
+ self.Q = self.embed(self.x)
+ # add point
+
+ self.check_conversion()
+
+ self.fitted = True
+ self.to_add = []
+
+ elif self.data == True: # just compute the
+ self.Q = self.embed(self.x)
+ if not self.dual:
+ I = torch.eye(int(self.m)).double()
+ Z_ = self.Q.T @ self.Q
+ self.V = Z_ + self.s**2 * self.lam * I
+ self.invV = torch.pinverse(self.V, rcond=1e-10)
+ else:
+ I = torch.eye(self.n).double()
+ Z_ = self.Q @ self.Q.T
+ self.K = Z_ + self.s * self.s * self.lam * I
+ # self.invK, _ = torch.solve(I, self.K)
+ self.invK = torch.pinverse(self.K)
+ self.invK_V = (1.0 / self.lam) * (
+ -self.Q.T @ self.invK @ self.Q + torch.eye(int(self.m))
+ )
+ self.fitted = True
+ else:
+ I = torch.eye(int(self.m)).double()
+ self.V = self.s**2 * self.lam * I
+ self.invV = torch.pinverse(self.V, rcond=1e-10)
+ else:
+ pass
+
+ def theta_mean(self, var=False, prior=False):
+
+ self.precompute()
+ if self.fitted == True and prior == False:
+ if self.dual:
+ theta_mean = self.Q.T @ self.invK @ self.y
+ Z = self.invK_V
+ else:
+ theta_mean = self.invV @ self.Q.T @ self.y
+ Z = self.s**2 * self.invV
+ else:
+ theta_mean = 0 * torch.ones(size=(self.m, 1)).double()
+
+ if var is False:
+ return theta_mean
+ else:
+ return (theta_mean, Z)
+
+ def mean_std(self, xtest):
+ """
+ Calculate mean and variance for GP at xtest points
+ """
+ # self.precompute()
+ embeding = self.embed(xtest)
+
+ # mean
+ theta_mean = self.theta_mean()
+ ymean = embeding @ theta_mean
+
+ # std
+ if not self.dual:
+ diagonal = self.s**2 * torch.einsum(
+ "ij,jk,ik->i", (embeding, self.invV, embeding)
+ ).view(-1, 1)
+ else:
+ diagonal = torch.einsum(
+ "ij,jk,ik->i", (embeding, self.invK_V, embeding)
+ ).view(-1, 1)
+
+ ystd = torch.sqrt(diagonal)
+ return (ymean, ystd)
+
+ def sample_matheron(self, xtest, kernel_object, size=1):
+ basis = self.get_basis_size()
+ zeros = torch.zeros(size=(basis, size), dtype=torch.float64)
+ random_vector = torch.normal(mean=zeros, std=1.0)
+
+ Z = self.lam * torch.eye(basis, dtype=torch.float64)
+ L = torch.linalg.cholesky(Z.transpose(-2, -1).conj()).transpose(-2, -1).conj()
+ theta = torch.mm(L, random_vector) + self.prior_mean
+
+ f_prior_xtest = torch.mm(self.embed(xtest), theta)
+ f_prior_x = torch.mm(self.embed(self.x), theta)
+
+ K_star = kernel_object.kernel(self.x, xtest)
+ N = self.x.size()[0]
+ K = kernel_object.kernel(self.x, self.x) + self.s**2 * self.lam * torch.eye(N)
+
+ f = f_prior_xtest + K_star @ torch.pinverse(K) @ (self.y - f_prior_x)
+ return f
+
+ def sample_theta(self, size=1, prior=False):
+
+ basis = self.get_basis_size()
+
+ zeros = torch.zeros(size=(basis, size), dtype=torch.float64)
+ random_vector = torch.normal(mean=zeros, std=1.0).double()
+ self.precompute()
+
+ if self.fitted == True and prior == False:
+ self.L = torch.linalg.cholesky(self.get_invV()) * self.s
+ theta = self.theta_mean().view(-1, 1)
+ print(theta.size())
+ print(self.L.size())
+ print(random_vector.size())
+ theta = theta + torch.mm(self.L, random_vector)
+ else:
+ Z = (self.lam) * torch.eye(basis, dtype=torch.float64)
+ L = (
+ torch.linalg.cholesky(Z.transpose(-2, -1).conj())
+ .transpose(-2, -1)
+ .conj()
+ )
+ theta = torch.mm(L, random_vector) + self.prior_mean
+
+ return theta
+
+ def theta_mean_constrained(self, weights=None, B=1):
+ if weights is None:
+ weights = torch.ones(self.n).double() / self.n
+
+ Q = self.embed(self.x)
+ theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy()))
+ objective = cp.Minimize(
+ cp.sum(
+ weights
+ @ cp.square(
+ Q.detach().numpy() @ theta - self.y.view(-1).detach().numpy()
+ )
+ )
+ )
+ zero = np.zeros(int(torch.sum(self.m)))
+ constraints = [cp.SOC(theta @ zero + B, theta)]
+ prob = cp.Problem(objective, constraints)
+ prob.solve(solver=cp.MOSEK)
+ return torch.from_numpy(theta.value).view(-1, 1)
+
+ def theta_absolute_deviation(self, weights=None, reg=None):
+ if weights is None:
+ weights = torch.ones(self.x.size()[0])
+
+ if reg is None: # standard regularization
+ Q = self.embed(self.x)
+ theta = cp.Variable((int(torch.sum(self.m)), 1))
+ objective = cp.Minimize(
+ cp.sum(weights @ cp.abs(Q.numpy() @ theta - self.y.numpy()))
+ + self.s * self.lam * cp.norm2(theta)
+ )
+ prob = cp.Problem(objective)
+ prob.solve()
+ return torch.from_numpy(theta.value)
+ else: # custom regularization
+ Q = self.embed(self.x)
+ theta = cp.Variable((int(torch.sum(self.m)), 1))
+ objective = cp.Minimize(
+ cp.sum(weights @ cp.abs(Q.numpy() @ theta - self.y.numpy()))
+ + reg * cp.norm2(theta)
+ )
+ prob = cp.Problem(objective)
+ prob.solve(solver=cp.MOSEK)
+ return torch.from_numpy(theta.value)
+
+ def theta_absolute_deviation_constrained(self, weights=None, B=1):
+ if weights is None:
+ weights = torch.ones(self.x.size()[0])
+ Q = self.embed(self.x)
+ theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy()))
+
+ objective = cp.Minimize(
+ cp.sum(
+ weights
+ @ cp.abs(Q.detach().numpy() @ theta - self.y.view(-1).detach().numpy())
+ )
+ )
+ zero = np.zeros(int(torch.sum(self.m)))
+ constraints = [cp.SOC(theta @ zero + B, theta)]
+ prob = cp.Problem(objective, constraints)
+ prob.solve(solver=cp.MOSEK)
+ return torch.from_numpy(theta.value).view(-1, 1)
+
+ def theta_chebyschev_approximation(self, eps=1.0):
+ Q = self.embed(self.x).detach().numpy()
+ y = self.y.view(-1).detach().numpy()
+
+ theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy()))
+ objective = cp.Minimize(cp.sum_squares(theta))
+ constraints = [cp.abs(Q @ theta - y) <= eps]
+
+ prob = cp.Problem(objective, constraints)
+ prob.solve(solver=cp.MOSEK)
+ res = torch.from_numpy(theta.value).view(-1, 1)
+ return res
+
+ def interpolation(self, eps=0.0):
+ Q = self.embed(self.x).detach().numpy()
+ y = self.y.view(-1).detach().numpy()
+ theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy()))
+ objective = cp.Minimize(cp.sum_squares(theta))
+ constraints = [Q @ theta == y]
+
+ prob = cp.Problem(objective, constraints)
+ prob.solve()
+ res = torch.from_numpy(theta.value).view(-1, 1)
+
+ return res
+
+ def mean_squared(self, xtest, weights=None, B=None, theta=False, reg=None):
+ embeding = self.embed(xtest)
+
+ if B is not None:
+ theta_mean = self.theta_mean_constrained(weights=weights, B=B)
+ else:
+ theta_mean = self.theta_mean(weights=weights, reg=reg)
+ ymean = torch.mm(embeding, theta_mean)
+ if theta == True:
+ return ymean, theta_mean
+ else:
+ return ymean
+
+ def mean_aboslute_deviation(self, xtest, weights=None, B=None, theta=False):
+ embeding = self.embed(xtest)
+ if B is not None:
+ theta_mean = self.theta_absolute_deviation_constrained(weights=weights, B=B)
+ else:
+ theta_mean = self.theta_absolute_deviation(weights=weights)
+ ymean = torch.mm(embeding, theta_mean)
+ if theta == True:
+ return ymean, theta_mean
+ else:
+ return ymean
+
+ """
Hessian
"""
- def mean_gradient_hessian(self, xtest, hessian=False):
- hessian_mu = torch.zeros(size=(self.d, self.d), dtype=torch.float64)
- xtest.requires_grad_(True)
+ def mean_gradient_hessian(self, xtest, hessian=False):
+ hessian_mu = torch.zeros(size=(self.d, self.d), dtype=torch.float64)
+ xtest.requires_grad_(True)
- # xtest.retain_grad()
- mu = self.mean_std(xtest)[0]
- # mu.backward(retain_graph=True)
+ # xtest.retain_grad()
+ mu = self.mean_std(xtest)[0]
+ # mu.backward(retain_graph=True)
- # nabla_mu = xtest.grad
- nabla_mu = grad(mu, xtest, create_graph=True)[0][0]
+ # nabla_mu = xtest.grad
+ nabla_mu = grad(mu, xtest, create_graph=True)[0][0]
- if hessian == False:
- return nabla_mu
- else:
- for i in range(self.d):
- hessian_mu[i, :] = grad(nabla_mu[i], xtest, create_graph=True, retain_graph=True)[0][0]
- return [nabla_mu, hessian_mu]
+ if hessian == False:
+ return nabla_mu
+ else:
+ for i in range(self.d):
+ hessian_mu[i, :] = grad(
+ nabla_mu[i], xtest, create_graph=True, retain_graph=True
+ )[0][0]
+ return [nabla_mu, hessian_mu]
- """
+ """
Optimization
"""
- def ucb(self, xtest, beta = lambda :2., bound = None, lcb = False):
-
- if bound is not None:
- mu, V = self.theta_mean(var = True)
- mu = mu.T
- Phi = self.embed(xtest)
- ucb = torch.zeros(size = (xtest.size()[0],1)).double()
-
- theta = cp.Variable(self.get_basis_size())
- for i in range(xtest.size()[0]):
- phi = Phi[i,:]
- if lcb:
- objective = cp.Minimize(phi @ theta)
- else:
- objective = cp.Maximize(phi @ theta)
-
- constraints = []
- constraints += [cp.quad_form(mu.view(-1)-theta,V) <= beta()]
- constraints += [cp.sum_squares(theta) <= bound]
- prob = cp.Problem(objective, constraints)
- prob.solve()
- ucb[i,0] = prob.value
- return ucb
- else:
- mu, sigma = self.mean_std(xtest)
- if lcb:
- return mu - beta()*sigma
- else:
- return mu + beta() * sigma
-
-
- def lcb(self, xtest, beta = lambda :2, bound = None):
- return self.ucb(xtest, beta = beta, bound = bound, lcb = True)
-
-
- def ucb_optimize(self, beta, multistart=25, lcb=False, minimizer="L-BFGS-B"):
-
- # precompute important (theta)
- theta_mean, K = self.theta_mean(var=True)
-
- if lcb == False:
- fun = lambda x: - (self.embed(torch.from_numpy(x).view(1, -1)) @ theta_mean + \
- beta * torch.sqrt(self.embed(torch.from_numpy(x).view(1, -1)) @ K @ self.embed(
- torch.from_numpy(x).view(1, -1)).T)).detach().numpy()[0]
- else:
- fun = lambda x: - (self.embed(torch.from_numpy(x).view(1, -1)) @ theta_mean - \
- beta * torch.sqrt(self.embed(torch.from_numpy(x).view(1, -1)) @ K @ self.embed(
- torch.from_numpy(x).view(1, -1)).T).detach().numpy()[0]).numpy()[0]
-
- if self.bounds == None:
- mybounds = tuple([(-self.diameter, self.diameter) for _ in range(self.d)])
- else:
- mybounds = self.bounds
-
- results = []
- for j in range(multistart):
-
- x0 = np.random.randn(self.d)
- for i in range(self.d):
- x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1])
-
- if minimizer == "L-BFGS-B":
- res = minimize(fun, x0, method="L-BFGS-B", jac=None, tol=0.0001, bounds=mybounds)
- solution = res.x
- else:
- raise AssertionError("Wrong optimizer selected.")
-
- results.append([solution, -fun(solution)])
-
- results = np.array(results)
- index = np.argmax(results[:, 1])
- solution = results[index, 0]
- return (torch.from_numpy(solution).view(1, -1), -torch.from_numpy(fun(solution)))
-
- def sample_and_optimize(self, xtest=None, multistart=25, minimizer="L-BFGS-B", grid=100, verbose=0):
- '''
- Sample functions from Gaussian Process and take Maximum using
- first order maximization
- '''
-
- # sample linear approximating
- theta = self.sample_theta()
-
- # get bounds
- if self.bounds == None:
- mybounds = tuple([(-self.diameter, self.diameter) for _ in range(self.d)])
- else:
- mybounds = self.bounds
-
- fun = lambda x: -torch.mm(torch.t(theta), torch.t(self.embed(torch.from_numpy(x).view(1, -1)))).numpy()[0]
-
- results = []
- for j in range(multistart):
- x0 = np.random.randn(self.d)
- for i in range(self.d):
- x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1])
-
- if minimizer == "L-BFGS-B":
- res = minimize(fun, x0, method="L-BFGS-B", jac=None, tol=0.0001, bounds=mybounds)
- solution = res.x
- else:
- raise AssertionError("Wrong optimizer selected.")
-
- results.append([solution, -fun(solution)])
- results = np.array(results)
- index = np.argmax(results[:, 1])
- solution = results[index, 0]
-
- return (torch.from_numpy(solution), -torch.from_numpy(fun(solution)))
-
- def sample(self, xtest, size=1, prior=False):
- '''
- Sample functions from Gaussian Process
- '''
- theta = self.sample_theta(size=size, prior=prior)
- f = torch.mm(self.embed(xtest), theta)
- return f
-
- def sample_and_max(self, xtest, size=1):
- '''
- Sample functions from Gaussian Process and take Maximum
- '''
- f = self.sample(xtest, size=size)
- index = np.argmax(f, axis=0)
- return (xtest[index, :], f[index, :])
-
- def get_kernel(self):
- embeding = self.embed(self.x)
- Z_ = self.linear_kernel(embeding, embeding)
- K = (Z_ + self.s * self.s * self.lam * torch.eye(int(self.n), dtype=torch.float64))
- return K
-
- def residuals(self):
- mu, _ = self.mean_std(self.x)
- out = torch.sum((mu - self.y) ** 2)
- return out
+ def ucb(self, xtest, beta=lambda: 2.0, bound=None, lcb=False):
+
+ if bound is not None:
+ mu, V = self.theta_mean(var=True)
+ mu = mu.T
+ Phi = self.embed(xtest)
+ ucb = torch.zeros(size=(xtest.size()[0], 1)).double()
+
+ theta = cp.Variable(self.get_basis_size())
+ for i in range(xtest.size()[0]):
+ phi = Phi[i, :]
+ if lcb:
+ objective = cp.Minimize(phi @ theta)
+ else:
+ objective = cp.Maximize(phi @ theta)
+
+ constraints = []
+ constraints += [cp.quad_form(mu.view(-1) - theta, V) <= beta()]
+ constraints += [cp.sum_squares(theta) <= bound]
+ prob = cp.Problem(objective, constraints)
+ prob.solve()
+ ucb[i, 0] = prob.value
+ return ucb
+ else:
+ mu, sigma = self.mean_std(xtest)
+ if lcb:
+ return mu - beta() * sigma
+ else:
+ return mu + beta() * sigma
+
+ def lcb(self, xtest, beta=lambda: 2, bound=None):
+ return self.ucb(xtest, beta=beta, bound=bound, lcb=True)
+
+ def ucb_optimize(self, beta, multistart=25, lcb=False, minimizer="L-BFGS-B"):
+
+ # precompute important (theta)
+ theta_mean, K = self.theta_mean(var=True)
+
+ if lcb == False:
+ fun = (
+ lambda x: -(
+ self.embed(torch.from_numpy(x).view(1, -1)) @ theta_mean
+ + beta
+ * torch.sqrt(
+ self.embed(torch.from_numpy(x).view(1, -1))
+ @ K
+ @ self.embed(torch.from_numpy(x).view(1, -1)).T
+ )
+ )
+ .detach()
+ .numpy()[0]
+ )
+ else:
+ fun = lambda x: -(
+ self.embed(torch.from_numpy(x).view(1, -1)) @ theta_mean
+ - beta
+ * torch.sqrt(
+ self.embed(torch.from_numpy(x).view(1, -1))
+ @ K
+ @ self.embed(torch.from_numpy(x).view(1, -1)).T
+ )
+ .detach()
+ .numpy()[0]
+ ).numpy()[0]
+
+ if self.bounds == None:
+ mybounds = tuple([(-self.diameter, self.diameter) for _ in range(self.d)])
+ else:
+ mybounds = self.bounds
+
+ results = []
+ for j in range(multistart):
+
+ x0 = np.random.randn(self.d)
+ for i in range(self.d):
+ x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1])
+
+ if minimizer == "L-BFGS-B":
+ res = minimize(
+ fun, x0, method="L-BFGS-B", jac=None, tol=0.0001, bounds=mybounds
+ )
+ solution = res.x
+ else:
+ raise AssertionError("Wrong optimizer selected.")
+
+ results.append([solution, -fun(solution)])
+
+ results = np.array(results)
+ index = np.argmax(results[:, 1])
+ solution = results[index, 0]
+ return (
+ torch.from_numpy(solution).view(1, -1),
+ -torch.from_numpy(fun(solution)),
+ )
+
+ def sample_and_optimize(
+ self, xtest=None, multistart=25, minimizer="L-BFGS-B", grid=100, verbose=0
+ ):
+ """
+ Sample functions from Gaussian Process and take Maximum using
+ first order maximization
+ """
+
+ # sample linear approximating
+ theta = self.sample_theta()
+
+ # get bounds
+ if self.bounds == None:
+ mybounds = tuple([(-self.diameter, self.diameter) for _ in range(self.d)])
+ else:
+ mybounds = self.bounds
+
+ fun = lambda x: -torch.mm(
+ torch.t(theta), torch.t(self.embed(torch.from_numpy(x).view(1, -1)))
+ ).numpy()[0]
+
+ results = []
+ for j in range(multistart):
+ x0 = np.random.randn(self.d)
+ for i in range(self.d):
+ x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1])
+
+ if minimizer == "L-BFGS-B":
+ res = minimize(
+ fun, x0, method="L-BFGS-B", jac=None, tol=0.0001, bounds=mybounds
+ )
+ solution = res.x
+ else:
+ raise AssertionError("Wrong optimizer selected.")
+
+ results.append([solution, -fun(solution)])
+ results = np.array(results)
+ index = np.argmax(results[:, 1])
+ solution = results[index, 0]
+
+ return (torch.from_numpy(solution), -torch.from_numpy(fun(solution)))
+
+ def sample(self, xtest, size=1, prior=False):
+ """
+ Sample functions from Gaussian Process
+ """
+ theta = self.sample_theta(size=size, prior=prior)
+ f = torch.mm(self.embed(xtest), theta)
+ return f
+
+ def sample_and_max(self, xtest, size=1):
+ """
+ Sample functions from Gaussian Process and take Maximum
+ """
+ f = self.sample(xtest, size=size)
+ index = np.argmax(f, axis=0)
+ return (xtest[index, :], f[index, :])
+
+ def get_kernel(self):
+ embeding = self.embed(self.x)
+ Z_ = self.linear_kernel(embeding, embeding)
+ K = Z_ + self.s * self.s * self.lam * torch.eye(
+ int(self.n), dtype=torch.float64
+ )
+ return K
+
+ def residuals(self):
+ mu, _ = self.mean_std(self.x)
+ out = torch.sum((mu - self.y) ** 2)
+ return out
if __name__ == "__main__":
- N = 10
- s = 0.1
- n = 256
- L_infinity_ball = 0.5
-
- d = 1
- m = 128
-
- xtest = torch.from_numpy(interval(n, d, L_infinity_ball=L_infinity_ball))
- x = torch.from_numpy(np.random.uniform(-L_infinity_ball, L_infinity_ball, N)).view(-1, 1)
-
- F_true = lambda x: torch.sin(x * 4) ** 2 - 0.1
- F = lambda x: F_true(x) + s * torch.randn(x.size()[0]).view(-1, 1).double()
- y = F(x)
-
- emb = RFFEmbedding(m=m, gamma=0.1)
- Reggr = KernelizedFeatures(embedding=emb, m=m, d=1)
- Reggr.fit_gp(x, y)
- Reggr.visualize(xtest, f_true=F_true)
+ N = 10
+ s = 0.1
+ n = 256
+ L_infinity_ball = 0.5
+
+ d = 1
+ m = 128
+
+ xtest = torch.from_numpy(interval(n, d, L_infinity_ball=L_infinity_ball))
+ x = torch.from_numpy(np.random.uniform(-L_infinity_ball, L_infinity_ball, N)).view(
+ -1, 1
+ )
+
+ F_true = lambda x: torch.sin(x * 4) ** 2 - 0.1
+ F = lambda x: F_true(x) + s * torch.randn(x.size()[0]).view(-1, 1).double()
+ y = F(x)
+
+ emb = RFFEmbedding(m=m, gamma=0.1)
+ Reggr = KernelizedFeatures(embedding=emb, m=m, d=1)
+ Reggr.fit_gp(x, y)
+ Reggr.visualize(xtest, f_true=F_true)
diff --git a/stpy/continuous_processes/mkl_estimator.py b/stpy/continuous_processes/mkl_estimator.py
index 59e1ca8..639541d 100755
--- a/stpy/continuous_processes/mkl_estimator.py
+++ b/stpy/continuous_processes/mkl_estimator.py
@@ -7,213 +7,271 @@
from stpy.regularization.regularizer import Regularizer
from stpy.regularization.simplex_regularizer import DirichletRegularizer, SupRegularizer
+
class MultipleKernelLearner(GaussianProcess):
- def __init__(self, kernel_objects,
- lam: float =1.0,
- s: float = 0.01,
- opt: str = 'closed',
- regularizer: Regularizer = None):
-
- self.kernel_objects = kernel_objects
- self.no_models = len(kernel_objects)
- self.regularizer = regularizer
- self.s = s
- self.lam = lam
- self.opt = opt
-
- self.var = 'fixed'
-
- def fit(self):
- self.fit_gp(self.x,self.y)
-
- def fit_gp(self, x, y):
- self.x = x
- self.y = y
- (self.n, self.d) = self.x.size()
-
- self.Ks = []
- for i in range(self.no_models):
- self.Ks.append(self.kernel_objects[i].kernel(x,x))
-
- if self.opt == 'sdp':
- alpha = cp.Variable(self.no_models)
- u = cp.Variable(1)
- A = None
- for i in range(self.no_models):
- if A is None:
- A = self.Ks[i] * alpha[i]
- else:
- A += self.Ks[i] * alpha[i]
- A = A + np.eye(self.n)*self.lam*self.s**2
- constraints = []
- l = cp.reshape(u, (1, 1))
- G = cp.bmat([[A, y.numpy()], [y.numpy().T, l]])
- constraints += [G >> 0]
- constraints += [alpha >= 0.]
- constraints += [cp.sum(alpha) == 1.]
-
- objective = cp.Minimize( u)
- prob = cp.Problem(objective, constraints)
- prob.solve( solver = cp.MOSEK,verbose = True)
-
- elif self.opt == "closed":
- alpha = cp.Variable(self.no_models, nonneg=True)
- A = sum([self.Ks[i] * alpha[i] for i in range(self.no_models)])+ np.eye(self.n) * self.lam * self.s ** 2
- constraints = [cp.sum(alpha)==1, alpha<=1]
- objective = cp.matrix_frac(self.y.numpy(), A)
- if self.regularizer is not None and self.regularizer.is_convex():
- objective = objective + self.regularizer.get_regularizer_cvxpy()(alpha)
- prob = cp.Problem(cp.Minimize(objective), constraints)
- prob.solve(solver=cp.MOSEK, verbose=False)
-
- elif self.regularizer is not None and not self.regularizer.is_convex():
- obj,con,vars = self.regularizer.get_cvxpy_objectives_constraints_variables(self.no_models)
- no_problems = len(con)
- vals = []
- args = []
- for i in range(no_problems):
- prob = cp.Problem(cp.Minimize(objective+obj[i](alpha,*vars)), constraints + con[i](alpha, *vars))
- prob.solve(solver=cp.MOSEK, verbose=False)
- vals.append(prob.value)
- args.append(alpha.value)
- alpha.value = args[np.argmin(vals)]
- else:
- prob = cp.Problem(cp.Minimize(objective), constraints)
- prob.solve(solver=cp.MOSEK, verbose=False)
-
- self.alphas = torch.from_numpy(alpha.value)
- if self.regularizer is not None:
- print (self.regularizer.name, self.alphas)
- else:
- print("No", self.alphas)
- self.K = torch.sum(torch.stack([alpha*K for alpha,K in zip(self.alphas, self.Ks)]), dim = 0) + np.eye(self.n)*self.lam*self.s**2
- self.fitted = True
-
- def execute(self, xtest):
- if self.fitted == True:
- Ks = [self.kernel_objects[i].kernel(self.x, xtest) for i in range(self.no_models)]
- K_star = torch.sum(torch.stack([alpha * K for alpha, K in zip(self.alphas, Ks)]), dim=0)
- else:
- K_star = None
- Ks = [self.kernel_objects[i].kernel(xtest, xtest) for i in range(self.no_models)]
- K_star_star = torch.sum(torch.stack([alpha * K for alpha, K in zip(self.alphas, Ks)]), dim=0)
- return (K_star, K_star_star)
-
- # def log_marginal(self, kernel, X, weight):
- # pass
-
- def mean(self, xtest):
- K_star, K_star_star = self.execute(xtest)
- self.A = torch.linalg.lstsq(self.K, self.y)[0]
- ymean = torch.mm(K_star, self.A)
- return ymean
-
- def mean_std(self, xtest, full=False, reuse=False):
- K_star, K_star_star = self.execute(xtest)
- self.A = torch.linalg.lstsq(self.K, self.y)[0]
- ymean = torch.mm(K_star, self.A)
-
- if self.var == 'fixed':
- ystd = self.std_fixed(xtest)
- elif self.var == 'true':
- ystd = self.std_opt(xtest)
- return (ymean, ystd)
-
- def lcb(self, xtest: torch.Tensor, type=None, arg=False, sign=1.):
- theta = cp.Variable((self.alpha, 1))
- args = []
- n = xtest.size()[0]
- values = torch.zeros(size=(n, 1)).double()
- Phi = self.embed(xtest)
-
- for j in range(n):
- objective = sign * Phi[j, :] @ theta
- if (self.constraints is not None and not self.constraints.is_convex()):
- value, theta_lcb = self.objective_on_non_convex_confidence_set(theta, objective, type=type)
- elif not self.regularizer.is_convex():
- value, theta_lcb = self.objective_on_non_convex_confidence_set_bisection(theta, objective,
- type=type)
- else:
- value, theta_lcb = self.objective_on_confidence_set(theta, objective, type=type)
-
- values[j] = sign * value
- if arg:
- args.append(theta_lcb)
-
- if args:
- return values, args
- else:
- return values
-
- def ucb(self, xtest):
- pass
-
- def std_opt(self, xtest):
- N = xtest.size()[0]
- for i in range(N):
- x = xtest[i,:]
- theta = cp.Variable(self.n*self.no_models)
- M = torch.block_diag(self.Ks)
- cp.norm(theta,p=2)*theta[i]
-
- def std_fixed(self, xtest):
- K_star, K_star_star = self.execute(xtest)
- self.B = torch.t(torch.linalg.solve(self.K, torch.t(K_star)))
- first = torch.diag(K_star_star).view(-1, 1)
- second = torch.einsum('ij,ji->i', (self.B, torch.t(K_star))).view(-1, 1)
- variance = first - second
- ystd = torch.sqrt(variance)
- return ystd
-
- def sample(self, xtest, size=1):
- pass
+ def __init__(
+ self,
+ kernel_objects,
+ lam: float = 1.0,
+ s: float = 0.01,
+ opt: str = "closed",
+ regularizer: Regularizer = None,
+ ):
+
+ self.kernel_objects = kernel_objects
+ self.no_models = len(kernel_objects)
+ self.regularizer = regularizer
+ self.s = s
+ self.lam = lam
+ self.opt = opt
+
+ self.var = "fixed"
+
+ def fit(self):
+ self.fit_gp(self.x, self.y)
+
+ def fit_gp(self, x, y):
+ self.x = x
+ self.y = y
+ (self.n, self.d) = self.x.size()
+
+ self.Ks = []
+ for i in range(self.no_models):
+ self.Ks.append(self.kernel_objects[i].kernel(x, x))
+
+ if self.opt == "sdp":
+ alpha = cp.Variable(self.no_models)
+ u = cp.Variable(1)
+ A = None
+ for i in range(self.no_models):
+ if A is None:
+ A = self.Ks[i] * alpha[i]
+ else:
+ A += self.Ks[i] * alpha[i]
+ A = A + np.eye(self.n) * self.lam * self.s**2
+ constraints = []
+ l = cp.reshape(u, (1, 1))
+ G = cp.bmat([[A, y.numpy()], [y.numpy().T, l]])
+ constraints += [G >> 0]
+ constraints += [alpha >= 0.0]
+ constraints += [cp.sum(alpha) == 1.0]
+
+ objective = cp.Minimize(u)
+ prob = cp.Problem(objective, constraints)
+ prob.solve(solver=cp.MOSEK, verbose=True)
+
+ elif self.opt == "closed":
+ alpha = cp.Variable(self.no_models, nonneg=True)
+ A = (
+ sum([self.Ks[i] * alpha[i] for i in range(self.no_models)])
+ + np.eye(self.n) * self.lam * self.s**2
+ )
+ constraints = [cp.sum(alpha) == 1, alpha <= 1]
+ objective = cp.matrix_frac(self.y.numpy(), A)
+ if self.regularizer is not None and self.regularizer.is_convex():
+ objective = objective + self.regularizer.get_regularizer_cvxpy()(alpha)
+ prob = cp.Problem(cp.Minimize(objective), constraints)
+ prob.solve(solver=cp.MOSEK, verbose=False)
+
+ elif self.regularizer is not None and not self.regularizer.is_convex():
+ obj, con, vars = (
+ self.regularizer.get_cvxpy_objectives_constraints_variables(
+ self.no_models
+ )
+ )
+ no_problems = len(con)
+ vals = []
+ args = []
+ for i in range(no_problems):
+ prob = cp.Problem(
+ cp.Minimize(objective + obj[i](alpha, *vars)),
+ constraints + con[i](alpha, *vars),
+ )
+ prob.solve(solver=cp.MOSEK, verbose=False)
+ vals.append(prob.value)
+ args.append(alpha.value)
+ alpha.value = args[np.argmin(vals)]
+ else:
+ prob = cp.Problem(cp.Minimize(objective), constraints)
+ prob.solve(solver=cp.MOSEK, verbose=False)
+
+ self.alphas = torch.from_numpy(alpha.value)
+ if self.regularizer is not None:
+ print(self.regularizer.name, self.alphas)
+ else:
+ print("No", self.alphas)
+ self.K = (
+ torch.sum(
+ torch.stack([alpha * K for alpha, K in zip(self.alphas, self.Ks)]),
+ dim=0,
+ )
+ + np.eye(self.n) * self.lam * self.s**2
+ )
+ self.fitted = True
+
+ def execute(self, xtest):
+ if self.fitted == True:
+ Ks = [
+ self.kernel_objects[i].kernel(self.x, xtest)
+ for i in range(self.no_models)
+ ]
+ K_star = torch.sum(
+ torch.stack([alpha * K for alpha, K in zip(self.alphas, Ks)]), dim=0
+ )
+ else:
+ K_star = None
+ Ks = [
+ self.kernel_objects[i].kernel(xtest, xtest) for i in range(self.no_models)
+ ]
+ K_star_star = torch.sum(
+ torch.stack([alpha * K for alpha, K in zip(self.alphas, Ks)]), dim=0
+ )
+ return (K_star, K_star_star)
+
+ # def log_marginal(self, kernel, X, weight):
+ # pass
+
+ def mean(self, xtest):
+ K_star, K_star_star = self.execute(xtest)
+ self.A = torch.linalg.lstsq(self.K, self.y)[0]
+ ymean = torch.mm(K_star, self.A)
+ return ymean
+
+ def mean_std(self, xtest, full=False, reuse=False):
+ K_star, K_star_star = self.execute(xtest)
+ self.A = torch.linalg.lstsq(self.K, self.y)[0]
+ ymean = torch.mm(K_star, self.A)
+
+ if self.var == "fixed":
+ ystd = self.std_fixed(xtest)
+ elif self.var == "true":
+ ystd = self.std_opt(xtest)
+ return (ymean, ystd)
+
+ def lcb(self, xtest: torch.Tensor, type=None, arg=False, sign=1.0):
+ theta = cp.Variable((self.alpha, 1))
+ args = []
+ n = xtest.size()[0]
+ values = torch.zeros(size=(n, 1)).double()
+ Phi = self.embed(xtest)
+
+ for j in range(n):
+ objective = sign * Phi[j, :] @ theta
+ if self.constraints is not None and not self.constraints.is_convex():
+ value, theta_lcb = self.objective_on_non_convex_confidence_set(
+ theta, objective, type=type
+ )
+ elif not self.regularizer.is_convex():
+ value, theta_lcb = (
+ self.objective_on_non_convex_confidence_set_bisection(
+ theta, objective, type=type
+ )
+ )
+ else:
+ value, theta_lcb = self.objective_on_confidence_set(
+ theta, objective, type=type
+ )
+
+ values[j] = sign * value
+ if arg:
+ args.append(theta_lcb)
+
+ if args:
+ return values, args
+ else:
+ return values
+
+ def ucb(self, xtest):
+ pass
+
+ def std_opt(self, xtest):
+ N = xtest.size()[0]
+ for i in range(N):
+ x = xtest[i, :]
+ theta = cp.Variable(self.n * self.no_models)
+ M = torch.block_diag(self.Ks)
+ cp.norm(theta, p=2) * theta[i]
+
+ def std_fixed(self, xtest):
+ K_star, K_star_star = self.execute(xtest)
+ self.B = torch.t(torch.linalg.solve(self.K, torch.t(K_star)))
+ first = torch.diag(K_star_star).view(-1, 1)
+ second = torch.einsum("ij,ji->i", (self.B, torch.t(K_star))).view(-1, 1)
+ variance = first - second
+ ystd = torch.sqrt(variance)
+ return ystd
+
+ def sample(self, xtest, size=1):
+ pass
+
if __name__ == "__main__":
- from stpy.continuous_processes.gauss_procc import GaussianProcess
- from stpy.helpers.helper import interval_torch
- import matplotlib.pyplot as plt
- n = 512
- N = 5
- s = 0.1
- d = 1
-
- xtest = interval_torch(n,d)
- x = interval_torch(N,d)
-
- kernel1 = KernelFunction(gamma = 0.05)
- kernel2 = KernelFunction(kernel_name="polynomial", power = 5)
- kernel3 = KernelFunction(kernel_name="polynomial", power=3)
- kernel4 = KernelFunction(kernel_name="polynomial", power=2)
- kernel5 = KernelFunction(kernel_name="polynomial", power=1)
- kernel6 = KernelFunction(kernel_name="polynomial", power=1)
-
- kernels = [kernel1, kernel2,kernel3, kernel4, kernel5, kernel6]
-
- GP = GaussianProcess(kernel=kernel1)
- torch.manual_seed(2)
- y = GP.sample(x)
-
- # sup inverse barrier
- for lam in [0.01,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.99,0.9999]:
- regularizer = SupRegularizer(d = len(kernels), lam = lam, constrained=True, version='1')
- mkl = MultipleKernelLearner(kernels, regularizer= regularizer)
- mkl.fit_gp(x,y)
- mkl.visualize(xtest, size = 0, show = False, fig = False, color = 'tab:blue', label = " sup:"+str(lam))
- regularizer = SupRegularizer(d=len(kernels), lam=lam, constrained=True, version='2')
- mkl = MultipleKernelLearner(kernels, regularizer=regularizer)
- mkl.fit_gp(x, y)
- mkl.visualize(xtest, size=0, show=False, fig=False, color='tab:green', label=" sup:" + str(lam))
-
- # dirichlet mixture
- regularizer = DirichletRegularizer(d=len(kernels), lam=lam, constrained=True)
- mkl = MultipleKernelLearner(kernels, regularizer=regularizer)
- mkl.fit_gp(x, y)
- mkl.visualize(xtest, size=0, show=False, fig=False, color='tab:red', label = " dirichlet")
-
- # no regularizer
- mkl = MultipleKernelLearner(kernels, regularizer=None)
- mkl.fit_gp(x, y)
- mkl.visualize(xtest, size=0, show=False, fig=False, color='tab:orange', label = " no")
-
- plt.show()
+ from stpy.continuous_processes.gauss_procc import GaussianProcess
+ from stpy.helpers.helper import interval_torch
+ import matplotlib.pyplot as plt
+
+ n = 512
+ N = 5
+ s = 0.1
+ d = 1
+
+ xtest = interval_torch(n, d)
+ x = interval_torch(N, d)
+
+ kernel1 = KernelFunction(gamma=0.05)
+ kernel2 = KernelFunction(kernel_name="polynomial", power=5)
+ kernel3 = KernelFunction(kernel_name="polynomial", power=3)
+ kernel4 = KernelFunction(kernel_name="polynomial", power=2)
+ kernel5 = KernelFunction(kernel_name="polynomial", power=1)
+ kernel6 = KernelFunction(kernel_name="polynomial", power=1)
+
+ kernels = [kernel1, kernel2, kernel3, kernel4, kernel5, kernel6]
+
+ GP = GaussianProcess(kernel=kernel1)
+ torch.manual_seed(2)
+ y = GP.sample(x)
+
+ # sup inverse barrier
+ for lam in [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 0.9999]:
+ regularizer = SupRegularizer(
+ d=len(kernels), lam=lam, constrained=True, version="1"
+ )
+ mkl = MultipleKernelLearner(kernels, regularizer=regularizer)
+ mkl.fit_gp(x, y)
+ mkl.visualize(
+ xtest,
+ size=0,
+ show=False,
+ fig=False,
+ color="tab:blue",
+ label=" sup:" + str(lam),
+ )
+ regularizer = SupRegularizer(
+ d=len(kernels), lam=lam, constrained=True, version="2"
+ )
+ mkl = MultipleKernelLearner(kernels, regularizer=regularizer)
+ mkl.fit_gp(x, y)
+ mkl.visualize(
+ xtest,
+ size=0,
+ show=False,
+ fig=False,
+ color="tab:green",
+ label=" sup:" + str(lam),
+ )
+
+ # dirichlet mixture
+ regularizer = DirichletRegularizer(d=len(kernels), lam=lam, constrained=True)
+ mkl = MultipleKernelLearner(kernels, regularizer=regularizer)
+ mkl.fit_gp(x, y)
+ mkl.visualize(
+ xtest, size=0, show=False, fig=False, color="tab:red", label=" dirichlet"
+ )
+
+ # no regularizer
+ mkl = MultipleKernelLearner(kernels, regularizer=None)
+ mkl.fit_gp(x, y)
+ mkl.visualize(xtest, size=0, show=False, fig=False, color="tab:orange", label=" no")
+
+ plt.show()
diff --git a/stpy/continuous_processes/mkl_features.py b/stpy/continuous_processes/mkl_features.py
index 8b690c0..42b6868 100755
--- a/stpy/continuous_processes/mkl_features.py
+++ b/stpy/continuous_processes/mkl_features.py
@@ -11,188 +11,196 @@
class MKL(Estimator):
- def __init__(self, embeddings, init_weights=None, lam=0.0, s=0.1):
- self.embeddings = embeddings
- self.init_weights = init_weights
- self.no_models = len(embeddings)
- self.s = s
- self.lam = lam
- if self.init_weights is None:
- self.init_weights = torch.ones(self.no_models)
- self.weights = self.init_weights
- if not isinstance(self.lam, list):
- self.lam = [lam for i in range(self.no_models)]
-
- def get_emebed_dims(self):
- self.dims = []
- for embedding in self.embeddings:
- self.dims.append(embedding.get_basis_size())
- return self.dims
-
- def total_embed_dim(self):
- sum = np.sum(self.get_emebed_dims())
- return sum
-
- def fit_gp(self, x, y):
-
- self.x = x
- self.y = y
- (self.n, self.d) = self.x.size()
- self.total_m = self.total_embed_dim()
-
- self.Reggr = KernelizedFeatures(embeding=self, m=self.total_m, d=d, s=self.s)
- self.Reggr.fit_gp(x, y)
-
- # def mean_vector(self):
- # theta = torch.zeros(size = (self.total_embed_dim()))
- # dims_index = torch.cumsum(torch.Tensor([0] + self.get_emebed_dims()),dim = 0).int()
- # for index, emb in enumerate(self.embeddings):
- # theta_small = emb.sample_theta()
- # theta[dims_index[index]:dims_index[index + 1]] = theta_small.view(-1)
- # return theta
-
- def mean_vector(self):
- return self.Reggr.theta_mean()
-
- def mean_var(self, xtest):
- # mu_avg = torch.zeros(size = (xtest.size()[0],1),dtype = torch.float64)
- # var_avg = torch.zeros(size = (xtest.size()[0],1),dtype = torch.float64)
- #
- # for index, emb in enumerate(self.embeddings):
- # mu,var = emb.mean_var(xtest)
- # mu_avg = mu_avg + self.weights[index]*mu
- # var_avg = var_avg + self.weights[index]*var
- # return [mu_avg,var_avg]
-
- return self.Reggr.mean_std(xtest)
-
- def sample(self, xtest, size=1):
- # sample_avg = torch.zeros(size = (xtest.size()[0],1),dtype = torch.float64)
- #
- # for index, emb in enumerate(self.embeddings):
- # sample = emb.sample(xtest, size = size)
- # sample_avg = sample_avg + self.weights[index]*sample
- return self.Reggr.sample(xtest, size=size)
-
- def embed(self, xtest):
- n = xtest.size()[0]
- Phi = torch.zeros(size=(n, int(self.total_embed_dim())), dtype=torch.float64)
- dims_index = torch.cumsum(torch.Tensor([0] + self.get_emebed_dims()), dim=0).int()
-
- for index, embedding in enumerate(self.embeddings):
- Phi[:, dims_index[index]:dims_index[index + 1]] = embedding.embed_internal(xtest)
-
- return Phi
-
- def selector_matrix(self):
- dims = []
- for embedding in self.embeddings:
- dims.append(embedding.get_basis_size())
- total_dim = self.total_embed_dim()
- selector = torch.zeros(size=(int(total_dim), self.no_models), dtype=torch.float64)
- z = 0
- for i in range(len(self.embeddings)):
- selector[z:z + dims[i], i] = 1.0
- z = z + dims[i]
- return torch.t(selector)
-
- ###
- def evaluate_design(self, C, Phi):
- n = Phi.size()[0]
-
- A = torch.lstsq(torch.t(C), torch.t(Phi))[0]
- B = torch.t(A[0:n, :])
-
- delta = torch.norm(B @ Phi - C, p=2) # /torch.norm(B, p = 2) #relative error
-
- pinv = torch.pinverse(torch.t(Phi) @ Phi)
- W = C @ pinv @ torch.t(C)
-
- rank = torch.matrix_rank(B)
- lambda_max = torch.symeig(W)[0][-1] # largest eigenvalue
-
- upper_bound = lambda_max * (self.s * self.s * 2 + delta)
-
- return [upper_bound.detach(), rank]
-
- def acquisiton_function(self, C, Phi, candidates):
- values = []
- ranks = []
- for candidate_point in candidates:
- newPhi = torch.cat((Phi, candidate_point.view(1, -1)))
- values.append(self.evaluate_design(C, newPhi)[0])
- ranks.append(self.evaluate_design(C, newPhi)[1])
-
- return [torch.Tensor(values), torch.Tensor(ranks)]
+ def __init__(self, embeddings, init_weights=None, lam=0.0, s=0.1):
+ self.embeddings = embeddings
+ self.init_weights = init_weights
+ self.no_models = len(embeddings)
+ self.s = s
+ self.lam = lam
+ if self.init_weights is None:
+ self.init_weights = torch.ones(self.no_models)
+ self.weights = self.init_weights
+ if not isinstance(self.lam, list):
+ self.lam = [lam for i in range(self.no_models)]
+
+ def get_emebed_dims(self):
+ self.dims = []
+ for embedding in self.embeddings:
+ self.dims.append(embedding.get_basis_size())
+ return self.dims
+
+ def total_embed_dim(self):
+ sum = np.sum(self.get_emebed_dims())
+ return sum
+
+ def fit_gp(self, x, y):
+
+ self.x = x
+ self.y = y
+ (self.n, self.d) = self.x.size()
+ self.total_m = self.total_embed_dim()
+
+ self.Reggr = KernelizedFeatures(embeding=self, m=self.total_m, d=d, s=self.s)
+ self.Reggr.fit_gp(x, y)
+
+ # def mean_vector(self):
+ # theta = torch.zeros(size = (self.total_embed_dim()))
+ # dims_index = torch.cumsum(torch.tensor([0] + self.get_emebed_dims()),dim = 0).int()
+ # for index, emb in enumerate(self.embeddings):
+ # theta_small = emb.sample_theta()
+ # theta[dims_index[index]:dims_index[index + 1]] = theta_small.view(-1)
+ # return theta
+
+ def mean_vector(self):
+ return self.Reggr.theta_mean()
+
+ def mean_var(self, xtest):
+ # mu_avg = torch.zeros(size = (xtest.size()[0],1),dtype = torch.float64)
+ # var_avg = torch.zeros(size = (xtest.size()[0],1),dtype = torch.float64)
+ #
+ # for index, emb in enumerate(self.embeddings):
+ # mu,var = emb.mean_var(xtest)
+ # mu_avg = mu_avg + self.weights[index]*mu
+ # var_avg = var_avg + self.weights[index]*var
+ # return [mu_avg,var_avg]
+
+ return self.Reggr.mean_std(xtest)
+
+ def sample(self, xtest, size=1):
+ # sample_avg = torch.zeros(size = (xtest.size()[0],1),dtype = torch.float64)
+ #
+ # for index, emb in enumerate(self.embeddings):
+ # sample = emb.sample(xtest, size = size)
+ # sample_avg = sample_avg + self.weights[index]*sample
+ return self.Reggr.sample(xtest, size=size)
+
+ def embed(self, xtest):
+ n = xtest.size()[0]
+ Phi = torch.zeros(size=(n, int(self.total_embed_dim())), dtype=torch.float64)
+ dims_index = torch.cumsum(
+ torch.tensor([0] + self.get_emebed_dims()), dim=0
+ ).int()
+
+ for index, embedding in enumerate(self.embeddings):
+ Phi[:, dims_index[index] : dims_index[index + 1]] = (
+ embedding.embed_internal(xtest)
+ )
+
+ return Phi
+
+ def selector_matrix(self):
+ dims = []
+ for embedding in self.embeddings:
+ dims.append(embedding.get_basis_size())
+ total_dim = self.total_embed_dim()
+ selector = torch.zeros(
+ size=(int(total_dim), self.no_models), dtype=torch.float64
+ )
+ z = 0
+ for i in range(len(self.embeddings)):
+ selector[z : z + dims[i], i] = 1.0
+ z = z + dims[i]
+ return torch.t(selector)
+
+ ###
+ def evaluate_design(self, C, Phi):
+ n = Phi.size()[0]
+
+ A = torch.lstsq(torch.t(C), torch.t(Phi))[0]
+ B = torch.t(A[0:n, :])
+
+ delta = torch.norm(B @ Phi - C, p=2) # /torch.norm(B, p = 2) #relative error
+
+ pinv = torch.pinverse(torch.t(Phi) @ Phi)
+ W = C @ pinv @ torch.t(C)
+
+ rank = torch.matrix_rank(B)
+ lambda_max = torch.symeig(W)[0][-1] # largest eigenvalue
+
+ upper_bound = lambda_max * (self.s * self.s * 2 + delta)
+
+ return [upper_bound.detach(), rank]
+
+ def acquisiton_function(self, C, Phi, candidates):
+ values = []
+ ranks = []
+ for candidate_point in candidates:
+ newPhi = torch.cat((Phi, candidate_point.view(1, -1)))
+ values.append(self.evaluate_design(C, newPhi)[0])
+ ranks.append(self.evaluate_design(C, newPhi)[1])
+
+ return [torch.tensor(values), torch.tensor(ranks)]
if __name__ == "__main__":
- n = 16
- N = 4
- s = 0.00000001
- d = 1
- TestFunction = MultiRKHS()
- xtest = TestFunction.interval(n)
- x = TestFunction.initial_guess(N)
- y = TestFunction.eval(x, sigma=s)
- bounds = TestFunction.bounds()
-
- p = 2
- embedding2 = PolynomialEmbedding(d, p, groups=None)
- GP1 = KernelizedFeatures(embeding=embedding2, m=embedding2.size, d=d, s=s,
- groups=None, bounds=bounds)
-
- map = lambda x: torch.abs(x)
- embedding3 = CustomEmbedding(d, map, 1, groups=None)
-
- GP2 = KernelizedFeatures(embeding=embedding3, m=embedding3.size, d=d, s=s,
- groups=None, bounds=bounds)
-
- m = 2
- gamma = 0.2
- GP3 = GaussianProcessFF(d=d, s=s, m=m, gamma=gamma, bounds=bounds, groups=None)
- GP4 = GaussianProcessFF(d=d, s=s, m=m, gamma=gamma, bounds=bounds, groups=None)
-
- MKL = MKL([GP1, GP2], s=s)
-
- C = MKL.selector_matrix()
- Candidates = MKL.embed(xtest)
- eps = 1
- N = 1
- x = TestFunction.initial_guess(N)
-
- plt.close('all')
-
- while eps > 10e-3:
- # print (x,eps)
- Phi = MKL.embed(x)
- # print (C.size(), Phi.size())
- print(N, MKL.evaluate_design(C, Phi))
- eps = MKL.evaluate_design(C, Phi)[0]
- # N = N + 1
- score, rank = MKL.acquisiton_function(C, Phi, Candidates)
- score = score + 1. / (rank - 1)
- index_min = torch.argmin(score)
- x_min = xtest[index_min]
-
- plt.plot(xtest.numpy(), torch.log(score).numpy(), 'g')
- plt.plot(xtest.numpy(), rank.numpy(), 'r--')
- plt.plot(x, x * 0, 'ro')
- plt.plot(xtest[index_min].numpy(), torch.log(score[index_min]).numpy(), 'go')
- plt.show()
-
- x = torch.cat((x, x_min.view(1, -1)))
-
- y = TestFunction.eval(x, sigma=s)
- print(x)
- print(y)
-
- MKL.fit_gp(x, y)
- print("Projection:")
- print("--------------")
- print(C @ MKL.mean_vector())
- print("--------------")
-
- MKL.visualize(xtest, f_true=TestFunction.eval_noiseless)
- plt.show()
+ n = 16
+ N = 4
+ s = 0.00000001
+ d = 1
+ TestFunction = MultiRKHS()
+ xtest = TestFunction.interval(n)
+ x = TestFunction.initial_guess(N)
+ y = TestFunction.eval(x, sigma=s)
+ bounds = TestFunction.bounds()
+
+ p = 2
+ embedding2 = PolynomialEmbedding(d, p, groups=None)
+ GP1 = KernelizedFeatures(
+ embeding=embedding2, m=embedding2.size, d=d, s=s, groups=None, bounds=bounds
+ )
+
+ map = lambda x: torch.abs(x)
+ embedding3 = CustomEmbedding(d, map, 1, groups=None)
+
+ GP2 = KernelizedFeatures(
+ embeding=embedding3, m=embedding3.size, d=d, s=s, groups=None, bounds=bounds
+ )
+
+ m = 2
+ gamma = 0.2
+ GP3 = GaussianProcessFF(d=d, s=s, m=m, gamma=gamma, bounds=bounds, groups=None)
+ GP4 = GaussianProcessFF(d=d, s=s, m=m, gamma=gamma, bounds=bounds, groups=None)
+
+ MKL = MKL([GP1, GP2], s=s)
+
+ C = MKL.selector_matrix()
+ Candidates = MKL.embed(xtest)
+ eps = 1
+ N = 1
+ x = TestFunction.initial_guess(N)
+
+ plt.close("all")
+
+ while eps > 10e-3:
+ # print (x,eps)
+ Phi = MKL.embed(x)
+ # print (C.size(), Phi.size())
+ print(N, MKL.evaluate_design(C, Phi))
+ eps = MKL.evaluate_design(C, Phi)[0]
+ # N = N + 1
+ score, rank = MKL.acquisiton_function(C, Phi, Candidates)
+ score = score + 1.0 / (rank - 1)
+ index_min = torch.argmin(score)
+ x_min = xtest[index_min]
+
+ plt.plot(xtest.numpy(), torch.log(score).numpy(), "g")
+ plt.plot(xtest.numpy(), rank.numpy(), "r--")
+ plt.plot(x, x * 0, "ro")
+ plt.plot(xtest[index_min].numpy(), torch.log(score[index_min]).numpy(), "go")
+ plt.show()
+
+ x = torch.cat((x, x_min.view(1, -1)))
+
+ y = TestFunction.eval(x, sigma=s)
+ print(x)
+ print(y)
+
+ MKL.fit_gp(x, y)
+ print("Projection:")
+ print("--------------")
+ print(C @ MKL.mean_vector())
+ print("--------------")
+
+ MKL.visualize(xtest, f_true=TestFunction.eval_noiseless)
+ plt.show()
diff --git a/stpy/continuous_processes/nystrom_fea.py b/stpy/continuous_processes/nystrom_fea.py
index a209c47..fa30fff 100755
--- a/stpy/continuous_processes/nystrom_fea.py
+++ b/stpy/continuous_processes/nystrom_fea.py
@@ -1,347 +1,460 @@
import matplotlib.pyplot as plt
-from scipy.interpolate import LinearNDInterpolator
+from scipy.interpolate import LinearNDInterpolator, NearestNDInterpolator
from scipy.interpolate import interp1d
from stpy.continuous_processes.gauss_procc import GaussianProcess
from stpy.embeddings.embedding import *
from stpy.helpers.helper import *
+from stpy.helpers.posterior_sampling import tmg
from stpy.kernels import KernelFunction
class NystromFeatures(Embedding):
- '''
- Nystrom Features for Gaussian Kernel
- '''
-
- def __init__(self, kernel_object, m=100, approx="uniform", s=1., samples=100):
-
- self.fit = False
- self.m = m
- try:
- self.ms = int(torch.sum(m))
- except:
- self.ms = m
- self.samples = samples
- self.kernel_object = kernel_object
- self.kernel = kernel_object.kernel
- self.approx = approx
- self.s = s
-
- def description(self):
- """
- Description of GP in text
- :return: string with description
- """
- return "Nystrom\n" + "Appprox: " + self.approx
-
- def subsample(self, x, y):
- if self.approx == "uniform":
- C, w = self.uniform_subsampling(x, y)
- elif self.approx == "leverage":
- C, w = self.leverage_score_subsampling(x, y)
- elif self.approx == "online_leverage":
- C, w = self.sequential_leverage_score_subsampling(x, y)
- return (C, w)
-
- def uniform_subsampling(self, x, y):
- N = x.size()[0]
- C = np.random.choice(N, int(self.ms))
- weights = torch.ones(self.ms)
- return (C, weights)
-
- def leverage_score_subsampling(self, x, y):
- N = x.size()[0]
- from stpy.continuous_processes.gauss_procc import GaussianProcess
- GP = GaussianProcess(kernel_custom=self.kernel_object, s=self.s)
- GP.fit_gp(x, y)
- mean, leverage_scores = GP.mean_std(x)
- weights = torch.ones(self.ms)
-
- args = [0]
- size = 1
-
- for j in range(N):
- point = x[j, :]
- if size < self.ms:
- leverage_score = float(leverage_scores[j, :])
- q = np.random.binomial(self.ms, leverage_score)
- if q > 0:
- args.append(j)
- weights[size] = (q / float(self.ms)) / leverage_score
- size = size + 1
- else:
- pass
-
- print(args, weights)
- return (args, weights)
-
- def sequential_leverage_score_subsampling(self, x, y):
- N = x.size()[0]
- d = x.size()[1]
- from stpy.continuous_processes.gauss_procc import GaussianProcess
- GP = GaussianProcess(kernel_custom=self.kernel_object, s=self.s)
-
- dts = torch.zeros(self.ms, d, dtype=torch.float64)
- dts[0, :] = x[0, :]
- args = [0]
- size = 1
- weights = torch.ones(self.ms)
-
- for j in range(N):
- point = x[j, :]
- # print (size,x.size())
- if size < self.ms:
- GP.fit_gp(dts[0:size, :], y[0:size, :])
- mean, leverage_score = GP.mean_std(point.view(1, d))
- q = np.random.binomial(self.ms, float(leverage_score))
- if q > 0:
- args.append(j)
- dts[size, :] = point
- weights[size] = (q / float(self.ms)) / leverage_score
- size = size + 1
- else:
- pass
- return (args, weights)
-
- def fit_gp(self, x, y, eps=1e-14):
- '''
- Function to Fit GP
- '''
- self.x = x
- self.y = y
- self.d = x.size()[1]
- self.N = x.size()[0]
- assert (self.ms <= self.N)
- self.linear_kernel = KernelFunction(kernel_name="linear").linear_kernel
- if self.approx == "svd":
- self.xs = x
- K = self.kernel(x, x)
- if 3 * self.ms > self.N:
- (D, V) = torch.linalg.eigh(K, UPLO='U')
- V = torch.t(V)[self.N - self.ms:self.N, :].T
- D = D[self.N - self.ms:self.N]
- D[D <= eps] = 0
-
- else:
- (D, V) = torch.lobpcg(K, k=self.ms, niter=-1)
-
- # Dinv = torch.diag(1./D[self.N-self.ms:self.N])
- # Dinv[Dinv <=0 ] = 0
- # Dinv = torch.sqrt(Dinv)
- self.eigs = D
- Dinv = torch.diag(torch.sqrt(1. / D))
- # self.M = (torch.t(V)[self.N-self.ms:self.N,:]).T @ Dinv.T
- self.M = V @ Dinv
- # self.embed = lambda q: torch.t(torch.mm(Dinv, torch.mm(torch.t(V)[self.N-self.ms:self.N,:], self.kernel(q, self.x) )))
- self.embed = lambda q: self.kernel(q, self.xs).T @ self.M
- self.C = []
- elif self.approx == 'nothing':
- self.xs = self.x[0:self.ms, :]
- self.M = torch.eye(self.ms).double()
- self.embed = lambda q: self.kernel(q, self.xs).T @ self.M
-
- elif self.approx == 'positive_svd':
- from sklearn.decomposition import NMF
- GP = GaussianProcess(kernel=self.kernel_object)
- ysample = GP.sample(x, size=self.samples) ** 2
- X = ysample
- model = NMF(n_components=self.ms, max_iter=8000, tol=1e-12)
- W = torch.from_numpy(model.fit_transform(X))
- H = torch.from_numpy(model.components_)
- l = torch.norm(W, dim=1)
- l = 1. / l
-
- if x.size()[1] == 1:
- fs = []
- for j in range(self.ms):
- fs.append(interp1d(x.view(-1).numpy(), (W.T @ torch.diag(l))[j, :].numpy()))
- self.embed = lambda q: torch.cat([torch.from_numpy(fs[j](q)).view(-1, 1) for j in range(self.ms)],
- dim=1)
-
- elif x.size()[1] == 2:
- fs = []
- for j in range(self.ms):
- W_j = (W.T @ torch.diag(l))[j, :].numpy()
- fs.append(LinearNDInterpolator(x, W_j))
- self.embed = lambda q: torch.cat(
- [torch.from_numpy(fs[j](q[:, 0], q[:, 1])).view(-1, 1) for j in range(self.ms)], dim=1)
- # elif x.size()[1] == 2:
- # fs = []
- # for j in range(self.ms):
- # W_j = (W.T @ torch.diag(l))[j, :].numpy()
- # fs.append(Rbf(x[:,0],x[:,1], W_j))
- # self.embed = lambda q: torch.cat([torch.from_numpy(fs[j](q[:,0],q[:,1])).view(-1, 1) for j in range(self.ms)],
- # dim=1)
-
- self.C = []
-
- elif self.approx == "cover":
- K = self.kernel(x, x) # + self.s * self.s * torch.eye(self.N, dtype=torch.float64)
- Khalf = torch.from_numpy(np.real(scipy.linalg.sqrtm(K.numpy())))
- Khalfinv = torch.pinverse(Khalf)
- self.embed = lambda q: torch.t(
- torch.mm(Khalfinv, self.kernel(q, self.x)))
- else:
- self.C, self.weights = self.subsample(x, y)
- xs = x[self.C, :]
- self.Dweights = torch.diag(self.weights).double()
- K = torch.mm(torch.mm(self.Dweights, self.kernel(xs, xs)),
- self.Dweights) # + self.s*self.s * torch.eye(self.ms, dtype=torch.float64)
- #(D, V) = torch.symeig(K, eigenvectors=True)
- (D, V) = torch.linalg.eigh(K)
- Dinv = torch.diag(1. / D)
- Dinv[Dinv <= 0] = 0
- Dinv = torch.sqrt(Dinv)
- # Dinv = torch.diag(torch.pow(D[:],-0.5))
- self.embed = lambda q: torch.t(
- torch.mm(Dinv, torch.mm(torch.t(V), torch.mm(self.Dweights, self.kernel(q, xs)))))
- # self.embed = lambda x: torch.t(torch.mm(torch.sqrt(Dinv),torch.mm(V, self.kernel(x, xs))))
- embeding = self.embed(x)
- self.Z_ = embeding.T @ embeding + self.s * self.s * torch.eye(self.ms).double()
-
- # self.K = (self.Z_ + self.s * self.s * torch.eye(self.ms, dtype=torch.float64))
- self.K = self.Z_
- self.Q = torch.t(embeding)
-
- self.fit = True
- return None
-
- def mean_std(self, xtest):
- if self.fit == False:
- raise AssertionError("First fit")
- else:
- embeding = self.embed(xtest)
- Q = self.embed(self.x)
- theta_mean, _ = torch.solve(torch.mm(torch.t(Q), self.y), self.K)
- ymean = torch.mm(embeding, theta_mean)
- temp = torch.t(torch.solve(torch.t(embeding), self.K)[0])
- diagonal = self.s * self.s * torch.einsum('ij,ji->i', (temp, torch.t(embeding))).view(-1, 1)
- yvar = torch.sqrt(diagonal)
-
- return (ymean, yvar)
-
- def outer_kernel(self):
- embeding = self.embed(self.x)
- # print (embeding.size())
- K = torch.mm(embeding, torch.t(embeding))
- # Z = self.linear_kernel(embeding, (embeding))
- K = (K + self.s * self.s * torch.eye(self.N, dtype=torch.float64))
- # K = self.kernel(self.x,self.x) + self.s*self.s*torch.eye(self.N, dtype=torch.float64)
- # print ("kernel:",K)
- # print ("approximate:",Z)
- return K
-
- def sample_theta(self, size=1):
- basis = int(int(torch.sum(self.m)))
- zeros = torch.zeros(basis, size, dtype=torch.float64)
- random_vector = torch.normal(mean=zeros, std=1.)
-
- if self.fit == True:
- # random vector
- Z = torch.pinverse(self.K, rcond=10e-6)
- self.L = torch.cholesky(Z, upper=False)
- theta_mean = torch.mm(Z, torch.mm(self.Q, self.y))
- theta = torch.mm(self.s * self.L, random_vector)
- theta = theta + theta_mean
- else:
- theta_mean = 0
- Z = (1. + self.s * self.s) * torch.eye(basis, dtype=torch.float64)
- L = torch.cholesky(Z, upper=False)
- theta = torch.mm(L, random_vector) + theta_mean
- return theta
-
- def sample(self, xtest, size=1):
- '''
- Sample functions from Gaussian Process
- '''
- theta = self.sample_theta(size=size)
- f = torch.mm(self.embed(xtest), theta)
- return f
-
- def visualize(self, xtest, f_true=None, points=True, show=True):
- [mu, std] = self.mean_std(xtest)
- if self.d == 1:
-
- plt.figure(figsize=(15, 7))
- plt.clf()
- plt.plot(self.x.numpy(), self.y.numpy(), 'r+', ms=10, marker="o")
- plt.plot(self.x[self.C, :].numpy(), self.y[self.C, :].numpy(), 'g+', ms=10, marker="o")
- # plt.plot(xtest.numpy(), self.sample(xtest, size=2).numpy(), 'k--', lw=2, label="sample")
- plt.fill_between(xtest.numpy().flat, (mu - 2 * std).numpy().flat, (mu + 2 * std).numpy().flat,
- color="#dddddd")
- if f_true is not None:
- plt.plot(xtest.numpy(), f_true(xtest).numpy(), 'b-', lw=2)
- plt.plot(xtest.numpy(), mu.numpy(), 'r-', lw=2, label="posterior mean")
- plt.title('Posterior mean prediction plus 2 st.deviation')
- plt.legend()
- if show == True:
- plt.show()
-
- elif self.d == 2:
- from scipy.interpolate import griddata
- plt.figure(figsize=(15, 7))
- plt.clf()
- ax = plt.axes(projection='3d')
- xx = xtest[:, 0].numpy()
- yy = xtest[:, 1].numpy()
- grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j]
- grid_z_mu = griddata((xx, yy), mu[:, 0].numpy(), (grid_x, grid_y), method='linear')
- if f_true is not None:
- grid_z = griddata((xx, yy), f_true(xtest)[:, 0].numpy(), (grid_x, grid_y), method='linear')
- ax.plot_surface(grid_x, grid_y, grid_z, color='b', alpha=0.4)
- if points == True:
- ax.scatter(self.x[:, 0].numpy(), self.x[:, 1].numpy(), self.y[:, 0].numpy(), c='r', s=100, marker="o",
- depthshade=False)
- ax.plot_surface(grid_x, grid_y, grid_z_mu, color='r', alpha=0.4)
- plt.title('Posterior mean prediction plus 2 st.deviation')
- plt.show()
-
- else:
- print("Visualization not implemented")
+ """
+ Nystrom Features for Gaussian Kernel
+ """
+
+ def __init__(
+ self, kernel_object, m=100, approx="uniform", s=1.0, samples=100, fast=False
+ ):
+ """
+ fast, optional
+ If it is true, the samples from the truncated Gaussian are approximated by squared samples of a Gaussian, by default True
+ """
+
+ self.fit = False
+ self.m = m
+ try:
+ self.ms = int(torch.sum(m))
+ except:
+ self.ms = m
+ self.samples = samples
+ self.kernel_object = kernel_object
+ self.kernel = kernel_object.kernel
+ self.approx = approx
+ self.s = s
+ self.fast = fast
+
+ def description(self):
+ """
+ Description of GP in text
+ :return: string with description
+ """
+ return "Nystrom\n" + "Appprox: " + self.approx
+
+ def subsample(self, x, y):
+ if self.approx == "uniform":
+ C, w = self.uniform_subsampling(x, y)
+ elif self.approx == "leverage":
+ C, w = self.leverage_score_subsampling(x, y)
+ elif self.approx == "online_leverage":
+ C, w = self.sequential_leverage_score_subsampling(x, y)
+ return (C, w)
+
+ def uniform_subsampling(self, x, y):
+ N = x.size()[0]
+ C = np.random.choice(N, int(self.ms))
+ weights = torch.ones(self.ms)
+ return (C, weights)
+
+ def leverage_score_subsampling(self, x, y):
+ N = x.size()[0]
+ from stpy.continuous_processes.gauss_procc import GaussianProcess
+
+ GP = GaussianProcess(kernel_custom=self.kernel_object, s=self.s)
+ GP.fit_gp(x, y)
+ mean, leverage_scores = GP.mean_std(x)
+ weights = torch.ones(self.ms)
+
+ args = [0]
+ size = 1
+
+ for j in range(N):
+ point = x[j, :]
+ if size < self.ms:
+ leverage_score = float(leverage_scores[j, :])
+ q = np.random.binomial(self.ms, leverage_score)
+ if q > 0:
+ args.append(j)
+ weights[size] = (q / float(self.ms)) / leverage_score
+ size = size + 1
+ else:
+ pass
+
+ print(args, weights)
+ return (args, weights)
+
+ def sequential_leverage_score_subsampling(self, x, y):
+ N = x.size()[0]
+ d = x.size()[1]
+ from stpy.continuous_processes.gauss_procc import GaussianProcess
+
+ GP = GaussianProcess(kernel_custom=self.kernel_object, s=self.s)
+
+ dts = torch.zeros(self.ms, d, dtype=torch.float64)
+ dts[0, :] = x[0, :]
+ args = [0]
+ size = 1
+ weights = torch.ones(self.ms)
+
+ for j in range(N):
+ point = x[j, :]
+ # print (size,x.size())
+ if size < self.ms:
+ GP.fit_gp(dts[0:size, :], y[0:size, :])
+ mean, leverage_score = GP.mean_std(point.view(1, d))
+ q = np.random.binomial(self.ms, float(leverage_score))
+ if q > 0:
+ args.append(j)
+ dts[size, :] = point
+ weights[size] = (q / float(self.ms)) / leverage_score
+ size = size + 1
+ else:
+ pass
+ return (args, weights)
+
+    def fit_gp(self, x, y, eps=1e-14):
+        """
+        Fit the low-rank feature map and the regularised feature Gram matrix.
+
+        Depending on ``self.approx`` this builds ``self.embed``, a callable that
+        maps query points to ``self.ms`` features:
+
+        * ``"svd"``: top ``self.ms`` eigenpairs of the kernel matrix on ``x``.
+        * ``"nothing"``: kernel columns of the first ``self.ms`` points.
+        * ``"positive_svd"``: NMF basis of squared GP samples (``self.fast``)
+          or of samples drawn by ``tmg``, interpolated over ``x`` (input
+          dimension 1 or 2 only).
+        * ``"cover"``: pseudo-inverse of the matrix square root of the kernel
+          matrix on ``x``.
+        * anything else: points and weights returned by ``self.subsample``.
+
+        Afterwards ``self.Z_`` and ``self.K`` hold ``Phi^T Phi + s^2 I`` and
+        ``self.Q`` holds ``Phi^T`` for ``Phi = self.embed(x)``.
+
+        :param x: training inputs, 2-D tensor with one row per point
+        :param y: training targets (stored and forwarded to ``self.subsample``)
+        :param eps: eigenvalues at or below this value are zeroed in the dense
+            ``"svd"`` branch
+        :return: None
+        """
+
+        def _inv_sqrt(eigenvalues):
+            # Pseudo-inverse square root: sqrt(1/v) where v > 0, otherwise 0.
+            # A plain sqrt(1/v) turns zeroed or negative eigenvalues into
+            # inf/NaN, which would poison every embedding.
+            out = torch.zeros_like(eigenvalues)
+            positive = eigenvalues > 0
+            out[positive] = torch.sqrt(1.0 / eigenvalues[positive])
+            return out
+
+        self.x = x
+        self.y = y
+        self.d = x.size()[1]
+        self.N = x.size()[0]
+        assert self.ms <= self.N
+        self.linear_kernel = KernelFunction(kernel_name="linear").linear_kernel
+
+        if self.approx == "svd":
+            self.xs = x
+            K = self.kernel(x, x)
+            if 3 * self.ms > self.N:
+                # eigh returns eigenvalues in ascending order, so the last
+                # ms columns are the leading eigenvectors
+                (D, V) = torch.linalg.eigh(K, UPLO="U")
+                V = V[:, self.N - self.ms : self.N]
+                D = D[self.N - self.ms : self.N]
+                D[D <= eps] = 0
+            else:
+                (D, V) = torch.lobpcg(K, k=self.ms, niter=-1)
+
+            self.eigs = D
+            self.M = V @ torch.diag(_inv_sqrt(D))
+            self.embed = lambda q: self.kernel(q, self.xs).T @ self.M
+            self.C = []
+
+        elif self.approx == "nothing":
+            self.xs = self.x[0 : self.ms, :]
+            self.M = torch.eye(self.ms).double()
+            self.embed = lambda q: self.kernel(q, self.xs).T @ self.M
+
+        elif self.approx == "positive_svd":
+            from sklearn.decomposition import NMF
+
+            if self.fast:
+                GP = GaussianProcess(kernel=self.kernel_object)
+                X = GP.sample(x, size=self.samples) ** 2
+            else:
+                burn_in = 30
+                ysample = tmg(
+                    self.samples,
+                    np.zeros(len(x)),
+                    self.kernel_object.kernel(x, x).cpu().numpy()
+                    + 1e-7 * np.eye(len(x)),
+                    torch.ones(len(x)).cpu().numpy(),
+                    np.eye(len(x)),
+                    np.zeros(len(x)),
+                    burn_in,
+                    True,
+                )
+                X = torch.tensor(ysample.T)
+
+            model = NMF(n_components=self.ms, max_iter=8000, tol=1e-12)
+            W = torch.tensor(model.fit_transform(X.cpu()))
+            # each column of W is one basis function, scaled to unit 2-norm
+            W_norm = W / torch.linalg.norm(W, dim=0)
+
+            if x.size()[1] == 1:
+                fs = [
+                    interp1d(
+                        x.view(-1).cpu().numpy(),
+                        W_norm[:, j].cpu().numpy(),
+                    )
+                    for j in range(self.ms)
+                ]
+                self.embed = lambda q: torch.cat(
+                    [torch.tensor(fs[j](q)).view(-1, 1) for j in range(self.ms)],
+                    dim=1,
+                )
+
+            elif x.size()[1] == 2:
+                fs = []
+                for j in range(self.ms):
+                    W_j = W_norm[:, j].cpu().numpy()
+                    fs.append(
+                        (
+                            LinearNDInterpolator(x.cpu().numpy(), W_j),
+                            NearestNDInterpolator(x.cpu().numpy(), W_j),
+                        )
+                    )
+
+                def embed(q):
+                    out_list = []
+                    # Linear interpolation where it is defined; where it
+                    # returns NaN, fall back to the nearest-neighbour value
+                    for j in range(self.ms):
+                        cur = fs[j][0](q[:, 0].cpu().numpy(), q[:, 1].cpu().numpy())
+                        mask = np.isnan(cur)
+                        cur[mask] = fs[j][1](
+                            q[:, 0].cpu().numpy()[mask], q[:, 1].cpu().numpy()[mask]
+                        )
+                        out_list.append(torch.tensor(cur).view(-1, 1))
+                    return torch.cat(out_list, dim=1)
+
+                self.embed = embed
+
+            self.C = []
+
+        elif self.approx == "cover":
+            K = self.kernel(x, x)
+            Khalf = torch.tensor(np.real(scipy.linalg.sqrtm(K.cpu().numpy())))
+            Khalfinv = torch.pinverse(Khalf)
+            self.embed = lambda q: torch.t(torch.mm(Khalfinv, self.kernel(q, self.x)))
+
+        else:
+            self.C, self.weights = self.subsample(x, y)
+            xs = x[self.C, :]
+            self.Dweights = torch.diag(self.weights).double()
+            K = torch.mm(torch.mm(self.Dweights, self.kernel(xs, xs)), self.Dweights)
+            (D, V) = torch.linalg.eigh(K)
+            Dinv = torch.diag(_inv_sqrt(D))
+            self.embed = lambda q: torch.t(
+                torch.mm(
+                    Dinv,
+                    torch.mm(torch.t(V), torch.mm(self.Dweights, self.kernel(q, xs))),
+                )
+            )
+
+        embeding = self.embed(x)
+        self.Z_ = embeding.T @ embeding + self.s * self.s * torch.eye(self.ms).double()
+        self.K = self.Z_
+        self.Q = torch.t(embeding)
+
+        self.fit = True
+        return None
+
+    def mean_std(self, xtest):
+        """
+        Posterior mean and standard deviation in the fitted feature space.
+
+        :param xtest: query points accepted by ``self.embed``
+        :return: tuple ``(ymean, ystd)``; ``ystd`` is a column vector
+        :raises AssertionError: if the model has not been fitted
+        """
+        if self.fit == False:
+            raise AssertionError("First fit")
+
+        embeding = self.embed(xtest)
+        Q = self.embed(self.x)
+        # torch.solve(B, A) no longer exists; torch.linalg.solve(A, B) solves
+        # the same system A X = B (note the swapped argument order).
+        theta_mean = torch.linalg.solve(self.K, torch.mm(torch.t(Q), self.y))
+        ymean = torch.mm(embeding, theta_mean)
+        temp = torch.t(torch.linalg.solve(self.K, torch.t(embeding)))
+        # s^2 * diag(Phi K^{-1} Phi^T), computed row by row
+        diagonal = (
+            self.s
+            * self.s
+            * torch.einsum("ij,ji->i", temp, torch.t(embeding)).view(-1, 1)
+        )
+        yvar = torch.sqrt(diagonal)
+
+        return (ymean, yvar)
+
+    def outer_kernel(self):
+        """
+        Return the approximate kernel matrix on the training inputs,
+        ``Phi Phi^T + s^2 I`` with ``Phi = self.embed(self.x)``.
+        """
+        features = self.embed(self.x)
+        noise = self.s * self.s * torch.eye(self.N, dtype=torch.float64)
+        return torch.mm(features, torch.t(features)) + noise
+
+    def sample_theta(self, size=1):
+        """
+        Draw feature-space weight vectors.
+
+        When the model is fitted the draws are centred at
+        ``pinv(K) Q y`` with factor ``s * chol(pinv(K))``; otherwise they are
+        zero-mean with covariance ``(1 + s^2) I``.
+
+        :param size: number of independent draws (columns of the result)
+        :return: float64 tensor of shape ``(sum(self.m), size)``
+        """
+        basis = int(torch.sum(self.m))
+        zeros = torch.zeros(basis, size, dtype=torch.float64)
+        random_vector = torch.normal(mean=zeros, std=1.0)
+
+        if self.fit == True:
+            Z = torch.pinverse(self.K, rcond=10e-6)
+            # torch.cholesky is deprecated; torch.linalg.cholesky returns the
+            # same lower-triangular factor by default.
+            self.L = torch.linalg.cholesky(Z)
+            theta_mean = torch.mm(Z, torch.mm(self.Q, self.y))
+            theta = torch.mm(self.s * self.L, random_vector)
+            theta = theta + theta_mean
+        else:
+            theta_mean = 0
+            Z = (1.0 + self.s * self.s) * torch.eye(basis, dtype=torch.float64)
+            L = torch.linalg.cholesky(Z)
+            theta = torch.mm(L, random_vector) + theta_mean
+        return theta
+
+    def sample(self, xtest, size=1):
+        """
+        Evaluate ``size`` sampled functions at ``xtest``: weights drawn by
+        ``sample_theta`` are multiplied by the embedding of ``xtest``.
+        """
+        weights = self.sample_theta(size=size)
+        features = self.embed(xtest)
+        return torch.mm(features, weights)
+
+    def visualize(self, xtest, f_true=None, points=True, show=True):
+        """
+        Plot the posterior mean (and, in 1-D, a +/- 2 std band) on ``xtest``.
+
+        :param xtest: query points; 1-D and 2-D inputs are supported
+        :param f_true: optional callable giving the noiseless target
+        :param points: in 2-D, whether to scatter the training data
+        :param show: call ``plt.show()`` at the end (honoured in 1-D and 2-D)
+        """
+        [mu, std] = self.mean_std(xtest)
+        if self.d == 1:
+            plt.figure(figsize=(15, 7))
+            plt.clf()
+            # "ro"/"go" give the circle markers the old marker="o" keyword
+            # produced, without also defining a "+" marker in the fmt string
+            plt.plot(self.x.cpu().numpy(), self.y.cpu().numpy(), "ro", ms=10)
+            plt.plot(
+                self.x[self.C, :].cpu().numpy(),
+                self.y[self.C, :].cpu().numpy(),
+                "go",
+                ms=10,
+            )
+            plt.fill_between(
+                xtest.cpu().numpy().flat,
+                (mu - 2 * std).cpu().numpy().flat,
+                (mu + 2 * std).cpu().numpy().flat,
+                color="#dddddd",
+            )
+            if f_true is not None:
+                plt.plot(xtest.cpu().numpy(), f_true(xtest).cpu().numpy(), "b-", lw=2)
+            plt.plot(
+                xtest.cpu().numpy(),
+                mu.cpu().numpy(),
+                "r-",
+                lw=2,
+                label="posterior mean",
+            )
+            plt.title("Posterior mean prediction plus 2 st.deviation")
+            plt.legend()
+            if show:
+                plt.show()
+
+        elif self.d == 2:
+            from scipy.interpolate import griddata
+
+            plt.figure(figsize=(15, 7))
+            plt.clf()
+            ax = plt.axes(projection="3d")
+            xx = xtest[:, 0].cpu().numpy()
+            yy = xtest[:, 1].cpu().numpy()
+            grid_x, grid_y = np.mgrid[
+                min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j
+            ]
+            grid_z_mu = griddata(
+                (xx, yy), mu[:, 0].cpu().numpy(), (grid_x, grid_y), method="linear"
+            )
+            if f_true is not None:
+                grid_z = griddata(
+                    (xx, yy),
+                    f_true(xtest)[:, 0].cpu().numpy(),
+                    (grid_x, grid_y),
+                    method="linear",
+                )
+                ax.plot_surface(grid_x, grid_y, grid_z, color="b", alpha=0.4)
+            if points:
+                ax.scatter(
+                    self.x[:, 0].cpu().numpy(),
+                    self.x[:, 1].cpu().numpy(),
+                    self.y[:, 0].cpu().numpy(),
+                    c="r",
+                    s=100,
+                    marker="o",
+                    depthshade=False,
+                )
+            ax.plot_surface(grid_x, grid_y, grid_z_mu, color="r", alpha=0.4)
+            plt.title("Posterior mean prediction plus 2 st.deviation")
+            # previously shown unconditionally, ignoring ``show``
+            if show:
+                plt.show()
+
+        else:
+            print("Visualization not implemented")
if __name__ == "__main__":
- # domain size
- L_infinity_ball = 1
- # dimension
- d = 1
- # error variance
- s = 0.1
- # grid density
- n = 1024
- # number of intial points
- N = 100
- # smoothness
- gamma = torch.from_numpy(np.array([0.4, 0.4]))
- # test problem
-
- xtest = torch.from_numpy(interval(n, d))
- x = torch.from_numpy(np.random.uniform(-L_infinity_ball, L_infinity_ball, size=(N, d)))
-
- f_no_noise = lambda q: torch.sin(torch.sum(q * 4, dim=1)).view(-1, 1)
- # f_no_noise = lambda q: torch.sin((q[:,0] * 4)).view(-1, 1)
-
- f = lambda q: f_no_noise(q) + torch.normal(mean=torch.zeros(q.size()[0], 1, dtype=torch.float64), std=1.,
- out=None) * s
- # targets
- y = f(x)
-
- # GP model with squared exponential
-
- kernel = KernelFunction(gamma=0.05)
- GP0 = GaussianProcess(kernel_custom=kernel, s=s)
- GP0.fit_gp(x, y)
- GP0.visualize(xtest, f_true=f_no_noise)
-
- GP = NystromFeatures(kernel, m=torch.Tensor([30]), s=s, approx="uniform")
- GP.fit_gp(x, y)
- GP.visualize(xtest, f_true=f_no_noise)
-
- GP = NystromFeatures(kernel, m=torch.Tensor([30]), s=s, approx="online_leverage")
- GP.fit_gp(x, y)
- GP.visualize(xtest, f_true=f_no_noise)
-
- GP = NystromFeatures(kernel, m=torch.Tensor([30]), s=s, approx="svd")
- GP.fit_gp(x, y)
- print(GP0.K, GP.outer_kernel())
- GP.visualize(xtest, f_true=f_no_noise)
+ # Demo: fit an exact GP and three feature approximations on noisy 1-D data.
+ # domain size
+ L_infinity_ball = 1
+ # dimension
+ d = 1
+ # noise scale: multiplies the standard-normal noise in f below
+ s = 0.1
+ # grid density
+ n = 1024
+ # number of initial points
+ N = 100
+ # smoothness
+ # NOTE(review): gamma is not used below; the kernel is built with gamma=0.05
+ gamma = torch.tensor(np.array([0.4, 0.4]))
+ # test problem
+
+ xtest = torch.tensor(interval(n, d))
+ x = torch.tensor(np.random.uniform(-L_infinity_ball, L_infinity_ball, size=(N, d)))
+
+ f_no_noise = lambda q: torch.sin(torch.sum(q * 4, dim=1)).view(-1, 1)
+ # f_no_noise = lambda q: torch.sin((q[:,0] * 4)).view(-1, 1)
+
+ f = (
+ lambda q: f_no_noise(q)
+ + torch.normal(
+ mean=torch.zeros(q.size()[0], 1, dtype=torch.float64), std=1.0, out=None
+ )
+ * s
+ )
+ # targets
+ y = f(x)
+
+ # GP model with squared exponential
+
+ kernel = KernelFunction(gamma=0.05)
+ GP0 = GaussianProcess(kernel_custom=kernel, s=s)
+ GP0.fit_gp(x, y)
+ GP0.visualize(xtest, f_true=f_no_noise)
+
+ # the same kernel with 30 features, once per approximation scheme
+ GP = NystromFeatures(kernel, m=torch.tensor([30]), s=s, approx="uniform")
+ GP.fit_gp(x, y)
+ GP.visualize(xtest, f_true=f_no_noise)
+
+ GP = NystromFeatures(kernel, m=torch.tensor([30]), s=s, approx="online_leverage")
+ GP.fit_gp(x, y)
+ GP.visualize(xtest, f_true=f_no_noise)
+
+ GP = NystromFeatures(kernel, m=torch.tensor([30]), s=s, approx="svd")
+ GP.fit_gp(x, y)
+ # print the exact GP's K next to the feature-based approximation
+ print(GP0.K, GP.outer_kernel())
+ GP.visualize(xtest, f_true=f_no_noise)
diff --git a/stpy/continuous_processes/primal_mkl.py b/stpy/continuous_processes/primal_mkl.py
index 4944f9b..84965c0 100755
--- a/stpy/continuous_processes/primal_mkl.py
+++ b/stpy/continuous_processes/primal_mkl.py
@@ -3,200 +3,215 @@
import numpy as np
import matplotlib.pyplot as plt
-class PrimalMKL(RandomProcess):
-
- def __init__(self,embeddings,init_weights = None, lam = 0.0, s = 0):
- self.embeddings = embeddings
- self.init_weights = init_weights
- self.no_models = len(embeddings)
- self.s = s
- self.lam = lam
- if not isinstance(self.lam,list):
- self.lam = [lam for i in range(self.no_models)]
-
- def total_embed_dim(self):
- self.dims = []
- for embedding in self.embeddings:
- self.dims.append(embedding.get_basis_size())
- sum = torch.sum(torch.Tensor(self.dims))
- return sum
-
- def get_emebed_dims(self):
- self.total_embed_dim()
- return self.dims
-
- # def fit_gp(self, x, y):
- # """
- # In this function we are fitting
- # In this function we are fitting
- #
- #
- #
- # :param x:
- # :param y:
- # :return:
- # """
- #
- #
- # self.x = x
- # self.y = y
- # (self.n, self.d) = self.x.size()
- # self.total_m = self.total_embed_dim()
- # dims_index = torch.cumsum(torch.Tensor([0] + self.get_emebed_dims()),dim = 0).int()
- # self.w = [torch.ones(size = (i,1), dtype = torch.float64,requires_grad = True) for i in self.get_emebed_dims()]
- #
- # self.theta = torch.ones(size = (self.no_models,1), dtype = torch.float64,requires_grad = True)
- #
- #
-
-
- def fit_gp(self,x,y):
- self.x = x
- self.y = y
- (self.n,self.d) = self.x.size()
- self.total_m = self.total_embed_dim()
- dims_index = torch.cumsum(torch.Tensor([0] + self.get_emebed_dims()),dim = 0).int()
-
- self.w = [torch.ones(size = (i,1), dtype = torch.float64,requires_grad = True) for i in self.get_emebed_dims()]
-
- self.theta = torch.ones(size = (self.no_models,1), dtype = torch.float64,requires_grad = True)
-
- # def cost(theta,w):
- #
- # Phi = torch.zeros(size = (self.n,int(self.total_m)), dtype = torch.float64)
- # reg = 0.0
- # for index,embedding in enumerate(self.embeddings):
- # Phi[:,dims_index[index]:dims_index[index+1]] = embedding.embed(self.x)*torch.sqrt(theta[index])
- # reg = reg + torch.sqrt(torch.sum((torch.sqrt(theta[index])*w[index])**2))
- # wvector = torch.cat(w, 0)
- # cost = torch.sum((torch.mm(Phi,wvector) - self.y)**2)
- # cost = cost + self.lam*reg
- # return cost
-
- def regularizers(w):
- reg = torch.zeros(self.no_models,dtype=torch.float64)
- for index, embedding in enumerate(self.embeddings):
- reg[index] = torch.sqrt(torch.sum(w[index] ** 2))
- return reg
-
- def cost(w):
- Phi = torch.zeros(size = (self.n,int(self.total_m)), dtype = torch.float64)
- reg = 0.0
- for index,embedding in enumerate(self.embeddings):
- Phi[:,dims_index[index]:dims_index[index+1]] = embedding.embed_internal(self.x)
- reg = reg + self.lam[index]*torch.sqrt(torch.sum(w[index])**2)
- wvector = torch.cat(w, 0)
- cost = torch.sum((torch.mm(Phi,wvector) - self.y)**2)
- cost = cost + reg**2 + self.s*torch.norm(wvector)**2
- return cost
-
-
-
- ## optimizer objective
- loss = torch.zeros(1,1,requires_grad = True,dtype = torch.float64)
- loss = loss + cost(self.w)
-
-
-
-
- #loss.requires_grad_(True)
-
-
- from pymanopt.manifolds import Euclidean, Product
- from pymanopt import Problem
- from pymanopt.solvers import ConjugateGradient
- from stpy.cost_functions import CostFunction
-
- # define cost function
- C = CostFunction(cost, number_args=self.no_models)
- [cost_numpy, egrad_numpy, ehess_numpy] = C.define()
- x = [np.ones(shape = (i,1)) for i in self.get_emebed_dims()]
-
-
-
- # Optimization with Conjugate Gradient Descent
- #print (cost_numpy(x))
- manifold = Product( [Euclidean(i) for i in self.get_emebed_dims()])
- problem = Problem(manifold=manifold, cost=cost_numpy, egrad=egrad_numpy, ehess=ehess_numpy, verbosity=10)
- #solver = SteepestDescent(maxiter=1000, mingradnorm=1e-8, minstepsize=1e-10)
- solver = ConjugateGradient(maxiter=1000, mingradnorm=1e-8, minstepsize=1e-20)
- Xopt = solver.solve(problem, x=x)
+class PrimalMKL(RandomProcess):
-
-
-
- self.w = [torch.from_numpy(w) for w in Xopt]
- self.theta = torch.sum(regularizers(self.w),dim = 0)/regularizers(self.w) + self.s
- self.theta = 1./self.theta
-
- print (self.theta)
-
-
- def mean_var(self,xtest):
- n = xtest.size()[0]
- dims_index = torch.cumsum(torch.Tensor([0] + self.get_emebed_dims()),dim = 0).int()
- Phi = torch.zeros(size=(n, int(self.total_m)), dtype=torch.float64)
-
- for index, embedding in enumerate(self.embeddings):
- Phi[:, dims_index[index]:dims_index[index + 1]] = embedding.embed_internal(xtest)
-
- wvector = torch.cat(self.w, 0)
- mu = torch.mm(Phi, wvector)
-
- K = (torch.mm(torch.t(Phi),Phi) + self.s * torch.eye(int(self.total_m), dtype=torch.float64))
- temp = torch.t(torch.solve(torch.t(Phi),K)[0])
- var = torch.sqrt(self.s*self.s*torch.einsum('ij,ji->i', (temp, torch.t(Phi) )).view(-1, 1))
-
- mu = mu.detach()
- var = var.detach()
-
- return (mu,var)
-
- def sample(self,xtest, size =1):
- mu, var = self.mean_var(xtest)
- sample = mu + var
- return sample
-
- def visualize(self,xtest,f_true = None, points = True, show = True):
- super().visualize(xtest,f_true = f_true, points = points, show = False)
- ## histogram of weights
- plt.figure(2)
- plt.bar(np.arange(len(self.embeddings)), self.theta.detach().numpy().flatten(), np.ones(len(self.embeddings)) * 0.5)
- plt.show()
+    def __init__(self, embeddings, init_weights=None, lam=0.0, s=0):
+        """
+        Store the embedding models and regularisation settings.
+
+        :param embeddings: sequence of models, one per kernel
+        :param init_weights: stored as given
+        :param lam: one penalty per model as a list, or a single value that is
+            repeated for every model
+        :param s: stored as given
+        """
+        self.embeddings = embeddings
+        self.init_weights = init_weights
+        self.no_models = len(embeddings)
+        self.s = s
+        if isinstance(lam, list):
+            self.lam = lam
+        else:
+            self.lam = [lam] * self.no_models
+
+    def total_embed_dim(self):
+        """
+        Record each embedding's basis size in ``self.dims`` and return their
+        sum as a 0-d tensor.
+        """
+        self.dims = [embedding.get_basis_size() for embedding in self.embeddings]
+        return torch.sum(torch.tensor(self.dims))
+
+ def get_emebed_dims(self):
+ """Recompute the per-embedding basis sizes via ``total_embed_dim`` and return them as a list."""
+ self.total_embed_dim()
+ return self.dims
+
+ # def fit_gp(self, x, y):
+ # """
+ # In this function we are fitting
+ # In this function we are fitting
+ #
+ #
+ #
+ # :param x:
+ # :param y:
+ # :return:
+ # """
+ #
+ #
+ # self.x = x
+ # self.y = y
+ # (self.n, self.d) = self.x.size()
+ # self.total_m = self.total_embed_dim()
+ # dims_index = torch.cumsum(torch.tensor([0] + self.get_emebed_dims()),dim = 0).int()
+ # self.w = [torch.ones(size = (i,1), dtype = torch.float64,requires_grad = True) for i in self.get_emebed_dims()]
+ #
+ # self.theta = torch.ones(size = (self.no_models,1), dtype = torch.float64,requires_grad = True)
+ #
+ #
+
+    def fit_gp(self, x, y):
+        """
+        Fit the per-embedding weight vectors by conjugate gradient (pymanopt
+        on a product of Euclidean spaces) and derive the kernel weights.
+
+        The objective is the squared residual plus
+        ``(sum_j lam_j * ||w_j||_2) ** 2`` plus ``s * ||w||_2 ** 2``.
+
+        :param x: training inputs, 2-D tensor
+        :param y: training targets, column tensor
+        :return: None; sets ``self.w`` (list of weight tensors) and
+            ``self.theta`` (one weight per embedding)
+        """
+        self.x = x
+        self.y = y
+        (self.n, self.d) = self.x.size()
+        self.total_m = self.total_embed_dim()
+        dims = self.get_emebed_dims()
+        dims_index = torch.cumsum(torch.tensor([0] + dims), dim=0).int()
+
+        self.w = [
+            torch.ones(size=(i, 1), dtype=torch.float64, requires_grad=True)
+            for i in dims
+        ]
+
+        self.theta = torch.ones(
+            size=(self.no_models, 1), dtype=torch.float64, requires_grad=True
+        )
+
+        def regularizers(w):
+            # 2-norm of each embedding's weight block
+            reg = torch.zeros(self.no_models, dtype=torch.float64)
+            for index, embedding in enumerate(self.embeddings):
+                reg[index] = torch.sqrt(torch.sum(w[index] ** 2))
+            return reg
+
+        def cost(w):
+            Phi = torch.zeros(size=(self.n, int(self.total_m)), dtype=torch.float64)
+            reg = 0.0
+            for index, embedding in enumerate(self.embeddings):
+                Phi[:, dims_index[index] : dims_index[index + 1]] = (
+                    embedding.embed_internal(self.x)
+                )
+                # Group 2-norm, matching ``regularizers`` above. The previous
+                # sqrt(sum(w) ** 2) evaluated to |sum(w)| instead.
+                reg = reg + self.lam[index] * torch.sqrt(torch.sum(w[index] ** 2))
+            wvector = torch.cat(w, 0)
+            value = torch.sum((torch.mm(Phi, wvector) - self.y) ** 2)
+            value = value + reg**2 + self.s * torch.norm(wvector) ** 2
+            return value
+
+        from pymanopt.manifolds import Euclidean, Product
+        from pymanopt import Problem
+        from pymanopt.solvers import ConjugateGradient
+        from stpy.cost_functions import CostFunction
+
+        # wrap the torch cost into numpy cost / gradient / hessian callables
+        C = CostFunction(cost, number_args=self.no_models)
+        [cost_numpy, egrad_numpy, ehess_numpy] = C.define()
+        x = [np.ones(shape=(i, 1)) for i in dims]
+
+        # Optimization with Conjugate Gradient Descent
+        manifold = Product([Euclidean(i) for i in dims])
+        problem = Problem(
+            manifold=manifold,
+            cost=cost_numpy,
+            egrad=egrad_numpy,
+            ehess=ehess_numpy,
+            verbosity=10,
+        )
+        solver = ConjugateGradient(maxiter=1000, mingradnorm=1e-8, minstepsize=1e-20)
+        Xopt = solver.solve(problem, x=x)
+
+        self.w = [torch.from_numpy(w) for w in Xopt]
+        norms = regularizers(self.w)
+        self.theta = torch.sum(norms, dim=0) / norms + self.s
+        self.theta = 1.0 / self.theta
+
+        print(self.theta)
+
+    def mean_var(self, xtest):
+        """
+        Predict at ``xtest`` with the fitted weights.
+
+        :param xtest: query points, 2-D tensor
+        :return: ``(mu, var)``, both detached column tensors. ``var`` is the
+            square root of ``s^2 * diag(Phi (Phi^T Phi + s I)^{-1} Phi^T)``
+            with ``Phi`` built from ``xtest``.
+        """
+        n = xtest.size()[0]
+        dims_index = torch.cumsum(
+            torch.tensor([0] + self.get_emebed_dims()), dim=0
+        ).int()
+        Phi = torch.zeros(size=(n, int(self.total_m)), dtype=torch.float64)
+
+        # stack the feature blocks of all embeddings side by side
+        for index, embedding in enumerate(self.embeddings):
+            Phi[:, dims_index[index] : dims_index[index + 1]] = (
+                embedding.embed_internal(xtest)
+            )
+
+        wvector = torch.cat(self.w, 0)
+        mu = torch.mm(Phi, wvector)
+
+        K = torch.mm(torch.t(Phi), Phi) + self.s * torch.eye(
+            int(self.total_m), dtype=torch.float64
+        )
+        # torch.solve(B, A) no longer exists; torch.linalg.solve(A, B) solves
+        # the same system A X = B (note the swapped argument order).
+        temp = torch.t(torch.linalg.solve(K, torch.t(Phi)))
+        var = torch.sqrt(
+            self.s * self.s * torch.einsum("ij,ji->i", temp, torch.t(Phi)).view(-1, 1)
+        )
+
+        mu = mu.detach()
+        var = var.detach()
+
+        return (mu, var)
+
+    def sample(self, xtest, size=1):
+        """
+        Return the first output of ``mean_var`` plus the second, evaluated at
+        ``xtest``. The result is deterministic and ``size`` is not used.
+        """
+        mean, spread = self.mean_var(xtest)
+        return mean + spread
+
+    def visualize(self, xtest, f_true=None, points=True, show=True):
+        """
+        Draw the parent class's plot without showing it, then add a bar chart
+        of the kernel weights ``self.theta`` in figure 2 and show everything.
+        """
+        super().visualize(xtest, f_true=f_true, points=points, show=False)
+        ## histogram of weights
+        plt.figure(2)
+        count = len(self.embeddings)
+        positions = np.arange(count)
+        heights = self.theta.detach().numpy().flatten()
+        widths = np.ones(count) * 0.5
+        plt.bar(positions, heights, widths)
+        plt.show()
if __name__ == "__main__":
- from stpy.continuous_processes.fourier_fea import GaussianProcessFF
- from stpy.continuous_processes.gauss_procc import GaussianProcess
- from stpy.test_functions.benchmarks import MultiRKHS
-
- n = 1024
- N = 100
- s = 0.01
- TestFunction = MultiRKHS()
- xtest = TestFunction.interval(n)
- x = TestFunction.initial_guess(N)
- y = TestFunction.eval(x,sigma = s)
- #TestFunction.visualize(xtest)
-
-
- GP1 = GaussianProcess(s=0, kernel="linear")
- GP2 = GaussianProcessFF(s=s, m=100, approx="hermite")
-
- MKL = PrimalMKL([GP1,GP2], lam=[0.1, 0.1], s = s)
- MKL.fit_gp(x, y)
-
- print ("Importance Weights:",MKL.theta)
-
- print("Slope of linear line:", MKL.w[0])
-
- MKL.visualize(xtest, f_true=TestFunction.eval_noiseless)
-
- # MKL = PrimalMKL(GPs, lam=0.01)
- # MKL.fit_gp(x,y)
- # MKL.visualize(xtest,f_true=TestFunction.eval_noiseless)
- #
- # MKL = PrimalMKL(GPs, lam=0.0001)
- # MKL.fit_gp(x,y)
- # MKL.visualize(xtest,f_true=TestFunction.eval_noiseless)
+ # Demo: PrimalMKL over a linear GP and a Hermite feature GP on MultiRKHS.
+ from stpy.continuous_processes.fourier_fea import GaussianProcessFF
+ from stpy.continuous_processes.gauss_procc import GaussianProcess
+ from stpy.test_functions.benchmarks import MultiRKHS
+
+ # n -> TestFunction.interval, N -> TestFunction.initial_guess, s -> sigma
+ n = 1024
+ N = 100
+ s = 0.01
+ TestFunction = MultiRKHS()
+ xtest = TestFunction.interval(n)
+ x = TestFunction.initial_guess(N)
+ y = TestFunction.eval(x, sigma=s)
+ # TestFunction.visualize(xtest)
+
+ GP1 = GaussianProcess(s=0, kernel="linear")
+ GP2 = GaussianProcessFF(s=s, m=100, approx="hermite")
+
+ MKL = PrimalMKL([GP1, GP2], lam=[0.1, 0.1], s=s)
+ MKL.fit_gp(x, y)
+
+ print("Importance Weights:", MKL.theta)
+
+ # MKL.w[0] is the weight block of the first embedding (the linear GP)
+ print("Slope of linear line:", MKL.w[0])
+
+ MKL.visualize(xtest, f_true=TestFunction.eval_noiseless)
+
+ # MKL = PrimalMKL(GPs, lam=0.01)
+ # MKL.fit_gp(x,y)
+ # MKL.visualize(xtest,f_true=TestFunction.eval_noiseless)
+ #
+ # MKL = PrimalMKL(GPs, lam=0.0001)
+ # MKL.fit_gp(x,y)
+ # MKL.visualize(xtest,f_true=TestFunction.eval_noiseless)
diff --git a/stpy/continuous_processes/trace_features.py b/stpy/continuous_processes/trace_features.py
index 6f8a8ab..a8f5f26 100644
--- a/stpy/continuous_processes/trace_features.py
+++ b/stpy/continuous_processes/trace_features.py
@@ -7,117 +7,125 @@
class TraceFeatures(KernelizedFeatures):
- def __init__(self, *args, PSD=False, **kwargs):
- super().__init__(*args, **kwargs)
- self.m = int(self.m)
- self.PSD = PSD
-
- def construct_covariance(self):
- emb = self.emb
- X = torch.flatten(torch.einsum('ij,ik->jki', emb, emb).permute(1, 0, 2), end_dim=1)
- V = torch.einsum('ik,jk->ij', X, X)
- # Z = torch.einsum('ij,j->i',X,y.reshape(-1)).reshape(-1,1)
- self.V = V + self.lam * self.s ** 2 * torch.eye(self.m ** 2).double()
-
- # self.A_new,_ = torch.solve(Z,self.V)
- # self.A_new = self.A_new.reshape(self.m,self.m)
-
- def fit_gp(self, x, y):
- self.n, self.d = x.size()
- self.x = x
- self.y = y
-
- self.emb = self.embed(x)
- self.construct_covariance()
-
- emb = self.emb.numpy()
- A = cp.Variable((self.m, self.m), symmetric=True)
- cost = cp.sum_squares \
- (cp.diag(emb @ A @ emb.T) - y.view(-1).numpy()) / (self.s ** 2) + (self.lam) * cp.norm(A, "fro")
-
- if self.PSD == True:
- constraints = [A >> 0]
- else:
- constraints = []
-
- prob = cp.Problem(cp.Minimize(cost), constraints)
- prob.solve(solver=cp.MOSEK, verbose=True)
- self.A = torch.from_numpy(A.value)
- self.fit = True
-
- def mean_std(self, xtest, std=True):
- emb = self.embed(xtest)
- mu = torch.einsum('ij,jk,ik->i', emb, self.A, emb).view(-1, 1)
- if std == True:
- # invV = torch.inverse(self.V)
- X = torch.flatten(torch.einsum('ij,ik->jki', emb, emb), end_dim=1)
- Z, _ = torch.solve(X, self.V)
- # diagonal = self.lam*self.s ** 2 * torch.einsum('ji,jk,ki->i', (X, invV, X)).view(-1, 1)
- diagonal = self.lam * self.s ** 2 * torch.einsum('ij,ij->j', X, Z).view(-1, 1)
- return mu, torch.sqrt(diagonal).view(-1, 1)
- else:
- return mu
-
- def band(self, xtest, sqrtbeta=2., maximization=True):
- emb = self.embed(xtest)
- X = torch.einsum('ij,ik->ijk', emb, emb)
- n = emb.size()[0]
- ucb = torch.zeros(size=(n, 1)).double()
-
- for i in range(n):
- A = cp.Variable((self.m, self.m), symmetric=True)
- cost = cp.trace(A @ X[i, :, :])
-
- Z = torch.cholesky(self.V, upper=True)
- zero = np.zeros(self.m ** 2)
- constraints = [cp.SOC(zero.T @ cp.vec(A) + self.s * sqrtbeta, Z @ (cp.vec(A) - cp.vec(self.A.numpy())))]
-
- if self.PSD == True:
- constraints += [A >> 0]
-
- if maximization == True:
- prob = cp.Problem(cp.Maximize(cost), constraints)
- else:
- prob = cp.Problem(cp.Minimize(cost), constraints)
-
- prob.solve(solver=cp.MOSEK, verbose=False)
- ucb[i] = torch.trace(torch.from_numpy(A.value) @ X[i, :, :])
- return ucb
-
- def lcb(self, xtest, sqrtbeta=2.):
- return self.band(xtest, sqrtbeta=sqrtbeta, maximization=False)
-
- def ucb(self, xtest, sqrtbeta=2.):
- return self.band(xtest, sqrtbeta=sqrtbeta, maximization=True)
+ def __init__(self, *args, PSD=False, **kwargs):
+ """Forward all arguments to the parent constructor, cast ``self.m`` to int and store ``PSD``."""
+ super().__init__(*args, **kwargs)
+ self.m = int(self.m)
+ # when True, fit_gp and band add the constraint A >> 0
+ self.PSD = PSD
+
+    def construct_covariance(self):
+        """
+        Build ``self.V``: the Gram matrix of the flattened outer products of
+        the rows of ``self.emb``, plus ``lam * s^2`` times the identity of
+        size ``m^2``.
+        """
+        outer = torch.einsum("ij,ik->jki", self.emb, self.emb).permute(1, 0, 2)
+        X = torch.flatten(outer, end_dim=1)
+        gram = torch.einsum("ik,jk->ij", X, X)
+        ridge = self.lam * self.s**2 * torch.eye(self.m**2).double()
+        self.V = gram + ridge
+
+    def fit_gp(self, x, y):
+        """
+        Fit the symmetric matrix ``A`` by minimising, with MOSEK through
+        cvxpy, ``||diag(Phi A Phi^T) - y||^2 / s^2 + lam * ||A||_F``, where
+        ``Phi = self.embed(x)``; ``A >> 0`` is enforced when ``self.PSD`` is
+        True. Stores the data, ``self.V`` and the solution ``self.A``.
+        """
+        self.n, self.d = x.size()
+        self.x = x
+        self.y = y
+
+        self.emb = self.embed(x)
+        self.construct_covariance()
+
+        features = self.emb.numpy()
+        targets = y.view(-1).numpy()
+        A = cp.Variable((self.m, self.m), symmetric=True)
+        residual = cp.diag(features @ A @ features.T) - targets
+        data_fit = cp.sum_squares(residual) / (self.s**2)
+        penalty = (self.lam) * cp.norm(A, "fro")
+
+        constraints = [A >> 0] if self.PSD == True else []
+
+        prob = cp.Problem(cp.Minimize(data_fit + penalty), constraints)
+        prob.solve(solver=cp.MOSEK, verbose=True)
+        self.A = torch.from_numpy(A.value)
+        self.fit = True
+
+    def mean_std(self, xtest, std=True):
+        """
+        Evaluate the fitted quadratic form ``phi(x)^T A phi(x)`` at ``xtest``.
+
+        :param xtest: query points accepted by ``self.embed``
+        :param std: when True also return
+            ``sqrt(lam * s^2 * diag(X^T V^{-1} X))``, where the columns of
+            ``X`` are the flattened outer products of the embedded rows
+        :return: ``(mu, std)`` column tensors if ``std`` is True, else ``mu``
+        """
+        emb = self.embed(xtest)
+        mu = torch.einsum("ij,jk,ik->i", emb, self.A, emb).view(-1, 1)
+        if std == True:
+            X = torch.flatten(torch.einsum("ij,ik->jki", emb, emb), end_dim=1)
+            # torch.solve(B, A) no longer exists; torch.linalg.solve(A, B)
+            # solves the same system A Z = B (note the swapped arguments).
+            Z = torch.linalg.solve(self.V, X)
+            diagonal = self.lam * self.s**2 * torch.einsum("ij,ij->j", X, Z).view(-1, 1)
+            return mu, torch.sqrt(diagonal).view(-1, 1)
+        else:
+            return mu
+
+    def band(self, xtest, sqrtbeta=2.0, maximization=True):
+        """
+        Confidence bound at each test point, obtained by optimising
+        ``trace(A X_i)`` over the ellipsoid
+        ``||chol(V) (vec(A) - vec(A_fit))|| <= s * sqrtbeta``.
+
+        One MOSEK problem is solved per row of ``xtest``.
+
+        :param xtest: query points accepted by ``self.embed``
+        :param sqrtbeta: radius multiplier of the confidence set
+        :param maximization: True for the upper bound, False for the lower
+        :return: float64 tensor of shape ``(n, 1)``
+        """
+        emb = self.embed(xtest)
+        X = torch.einsum("ij,ik->ijk", emb, emb)
+        n = emb.size()[0]
+        ucb = torch.zeros(size=(n, 1)).double()
+
+        # The factor of V does not depend on the test point: compute it once
+        # (torch.cholesky is deprecated) and hand cvxpy a plain numpy array.
+        Z = torch.linalg.cholesky(self.V, upper=True).numpy()
+        zero = np.zeros(self.m**2)
+        A_fit = self.A.numpy()
+
+        for i in range(n):
+            A = cp.Variable((self.m, self.m), symmetric=True)
+            cost = cp.trace(A @ X[i, :, :])
+
+            # zero.T @ vec(A) is identically 0; it only turns the constant
+            # radius into a cvxpy expression for the cone constraint
+            constraints = [
+                cp.SOC(
+                    zero.T @ cp.vec(A) + self.s * sqrtbeta,
+                    Z @ (cp.vec(A) - cp.vec(A_fit)),
+                )
+            ]
+
+            if self.PSD == True:
+                constraints += [A >> 0]
+
+            if maximization == True:
+                prob = cp.Problem(cp.Maximize(cost), constraints)
+            else:
+                prob = cp.Problem(cp.Minimize(cost), constraints)
+
+            prob.solve(solver=cp.MOSEK, verbose=False)
+            ucb[i] = torch.trace(torch.from_numpy(A.value) @ X[i, :, :])
+        return ucb
+
+ def lcb(self, xtest, sqrtbeta=2.0):
+ """Return ``self.band`` evaluated at ``xtest`` with ``maximization=False``."""
+ return self.band(xtest, sqrtbeta=sqrtbeta, maximization=False)
+
+ def ucb(self, xtest, sqrtbeta=2.0):
+ """Return ``self.band`` evaluated at ``xtest`` with ``maximization=True``."""
+ return self.band(xtest, sqrtbeta=sqrtbeta, maximization=True)
if __name__ == "__main__":
- from stpy.embeddings.embedding import HermiteEmbedding
- import matplotlib.pyplot as plt
+ from stpy.embeddings.embedding import HermiteEmbedding
+ import matplotlib.pyplot as plt
- m = 32
- n = 16
- s = 0.01
- N = 5
+ m = 32
+ n = 16
+ s = 0.01
+ N = 5
- func = lambda x: torch.sin(x * np.pi) ** 2 + 0.5
- x = torch.from_numpy(np.random.uniform(-1, 1, size=(N, 1)))
- y = func(x)
+ func = lambda x: torch.sin(x * np.pi) ** 2 + 0.5
+ x = torch.from_numpy(np.random.uniform(-1, 1, size=(N, 1)))
+ y = func(x)
- embedding = HermiteEmbedding(m=m, gamma=0.5)
- xtest = torch.from_numpy(np.linspace(-1, 1, n)).view(-1, 1)
+ embedding = HermiteEmbedding(m=m, gamma=0.5)
+ xtest = torch.from_numpy(np.linspace(-1, 1, n)).view(-1, 1)
- F = TraceFeatures(s=s, embedding=embedding, m=m, PSD=True)
- F.fit_gp(x, y)
+ F = TraceFeatures(s=s, embedding=embedding, m=m, PSD=True)
+ F.fit_gp(x, y)
- F.visualize(xtest, f_true=func, size=0, show=False)
+ F.visualize(xtest, f_true=func, size=0, show=False)
- lcb = F.lcb(xtest)
- ucb = F.ucb(xtest)
- plt.plot(xtest, lcb, '-s', color='lightblue', label='lcb')
- plt.plot(xtest, ucb, '-s', color='gray', label='ucb')
- plt.legend()
- plt.show()
+ lcb = F.lcb(xtest)
+ ucb = F.ucb(xtest)
+ plt.plot(xtest, lcb, "-s", color="lightblue", label="lcb")
+ plt.plot(xtest, ucb, "-s", color="gray", label="ucb")
+ plt.legend()
+ plt.show()
#
# mu, std = F.mean_std(xtest)
# plt.plot(xtest,func(xtest),'r',label = 'true')
diff --git a/stpy/continuous_processes/truncated_gp.py b/stpy/continuous_processes/truncated_gp.py
new file mode 100644
index 0000000..91d05a6
--- /dev/null
+++ b/stpy/continuous_processes/truncated_gp.py
@@ -0,0 +1,84 @@
+import numpy as np
+from stpy.continuous_processes.gauss_procc import GaussianProcess
+from stpy.helpers.posterior_sampling import tmg
+import torch
+
+
+class TruncatedGP:
+    """
+    A truncated Gaussian Process that can serve as a ground truth model
+    for the PPP estimators. Sampling is very slow at the moment.
+
+    Values drawn at previously seen inputs are cached in ``x_acc`` / ``y_acc``
+    and reused, and the wrapped GP is refitted on the cache after every draw.
+    """
+
+    def __init__(self, kernel, d):
+        """
+        :param kernel: forwarded to ``GaussianProcess(kernel=...)``
+        :param d: forwarded to ``GaussianProcess(d=...)``
+        """
+        self.gp = GaussianProcess(kernel=kernel, d=d)
+        # cache of evaluated inputs and the values sampled there
+        self.x_acc = None
+        self.y_acc = None
+
+    def __call__(self, x: torch.Tensor, dt: float = 1.0, burn_in=30):
+        """
+        Evaluate the sampled function at the rows of ``x``.
+
+        Rows already in the cache reuse their stored value; the remaining rows
+        are drawn jointly with ``tmg`` from the GP conditioned on the cache.
+
+        :param x: query points, 2-D tensor with one row per point
+        :param dt: factor multiplied onto the returned values
+        :param burn_in: forwarded to ``tmg``
+        :return: 1-D tensor with one value per row of ``x``, times ``dt``
+        """
+        N = len(x)
+
+        if self.x_acc is None:
+            sample = torch.zeros(N)
+            x_new = x
+        else:
+            # Allocate in the cache's dtype so the indexed assignments below
+            # never mix dtypes (the dtype tmg returns is not fixed here).
+            sample = torch.zeros(N, dtype=self.y_acc.dtype)
+
+            # Find indices of x that are already in self.x_acc
+            matching = torch.all(
+                x.unsqueeze(1) == self.x_acc.unsqueeze(0), dim=2
+            )  # (N, M)
+            matching_indices = torch.nonzero(matching, as_tuple=False)  # (K, 2)
+            idx_cached_in_x = matching_indices[:, 0]  # Indices in x
+            idx_cached_in_acc = matching_indices[:, 1]  # Indices in self.x_acc
+
+            # Determine which indices are new
+            mask_cached = torch.zeros(N, dtype=torch.bool)
+            mask_cached[idx_cached_in_x] = True
+            idx_new = torch.nonzero(~mask_cached).squeeze(1)
+            # Retrieve cached function values
+            sample[idx_cached_in_x] = self.y_acc[idx_cached_in_acc]
+            x_new = x[idx_new]
+
+        # Compute function values for new points
+        if len(x_new) > 0:
+            if self.gp.fitted:
+                mean_new, cov_new = self.gp.mean_std_sub(x_new, full=True)
+                mean_new = mean_new.squeeze(1)
+            else:
+                mean_new = torch.zeros(len(x_new))
+                # Prior covariance of the new points only: using all of x
+                # would not match the len(x_new) jitter matrix added below.
+                cov_new = self.gp.kernel(x_new, x_new)
+
+            # Sample truncated GP for new points
+            factor = torch.eye(len(x_new))
+            summand = torch.zeros(len(x_new))
+            # small diagonal jitter added to the covariance before sampling
+            cov_new = cov_new.cpu().numpy() + 1e-7 * np.eye(len(x_new))
+            sample_new = tmg(
+                1,
+                mean_new.cpu().numpy(),
+                cov_new,
+                torch.ones(len(x_new)).cpu().numpy(),
+                factor.cpu().numpy(),
+                summand.cpu().numpy(),
+                burn_in,
+                True,
+            )
+            sample_new = torch.tensor(sample_new[0])
+
+            # Update sample array and caches
+            if self.x_acc is None:
+                sample = sample_new
+                self.x_acc = x_new
+                self.y_acc = sample_new
+            else:
+                sample[idx_new] = sample_new.to(sample.dtype)
+                self.x_acc = torch.cat([self.x_acc, x_new])
+                self.y_acc = torch.cat([self.y_acc, sample_new])
+
+            self.gp.fit(self.x_acc, self.y_acc.unsqueeze(1))
+
+        return sample * dt
diff --git a/stpy/continuous_processes/truncated_kernelized_features.py b/stpy/continuous_processes/truncated_kernelized_features.py
index 4fd76c3..01fc298 100644
--- a/stpy/continuous_processes/truncated_kernelized_features.py
+++ b/stpy/continuous_processes/truncated_kernelized_features.py
@@ -1,61 +1,96 @@
from stpy.continuous_processes.kernelized_features import KernelizedFeatures
-import torch
+import torch
+
class TruncatedKernelizedFeatures(KernelizedFeatures):
- def __init__(self, embedding, m, s=0.001, lam=1, d=1, diameter=1, verbose=True, groups=None, bounds=None, scale=1, kappa=1, poly=2,
- primal=True, beta_fun=None, alpha_score=lambda t: t**(1/4), default_alpha_score=1., bound = 1.):
- super().__init__(embedding, m, s =s, lam=lam,d= d,diameter= diameter, verbose=verbose,
- groups = groups, bounds=bounds, scale=scale, kappa=kappa, poly=poly, primal=primal, beta_fun = beta_fun, bound = bound)
- primal = True
- self.bound = bound
- self.alpha_score = alpha_score
- self.default_alpha_score = default_alpha_score
-
- def theta_mean(self, var=False, prior=False):
- self.precompute()
-
- if self.fitted == True and prior == False:
- theta_mean = self.invV@self.Q.T@self.y_truncated
- Z = self.s**2 * self.invV
- else:
- theta_mean = 0*torch.ones(size=(self.m, 1)).double()
-
- if var is False:
- return theta_mean
- else:
- return (theta_mean, Z)
-
- def fit(self, x=None, y=None):
- self.alphas = self.y*0 + self.default_alpha_score
- super().fit(x= x, y= y)
-
- def add_points(self,d):
- x, y = d
- if self.x is not None:
- self.x = torch.cat((self.x, x), dim=0)
- self.y = torch.cat((self.y, y), dim=0)
- new_alpha =torch.Tensor( [self.alpha_score(self.x.size()[0])]).view(1,1)
- self.alphas = torch.cat((self.alphas,new_alpha),dim=0)
- else:
- self.x = x
- self.y = y
- self.alphas = self.default_alpha_score
- self.fitted = False
-
- def add_data_point(self,x,y):
- self.add_points(x,y)
-
- def precompute(self):
- if self.fitted == False:
- self.Q = self.embed(self.x)
- I = torch.eye(int(self.m)).double()
- Z_ = self.Q.T@self.Q
- self.V = Z_ + (self.s **2) * self.lam *I
- self.invV = torch.pinverse(self.V)
- self.y_truncated = self.y.view(-1)*(torch.abs(self.y) < self.alphas).view(-1).double()
- self.y_truncated = self.y_truncated.view(-1,1)
- self.fitted = True
- else:
- pass
+ def __init__(
+ self,
+ embedding,
+ m,
+ s=0.001,
+ lam=1,
+ d=1,
+ diameter=1,
+ verbose=True,
+ groups=None,
+ bounds=None,
+ scale=1,
+ kappa=1,
+ poly=2,
+ primal=True,
+ beta_fun=None,
+ alpha_score=lambda t: t ** (1 / 4),
+ default_alpha_score=1.0,
+ bound=1.0,
+ ):
+ super().__init__(
+ embedding,
+ m,
+ s=s,
+ lam=lam,
+ d=d,
+ diameter=diameter,
+ verbose=verbose,
+ groups=groups,
+ bounds=bounds,
+ scale=scale,
+ kappa=kappa,
+ poly=poly,
+ primal=primal,
+ beta_fun=beta_fun,
+ bound=bound,
+ )
+ primal = True
+ self.bound = bound
+ self.alpha_score = alpha_score
+ self.default_alpha_score = default_alpha_score
+
+ def theta_mean(self, var=False, prior=False):
+ self.precompute()
+
+ if self.fitted == True and prior == False:
+ theta_mean = self.invV @ self.Q.T @ self.y_truncated
+ Z = self.s**2 * self.invV
+ else:
+ theta_mean = 0 * torch.ones(size=(self.m, 1)).double()
+
+ if var is False:
+ return theta_mean
+ else:
+ return (theta_mean, Z)
+
+ def fit(self, x=None, y=None):
+ self.alphas = self.y * 0 + self.default_alpha_score
+ super().fit(x=x, y=y)
+
+ def add_points(self, d):
+ x, y = d
+ if self.x is not None:
+ self.x = torch.cat((self.x, x), dim=0)
+ self.y = torch.cat((self.y, y), dim=0)
+ new_alpha = torch.tensor([self.alpha_score(self.x.size()[0])]).view(1, 1)
+ self.alphas = torch.cat((self.alphas, new_alpha), dim=0)
+ else:
+ self.x = x
+ self.y = y
+ self.alphas = self.default_alpha_score
+ self.fitted = False
+
+ def add_data_point(self, x, y):
+ self.add_points(x, y)
+ def precompute(self):
+ if self.fitted == False:
+ self.Q = self.embed(self.x)
+ I = torch.eye(int(self.m)).double()
+ Z_ = self.Q.T @ self.Q
+ self.V = Z_ + (self.s**2) * self.lam * I
+ self.invV = torch.pinverse(self.V)
+ self.y_truncated = (
+ self.y.view(-1) * (torch.abs(self.y) < self.alphas).view(-1).double()
+ )
+ self.y_truncated = self.y_truncated.view(-1, 1)
+ self.fitted = True
+ else:
+ pass
diff --git a/stpy/dimred/sri.py b/stpy/dimred/sri.py
index 6904037..04f6d75 100644
--- a/stpy/dimred/sri.py
+++ b/stpy/dimred/sri.py
@@ -5,130 +5,138 @@
from sklearn.cluster import KMeans
-class SRI():
-
- def __init__(self):
- """
- :param X: X values
- :param y: response variables
- :param relative: relative to number of samples
- :param buckets:
- """
-
- def standardize(self, X):
- (n, d) = X.size()
- Sigma_x = np.cov(self.X.numpy().T)
- E_x = np.mean(self.X.numpy(), axis=0)
- Sigma_x_half_inv = np.linalg.pinv(scipy.linalg.sqrtm(Sigma_x))
- Z = (X.numpy() - np.outer(np.ones(n), E_x)) @ Sigma_x_half_inv
-
- return Sigma_x_half_inv, Z
-
- def slice_kmeans(self, y):
- indices = []
- kmeans = KMeans(n_clusters=self.buckets).fit(y.numpy().reshape(-1, 1))
-
- for label in range(self.buckets):
- ind = kmeans.labels_ == label
- indices.append(ind)
- return indices
-
- def fit_sri(self, X, y, buckets=10):
- self.X = X
- self.y = y
- self.buckets = buckets
- (n, d) = self.X.size()
- Sigma_x_half_inv, Z = self.standardize(self.X)
-
- if isinstance(self.buckets, int):
- indices = self.slice_kmeans(self.y)
-
- zs = []
- ns = []
- for ind in indices:
- if np.sum(ind) > 1:
- z = np.mean(Z[ind, :].reshape(-1, d), axis=0)
- ns.append(np.sum(ind))
- zs.append(z)
- Zn = np.array(zs)
- V = (Zn.T @ np.diag(ns) @ Zn) / self.buckets
-
- else:
- raise AssertionError("Unknown bucketing rule.")
-
- lams, eignv = np.linalg.eig(V)
- betas = Sigma_x_half_inv @ eignv
- return torch.from_numpy(lams), torch.from_numpy(betas)
-
- def fit_save(self, X, y, buckets=10):
- self.X = X
- self.y = y
- self.buckets = buckets
- (n, d) = self.X.size()
- Sigma_x_half_inv, Z = self.standardize(self.X)
-
- if isinstance(self.buckets, int):
- indices = self.slice_kmeans(self.y)
-
- V = np.zeros(shape=(d, d))
- I = np.eye(d)
- for ind in indices:
- ns = np.sum(ind)
- if ns > 1:
- Covar_slice = np.cov(Z[ind, :].reshape(-1, d).T)
- V = V + ((I - Covar_slice) @ (I - Covar_slice)) * (float(ns) / float(n))
-
- else:
- raise AssertionError("Unknown bucketing rule.")
-
- lams, eignv = np.linalg.eig(V)
- betas = Sigma_x_half_inv @ eignv
- return torch.from_numpy(lams), torch.from_numpy(betas)
-
- def gradient_design(self, d, k, nablaF, eps=10e-4):
- Sigma = torch.eye(d).double() * eps
- x0 = torch.rand(size=(k, d)).double()
- subspace = nablaF(x0)
- Sigma = Sigma + subspace.T @ subspace
- return x0, Sigma, subspace
-
- def sample_dir(self, n, x0, subspace, eps=10e-4):
- indices = np.arange(0, x0.size()[0], 1)
- choice = np.random.choice(indices, n, replace=True)
- magnitude = np.diag(np.random.randn(n))
- sample = x0.numpy()[choice] + magnitude @ subspace[choice].numpy() + eps * np.random.randn(n, d)
- return torch.from_numpy(sample)
+class SRI:
+
+ def __init__(self):
+ """
+ :param X: X values
+ :param y: response variables
+ :param relative: relative to number of samples
+ :param buckets:
+ """
+
+ def standardize(self, X):
+ (n, d) = X.size()
+ Sigma_x = np.cov(self.X.numpy().T)
+ E_x = np.mean(self.X.numpy(), axis=0)
+ Sigma_x_half_inv = np.linalg.pinv(scipy.linalg.sqrtm(Sigma_x))
+ Z = (X.numpy() - np.outer(np.ones(n), E_x)) @ Sigma_x_half_inv
+
+ return Sigma_x_half_inv, Z
+
+ def slice_kmeans(self, y):
+ indices = []
+ kmeans = KMeans(n_clusters=self.buckets).fit(y.numpy().reshape(-1, 1))
+
+ for label in range(self.buckets):
+ ind = kmeans.labels_ == label
+ indices.append(ind)
+ return indices
+
+ def fit_sri(self, X, y, buckets=10):
+ self.X = X
+ self.y = y
+ self.buckets = buckets
+ (n, d) = self.X.size()
+ Sigma_x_half_inv, Z = self.standardize(self.X)
+
+ if isinstance(self.buckets, int):
+ indices = self.slice_kmeans(self.y)
+
+ zs = []
+ ns = []
+ for ind in indices:
+ if np.sum(ind) > 1:
+ z = np.mean(Z[ind, :].reshape(-1, d), axis=0)
+ ns.append(np.sum(ind))
+ zs.append(z)
+ Zn = np.array(zs)
+ V = (Zn.T @ np.diag(ns) @ Zn) / self.buckets
+
+ else:
+ raise AssertionError("Unknown bucketing rule.")
+
+ lams, eignv = np.linalg.eig(V)
+ betas = Sigma_x_half_inv @ eignv
+ return torch.from_numpy(lams), torch.from_numpy(betas)
+
+ def fit_save(self, X, y, buckets=10):
+ self.X = X
+ self.y = y
+ self.buckets = buckets
+ (n, d) = self.X.size()
+ Sigma_x_half_inv, Z = self.standardize(self.X)
+
+ if isinstance(self.buckets, int):
+ indices = self.slice_kmeans(self.y)
+
+ V = np.zeros(shape=(d, d))
+ I = np.eye(d)
+ for ind in indices:
+ ns = np.sum(ind)
+ if ns > 1:
+ Covar_slice = np.cov(Z[ind, :].reshape(-1, d).T)
+ V = V + ((I - Covar_slice) @ (I - Covar_slice)) * (
+ float(ns) / float(n)
+ )
+
+ else:
+ raise AssertionError("Unknown bucketing rule.")
+
+ lams, eignv = np.linalg.eig(V)
+ betas = Sigma_x_half_inv @ eignv
+ return torch.from_numpy(lams), torch.from_numpy(betas)
+
+ def gradient_design(self, d, k, nablaF, eps=10e-4):
+ Sigma = torch.eye(d).double() * eps
+ x0 = torch.rand(size=(k, d)).double()
+ subspace = nablaF(x0)
+ Sigma = Sigma + subspace.T @ subspace
+ return x0, Sigma, subspace
+
+ def sample_dir(self, n, x0, subspace, eps=10e-4):
+ indices = np.arange(0, x0.size()[0], 1)
+ choice = np.random.choice(indices, n, replace=True)
+ magnitude = np.diag(np.random.randn(n))
+ sample = (
+ x0.numpy()[choice]
+ + magnitude @ subspace[choice].numpy()
+ + eps * np.random.randn(n, d)
+ )
+ return torch.from_numpy(sample)
if __name__ == "__main__":
- d = 3
- p = 2
+ d = 3
+ p = 2
- sigma = 0.
- A = torch.from_numpy(np.random.randn(d, p))
- A = torch.from_numpy(np.eye(d, p))
- print(A)
- # exampel function
- f = lambda x: torch.sum((x @ A) ** 2, dim=1) + sigma * torch.randn(x.size()[0], dtype=torch.double)
- f_no_noise = lambda x: torch.sum((x @ A) ** 2, dim=1)
+ sigma = 0.0
+ A = torch.from_numpy(np.random.randn(d, p))
+ A = torch.from_numpy(np.eye(d, p))
+ print(A)
+ # exampel function
+ f = lambda x: torch.sum((x @ A) ** 2, dim=1) + sigma * torch.randn(
+ x.size()[0], dtype=torch.double
+ )
+ f_no_noise = lambda x: torch.sum((x @ A) ** 2, dim=1)
- nablaF = lambda x: x @ A @ A.T
+ nablaF = lambda x: x @ A @ A.T
- DimRed = SRI()
- N = 100
- x0, Sigma, subspace = DimRed.gradient_design(d, d, nablaF)
- X0 = DimRed.sample_dir(N, x0, subspace)
- y0 = f(X0)
+ DimRed = SRI()
+ N = 100
+ x0, Sigma, subspace = DimRed.gradient_design(d, d, nablaF)
+ X0 = DimRed.sample_dir(N, x0, subspace)
+ y0 = f(X0)
- plt.scatter(X0[:, 0], X0[:, 1], c=y0.view(-1))
- plt.show()
+ plt.scatter(X0[:, 0], X0[:, 1], c=y0.view(-1))
+ plt.show()
- lams, betas = DimRed.fit_sri(X0, y0, buckets=20)
+ lams, betas = DimRed.fit_sri(X0, y0, buckets=20)
- print(lams / torch.sum(lams))
- print(betas)
+ print(lams / torch.sum(lams))
+ print(betas)
- lams2, betas2 = DimRed.fit_save(X0, y0, buckets=20)
+ lams2, betas2 = DimRed.fit_save(X0, y0, buckets=20)
- print(lams2 / torch.sum(lams2))
- print(betas2)
+ print(lams2 / torch.sum(lams2))
+ print(betas2)
diff --git a/stpy/embeddings/bernstein_embedding.py b/stpy/embeddings/bernstein_embedding.py
index 3653911..68467c6 100644
--- a/stpy/embeddings/bernstein_embedding.py
+++ b/stpy/embeddings/bernstein_embedding.py
@@ -7,105 +7,109 @@
class BernsteinEmbedding(PositiveEmbedding):
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
-
- def basis_fun(self, x, j): # 1d
- """
- Return the value of basis function \phi_j(x)
- :param x: double, need to be in the interval
- :param j: integer, index of hat functions, 0 <= j <= m-1
- :return: \phi_j(x)
- """
- lim = [self.interval[0], self.interval[1]]
- c = np.zeros(shape=(self.m, 1))
- c[j] = 1
- bp = BPoly(c, lim, extrapolate=False)
- res = bp(x.numpy())
- value = torch.from_numpy(np.nan_to_num(res))
- return value
-
- # return torch.from_numpy(bp(x.numpy()))
-
- def get_polynomial(self, j):
- if self.d == 1:
- lim = [self.interval[0], self.interval[1]]
- c = np.zeros(shape=(self.m, 1))
- c[j] = 1
- roots = PPoly.from_bernstein_basis(BPoly(c, lim)).roots()
- poly = np.polynomial.polynomial.Polynomial(np.polynomial.polynomial.polyfromroots(roots),
- domain=np.array(lim))
-
- elif self.d == 2:
- lim = [self.interval[0], self.interval[1]]
- k = j // self.m
- l = j % self.m
- c = np.zeros(shape=(self.m, 1))
- c[k] = 1
- bp = BPoly(c, lim)
- c = np.zeros(shape=(self.m, 1))
- c[l] = 1
- bp2 = BPoly(c, lim)
- roots1 = PPoly.from_bernstein_basis(bp).roots()
- roots2 = PPoly.from_bernstein_basis(bp2).roots()
- poly1 = np.polynomial.polynomial.Polynomial(np.polynomial.polynomial.polyfromroots(roots1),
- domain=np.array(lim))
- poly2 = np.polynomial.polynomial.Polynomial(np.polynomial.polynomial.polyfromroots(roots2),
- domain=np.array(lim))
- poly = poly1 * poly2
- return poly
-
- def integral(self, S):
- assert (S.d == self.d)
- psi = torch.zeros(self.get_m()).double()
-
- if self.d == 1:
- a, b = float(S.bounds[0, 0]), float(S.bounds[0, 1])
- for j in range(self.get_m()):
- lim = [self.interval[0], self.interval[1]]
- c = np.zeros(shape=(self.m, 1))
- c[j] = 1
- bp = BPoly(c, lim)
- xa = np.maximum(self.interval[0], a)
- xb = np.minimum(self.interval[1], b)
- psi[j] = bp.integrate(xa, xb, extrapolate=False)
-
- elif self.d == 2:
- xa, xb = S.bounds[0, 0], S.bounds[0, 1]
- ya, yb = S.bounds[1, 0], S.bounds[1, 1]
- for j in range(self.get_m()):
- lim = [self.interval[0], self.interval[1]]
-
- k = j // self.m
- l = j % self.m
-
- c = np.zeros(shape=(self.m, 1))
- c[k] = 1
- bp = BPoly(c, lim)
- vol1 = bp.integrate(xa, xb)
- c = np.zeros(shape=(self.m, 1))
- c[l] = 1
- bp = BPoly(c, lim)
- vol2 = bp.integrate(ya, yb)
- psi[j] = vol1 * vol2
-
- Gamma_half = self.cov()
- return psi @ Gamma_half
-
- def product_integral(self, S):
- m = self.get_m()
- Psi = torch.zeros(size=(m, m)).double()
- a, b = S.bounds[0, 0], S.bounds[0, 1]
- for i in range(m):
- for j in range(m):
- P = self.get_polynomial(i) * self.get_polynomial(j)
- new_p = P.integ()
- xb = np.minimum(new_p.domain[1], b)
- xa = np.maximum(new_p.domain[0], a)
- Psi[i, j] = new_p(xb) - new_p(xa)
- print(i, j, Psi[i, j])
- Gamma_half = self.cov()
- return Gamma_half @ Psi @ Gamma_half.T
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ def basis_fun(self, x, j): # 1d
+ r"""
+ Return the value of basis function \phi_j(x)
+ :param x: double, need to be in the interval
+ :param j: integer, index of hat functions, 0 <= j <= m-1
+ :return: \phi_j(x)
+ """
+ lim = [self.interval[0], self.interval[1]]
+ c = np.zeros(shape=(self.m, 1))
+ c[j] = 1
+ bp = BPoly(c, lim, extrapolate=False)
+ res = bp(x.numpy())
+ value = torch.from_numpy(np.nan_to_num(res))
+ return value
+
+ # return torch.from_numpy(bp(x.numpy()))
+
+ def get_polynomial(self, j):
+ if self.d == 1:
+ lim = [self.interval[0], self.interval[1]]
+ c = np.zeros(shape=(self.m, 1))
+ c[j] = 1
+ roots = PPoly.from_bernstein_basis(BPoly(c, lim)).roots()
+ poly = np.polynomial.polynomial.Polynomial(
+ np.polynomial.polynomial.polyfromroots(roots), domain=np.array(lim)
+ )
+
+ elif self.d == 2:
+ lim = [self.interval[0], self.interval[1]]
+ k = j // self.m
+ l = j % self.m
+ c = np.zeros(shape=(self.m, 1))
+ c[k] = 1
+ bp = BPoly(c, lim)
+ c = np.zeros(shape=(self.m, 1))
+ c[l] = 1
+ bp2 = BPoly(c, lim)
+ roots1 = PPoly.from_bernstein_basis(bp).roots()
+ roots2 = PPoly.from_bernstein_basis(bp2).roots()
+ poly1 = np.polynomial.polynomial.Polynomial(
+ np.polynomial.polynomial.polyfromroots(roots1), domain=np.array(lim)
+ )
+ poly2 = np.polynomial.polynomial.Polynomial(
+ np.polynomial.polynomial.polyfromroots(roots2), domain=np.array(lim)
+ )
+ poly = poly1 * poly2
+ return poly
+
+ def integral(self, S):
+ assert S.d == self.d
+ psi = torch.zeros(self.get_m()).double()
+
+ if self.d == 1:
+ a, b = float(S.bounds[0, 0]), float(S.bounds[0, 1])
+ for j in range(self.get_m()):
+ lim = [self.interval[0], self.interval[1]]
+ c = np.zeros(shape=(self.m, 1))
+ c[j] = 1
+ bp = BPoly(c, lim)
+ xa = np.maximum(self.interval[0], a)
+ xb = np.minimum(self.interval[1], b)
+ psi[j] = bp.integrate(xa, xb, extrapolate=False)
+
+ elif self.d == 2:
+ xa, xb = S.bounds[0, 0], S.bounds[0, 1]
+ ya, yb = S.bounds[1, 0], S.bounds[1, 1]
+ for j in range(self.get_m()):
+ lim = [self.interval[0], self.interval[1]]
+
+ k = j // self.m
+ l = j % self.m
+
+ c = np.zeros(shape=(self.m, 1))
+ c[k] = 1
+ bp = BPoly(c, lim)
+ vol1 = bp.integrate(xa, xb)
+ c = np.zeros(shape=(self.m, 1))
+ c[l] = 1
+ bp = BPoly(c, lim)
+ vol2 = bp.integrate(ya, yb)
+ psi[j] = vol1 * vol2
+
+ Gamma_half = self.cov()
+ return psi @ Gamma_half
+
+ def product_integral(self, S):
+ m = self.get_m()
+ Psi = torch.zeros(size=(m, m)).double()
+ a, b = S.bounds[0, 0], S.bounds[0, 1]
+ for i in range(m):
+ for j in range(m):
+ P = self.get_polynomial(i) * self.get_polynomial(j)
+ new_p = P.integ()
+ xb = np.minimum(new_p.domain[1], b)
+ xa = np.maximum(new_p.domain[0], a)
+ Psi[i, j] = new_p(xb) - new_p(xa)
+ print(i, j, Psi[i, j])
+ Gamma_half = self.cov()
+ return Gamma_half @ Psi @ Gamma_half.T
+
# def cov(self, inverse = False):
# if self.precomp == False:
@@ -127,272 +131,299 @@ def product_integral(self, S):
class BernsteinSplinesOverlapping(PositiveEmbedding):
- def __init__(self, *args, degree=4, **kwargs):
- super().__init__(*args, **kwargs)
- self.degree = degree
-
- def basis_fun(self, x, q, derivative=0, extrapolate=False): # 1d
- """
- Return the value of basis function \phi_j(x)
-
- :param x: double, need to be in the interval
- :param j: integer, index of hat functions, 0 <= j <= m-1
- :return: \phi_j(x)
- """
-
- j = q // (self.degree // 2)
- k = q % (self.degree // 2)
-
- dm = (self.interval[1] - self.interval[0]) / ((self.m // (self.degree // 2))) # delta m
- tj = self.interval[0] + j * dm
- lim = [tj, tj + 2 * dm]
-
- c = np.zeros(shape=(self.degree // 2, 1))
- c[k] = 1.
- bp = BPoly(c, lim)
- res = bp(x.numpy(), nu=derivative, extrapolate=extrapolate)
-
- if extrapolate == False:
- mask = x.numpy() == (tj + dm / 2)
- res[mask] = np.nan
- value = torch.from_numpy(np.nan_to_num(res))
- return value
-
- def integral(self, S):
- assert (S.d == self.d)
- psi = torch.zeros(self.get_m()).double()
-
- if self.d == 1:
- a, b = float(S.bounds[0, 0]), float(S.bounds[0, 1])
- for q in range(self.get_m()):
- j = q // self.degree
- k = q % self.degree
-
- dm = (self.interval[1] - self.interval[0]) / ((self.m // self.degree)) # delta m
- tj = self.interval[0] + j * dm
- lim = [tj, tj + dm]
- c = np.zeros(shape=(self.degree, 1))
- c[k] = 1.
- bp = BPoly(c, lim)
- xa = np.maximum(tj, a)
- xb = np.minimum(tj + dm, b)
- psi[q] = np.nan_to_num(bp.integrate(xa, xb, extrapolate=False))
-
- elif self.d == 2:
- xa, xb = S.bounds[0, 0], S.bounds[0, 1]
- ya, yb = S.bounds[1, 0], S.bounds[1, 1]
- for z in range(self.get_m()):
- q1 = z // self.m
- q2 = z % self.m
-
- j1 = q1 // self.degree
- k1 = q1 % self.degree
- j2 = q2 // self.degree
- k2 = q2 % self.degree
-
- dm = (self.interval[1] - self.interval[0]) / ((self.m // self.degree)) # delta m
- tj1 = self.interval[0] + j1 * dm
- tj2 = self.interval[0] + j2 * dm
- lim1 = [tj1, tj1 + dm]
- lim2 = [tj2, tj2 + dm]
- c = np.zeros(shape=(self.degree, 1))
- c[k1] = 1.
- bp = BPoly(c, lim1)
- vol1 = bp.integrate(xa, xb)
- c = np.zeros(shape=(self.degree, 1))
- c[k2] = 1.
- bp = BPoly(c, lim2)
- vol2 = bp.integrate(ya, yb)
- psi[z] = vol1 * vol2
-
- Gamma_half = self.cov()
- return psi @ Gamma_half
+ def __init__(self, *args, degree=4, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.degree = degree
+
+ def basis_fun(self, x, q, derivative=0, extrapolate=False): # 1d
+ r"""
+ Return the value of basis function \phi_j(x)
+
+ :param x: double, need to be in the interval
+ :param j: integer, index of hat functions, 0 <= j <= m-1
+ :return: \phi_j(x)
+ """
+
+ j = q // (self.degree // 2)
+ k = q % (self.degree // 2)
+
+ dm = (self.interval[1] - self.interval[0]) / (
+ (self.m // (self.degree // 2))
+ ) # delta m
+ tj = self.interval[0] + j * dm
+ lim = [tj, tj + 2 * dm]
+
+ c = np.zeros(shape=(self.degree // 2, 1))
+ c[k] = 1.0
+ bp = BPoly(c, lim)
+ res = bp(x.numpy(), nu=derivative, extrapolate=extrapolate)
+
+ if extrapolate == False:
+ mask = x.numpy() == (tj + dm / 2)
+ res[mask] = np.nan
+ value = torch.from_numpy(np.nan_to_num(res))
+ return value
+
+ def integral(self, S):
+ assert S.d == self.d
+ psi = torch.zeros(self.get_m()).double()
+
+ if self.d == 1:
+ a, b = float(S.bounds[0, 0]), float(S.bounds[0, 1])
+ for q in range(self.get_m()):
+ j = q // self.degree
+ k = q % self.degree
+
+ dm = (self.interval[1] - self.interval[0]) / (
+ (self.m // self.degree)
+ ) # delta m
+ tj = self.interval[0] + j * dm
+ lim = [tj, tj + dm]
+ c = np.zeros(shape=(self.degree, 1))
+ c[k] = 1.0
+ bp = BPoly(c, lim)
+ xa = np.maximum(tj, a)
+ xb = np.minimum(tj + dm, b)
+ psi[q] = np.nan_to_num(bp.integrate(xa, xb, extrapolate=False))
+
+ elif self.d == 2:
+ xa, xb = S.bounds[0, 0], S.bounds[0, 1]
+ ya, yb = S.bounds[1, 0], S.bounds[1, 1]
+ for z in range(self.get_m()):
+ q1 = z // self.m
+ q2 = z % self.m
+
+ j1 = q1 // self.degree
+ k1 = q1 % self.degree
+ j2 = q2 // self.degree
+ k2 = q2 % self.degree
+
+ dm = (self.interval[1] - self.interval[0]) / (
+ (self.m // self.degree)
+ ) # delta m
+ tj1 = self.interval[0] + j1 * dm
+ tj2 = self.interval[0] + j2 * dm
+ lim1 = [tj1, tj1 + dm]
+ lim2 = [tj2, tj2 + dm]
+ c = np.zeros(shape=(self.degree, 1))
+ c[k1] = 1.0
+ bp = BPoly(c, lim1)
+ vol1 = bp.integrate(xa, xb)
+ c = np.zeros(shape=(self.degree, 1))
+ c[k2] = 1.0
+ bp = BPoly(c, lim2)
+ vol2 = bp.integrate(ya, yb)
+ psi[z] = vol1 * vol2
+
+ Gamma_half = self.cov()
+ return psi @ Gamma_half
class BernsteinSplinesEmbedding(PositiveEmbedding):
- def __init__(self, *args, degree=4, derivatives=2, **kwargs):
- super().__init__(*args, **kwargs)
- self.degree = degree
- self.derivatives = derivatives
-
- # def basis_fun(self, x, j, k, derivative = 0, extrapolate = False): #1d
- def basis_fun(self, x, q, derivative=0, extrapolate=False): # 1d
- """
- Return the value of basis function \phi_j(x)
-
- :param x: double, need to be in the interval
- :param j: integer, index of hat functions, 0 <= j <= m-1
- :return: \phi_j(x)
- """
-
- j = q // self.degree
- k = q % self.degree
-
- dm = (self.interval[1] - self.interval[0]) / ((self.m // self.degree)) # delta m
- tj = self.interval[0] + j * dm
-
- lim = [tj, tj + dm]
- c = np.zeros(shape=(self.degree, 1))
- c[k] = 1.
- bp = BPoly(c, lim)
- res = bp(x.numpy(), nu=derivative, extrapolate=extrapolate)
-
- if extrapolate == False:
- mask = x.numpy() == (tj + dm)
- res[mask] = np.nan
- value = torch.from_numpy(np.nan_to_num(res))
- return value
-
- def embed_internal_derivative(self, x, l=1, extrapolate=False):
- if self.d == 1:
- out = torch.zeros(size=(x.size()[0], self.m), dtype=torch.float64)
- for j in range(0, self.m, 1):
- out[:, j] = self.basis_fun(x, j, derivative=l, extrapolate=extrapolate).view(-1)
- return out
-
- def get_constraints(self):
- s = self.m ** self.d
-
- # positivity constraints
- l = np.full(s, self.b)
- u = np.full(s, self.B)
- I = np.identity(s)
-
- # pointwise fix
- Zs = []
- vs = []
- for j in range(self.derivatives + 1):
- no_nodes = (self.m // self.degree) - 1
- Z = np.zeros(shape=(no_nodes, s))
- dm = (self.interval[1] - self.interval[0]) / ((self.m // self.degree)) # delta m
-
- for i in range(no_nodes):
- ti = torch.from_numpy(np.array(self.interval[0] + (i + 1) * dm)).view(1, -1)
- Z[i, i * self.degree:i * self.degree + self.degree] = self.embed_internal_derivative(ti, l=j,
- extrapolate=True).numpy().reshape(
- -1)[i * self.degree:i * self.degree + self.degree]
- Z[i, (i + 1) * self.degree:(i + 1) * self.degree + self.degree] = -self.embed_internal_derivative(ti,
- l=j,
- extrapolate=True).numpy().reshape(
- -1)[(i + 1) * self.degree:(i + 1) * self.degree + self.degree]
- v = np.zeros(self.m // self.degree - 1)
- Zs.append(Z)
- vs.append(v)
-
- Lambda = np.concatenate([I] + Zs)
- l = np.concatenate([l] + vs)
- u = np.concatenate([u] + vs)
- return (l, Lambda, u)
-
- def integral(self, S):
- assert (S.d == self.d)
- psi = torch.zeros(self.get_m()).double()
-
- if self.d == 1:
- a, b = float(S.bounds[0, 0]), float(S.bounds[0, 1])
- for q in range(self.get_m()):
- j = q // self.degree
- k = q % self.degree
-
- dm = (self.interval[1] - self.interval[0]) / ((self.m // self.degree)) # delta m
- tj = self.interval[0] + j * dm
- lim = [tj, tj + dm]
- c = np.zeros(shape=(self.degree, 1))
- c[k] = 1.
- bp = BPoly(c, lim)
- xa = np.maximum(tj, a)
- xb = np.minimum(tj + dm, b)
- psi[q] = np.nan_to_num(bp.integrate(xa, xb, extrapolate=False))
-
- elif self.d == 2:
- xa, xb = S.bounds[0, 0], S.bounds[0, 1]
- ya, yb = S.bounds[1, 0], S.bounds[1, 1]
- for z in range(self.get_m()):
- q1 = z // self.m
- q2 = z % self.m
-
- j1 = q1 // self.degree
- k1 = q1 % self.degree
- j2 = q2 // self.degree
- k2 = q2 % self.degree
-
- dm = (self.interval[1] - self.interval[0]) / ((self.m // self.degree)) # delta m
- tj1 = self.interval[0] + j1 * dm
- tj2 = self.interval[0] + j2 * dm
- lim1 = [tj1, tj1 + dm]
- lim2 = [tj2, tj2 + dm]
- c = np.zeros(shape=(self.degree, 1))
- c[k1] = 1.
- bp = BPoly(c, lim1)
- vol1 = bp.integrate(xa, xb)
- c = np.zeros(shape=(self.degree, 1))
- c[k2] = 1.
- bp = BPoly(c, lim2)
- vol2 = bp.integrate(ya, yb)
- psi[z] = vol1 * vol2
-
- Gamma_half = self.cov()
- return psi @ Gamma_half
-
- def product_integral(self, S):
- pass
+ def __init__(self, *args, degree=4, derivatives=2, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.degree = degree
+ self.derivatives = derivatives
+
+ # def basis_fun(self, x, j, k, derivative = 0, extrapolate = False): #1d
+ def basis_fun(self, x, q, derivative=0, extrapolate=False): # 1d
+ r"""
+ Return the value of basis function \phi_j(x)
+
+ :param x: double, need to be in the interval
+ :param j: integer, index of hat functions, 0 <= j <= m-1
+ :return: \phi_j(x)
+ """
+
+ j = q // self.degree
+ k = q % self.degree
+
+ dm = (self.interval[1] - self.interval[0]) / (
+ (self.m // self.degree)
+ ) # delta m
+ tj = self.interval[0] + j * dm
+
+ lim = [tj, tj + dm]
+ c = np.zeros(shape=(self.degree, 1))
+ c[k] = 1.0
+ bp = BPoly(c, lim)
+ res = bp(x.numpy(), nu=derivative, extrapolate=extrapolate)
+
+ if extrapolate == False:
+ mask = x.numpy() == (tj + dm)
+ res[mask] = np.nan
+ value = torch.from_numpy(np.nan_to_num(res))
+ return value
+
+ def embed_internal_derivative(self, x, l=1, extrapolate=False):
+ if self.d == 1:
+ out = torch.zeros(size=(x.size()[0], self.m), dtype=torch.float64)
+ for j in range(0, self.m, 1):
+ out[:, j] = self.basis_fun(
+ x, j, derivative=l, extrapolate=extrapolate
+ ).view(-1)
+ return out
+
+ def get_constraints(self):
+ s = self.m**self.d
+
+ # positivity constraints
+ l = np.full(s, self.b)
+ u = np.full(s, self.B)
+ I = np.identity(s)
+
+ # pointwise fix
+ Zs = []
+ vs = []
+ for j in range(self.derivatives + 1):
+ no_nodes = (self.m // self.degree) - 1
+ Z = np.zeros(shape=(no_nodes, s))
+ dm = (self.interval[1] - self.interval[0]) / (
+ (self.m // self.degree)
+ ) # delta m
+
+ for i in range(no_nodes):
+ ti = torch.from_numpy(np.array(self.interval[0] + (i + 1) * dm)).view(
+ 1, -1
+ )
+ Z[i, i * self.degree : i * self.degree + self.degree] = (
+ self.embed_internal_derivative(ti, l=j, extrapolate=True)
+ .numpy()
+ .reshape(-1)[i * self.degree : i * self.degree + self.degree]
+ )
+ Z[i, (i + 1) * self.degree : (i + 1) * self.degree + self.degree] = (
+ -self.embed_internal_derivative(ti, l=j, extrapolate=True)
+ .numpy()
+ .reshape(-1)[
+ (i + 1) * self.degree : (i + 1) * self.degree + self.degree
+ ]
+ )
+ v = np.zeros(self.m // self.degree - 1)
+ Zs.append(Z)
+ vs.append(v)
+
+ Lambda = np.concatenate([I] + Zs)
+ l = np.concatenate([l] + vs)
+ u = np.concatenate([u] + vs)
+ return (l, Lambda, u)
+
+ def integral(self, S):
+ assert S.d == self.d
+ psi = torch.zeros(self.get_m()).double()
+
+ if self.d == 1:
+ a, b = float(S.bounds[0, 0]), float(S.bounds[0, 1])
+ for q in range(self.get_m()):
+ j = q // self.degree
+ k = q % self.degree
+
+ dm = (self.interval[1] - self.interval[0]) / (
+ (self.m // self.degree)
+ ) # delta m
+ tj = self.interval[0] + j * dm
+ lim = [tj, tj + dm]
+ c = np.zeros(shape=(self.degree, 1))
+ c[k] = 1.0
+ bp = BPoly(c, lim)
+ xa = np.maximum(tj, a)
+ xb = np.minimum(tj + dm, b)
+ psi[q] = np.nan_to_num(bp.integrate(xa, xb, extrapolate=False))
+
+ elif self.d == 2:
+ xa, xb = S.bounds[0, 0], S.bounds[0, 1]
+ ya, yb = S.bounds[1, 0], S.bounds[1, 1]
+ for z in range(self.get_m()):
+ q1 = z // self.m
+ q2 = z % self.m
+
+ j1 = q1 // self.degree
+ k1 = q1 % self.degree
+ j2 = q2 // self.degree
+ k2 = q2 % self.degree
+
+ dm = (self.interval[1] - self.interval[0]) / (
+ (self.m // self.degree)
+ ) # delta m
+ tj1 = self.interval[0] + j1 * dm
+ tj2 = self.interval[0] + j2 * dm
+ lim1 = [tj1, tj1 + dm]
+ lim2 = [tj2, tj2 + dm]
+ c = np.zeros(shape=(self.degree, 1))
+ c[k1] = 1.0
+ bp = BPoly(c, lim1)
+ vol1 = bp.integrate(xa, xb)
+ c = np.zeros(shape=(self.degree, 1))
+ c[k2] = 1.0
+ bp = BPoly(c, lim2)
+ vol2 = bp.integrate(ya, yb)
+ psi[z] = vol1 * vol2
+
+ Gamma_half = self.cov()
+ return psi @ Gamma_half
+
+ def product_integral(self, S):
+ pass
if __name__ == "__main__":
- from stpy.continuous_processes.gauss_procc import GaussianProcess
- from stpy.helpers.helper import interval
- import matplotlib.pyplot as plt
- from stpy.kernels import KernelFunction
- from stpy.embeddings.bump_bases import FaberSchauderEmbedding
-
- d = 1
- m = 32
- n = 64
- N = 10
-
- sqrtbeta = 2
- s = 0.001
- b = 0.0
- B = 200
-
- gamma = 0.1
- kernel_object = KernelFunction(gamma=gamma)
-
- # Emb = BernsteinSplinesEmbedding(d, m,kernel_object=kernel_object, offset=0.5,b=b,B=B,s = s)
- EmbBern = BernsteinEmbedding(d, m, kernel_object=kernel_object, offset=0.5, b=b, B=B, s=s)
- EmbFaber = FaberSchauderEmbedding(d, m, kernel_object=kernel_object, offset=0.5, b=b, B=B, s=s)
- GP = GaussianProcess(d=d, s=s, kernel=kernel_object)
- # GPNyst = KernelizedFeatures(embedding=EmbNys.GP,m = m, s = s,)
-
- xtest = torch.from_numpy(interval(n, d, L_infinity_ball=1.1))
- x = torch.from_numpy(np.random.uniform(-1, 1, N)).view(-1, 1)
-
- F_true = lambda x: torch.sin(x) ** 2 - 0.1
- F = lambda x: F_true(x) + s * torch.randn(x.size()[0]).view(-1, 1).double()
- y = F(x)
-
- # Emb.fit_gp(x,y)
- EmbBern.fit(x, y)
- EmbFaber.fit(x, y)
-
- GP.fit_gp(x, y)
-
- # mu = Emb.mean_std(xtest)
- mu_true, _ = GP.mean_std(xtest)
- mu_bern = EmbBern.mean_std(xtest)
- mu_faber = EmbFaber.mean_std(xtest)
-
- plt.plot(xtest, xtest * 0 + b, 'k--')
- # plt.plot(xtest, xtest * 0 + B, 'k--')
-
- plt.plot(xtest, F_true(xtest), 'r', label='true')
- # plt.plot(xtest,mu_true_nyst,color = 'lightblue', label = 'Nystrom')
- plt.plot(xtest, mu_true, 'b--', label='no-constraints')
-
- plt.plot(x, y, 'ro')
- # plt.plot(xtest, mu, 'g-x', label = 'splines Bernstein')
- plt.plot(xtest, mu_bern, 'y-o', label='Bernstein basis')
- plt.plot(xtest, mu_faber, 'g-o', label='Faber basis')
- plt.legend()
- plt.show()
+ from stpy.continuous_processes.gauss_procc import GaussianProcess
+ from stpy.helpers.helper import interval
+ import matplotlib.pyplot as plt
+ from stpy.kernels import KernelFunction
+ from stpy.embeddings.bump_bases import FaberSchauderEmbedding
+
+ d = 1
+ m = 32
+ n = 64
+ N = 10
+
+ sqrtbeta = 2
+ s = 0.001
+ b = 0.0
+ B = 200
+
+ gamma = 0.1
+ kernel_object = KernelFunction(gamma=gamma)
+
+ # Emb = BernsteinSplinesEmbedding(d, m,kernel_object=kernel_object, offset=0.5,b=b,B=B,s = s)
+ EmbBern = BernsteinEmbedding(
+ d, m, kernel_object=kernel_object, offset=0.5, b=b, B=B, s=s
+ )
+ EmbFaber = FaberSchauderEmbedding(
+ d, m, kernel_object=kernel_object, offset=0.5, b=b, B=B, s=s
+ )
+ GP = GaussianProcess(d=d, s=s, kernel=kernel_object)
+ # GPNyst = KernelizedFeatures(embedding=EmbNys.GP,m = m, s = s,)
+
+ xtest = torch.from_numpy(interval(n, d, L_infinity_ball=1.1))
+ x = torch.from_numpy(np.random.uniform(-1, 1, N)).view(-1, 1)
+
+ F_true = lambda x: torch.sin(x) ** 2 - 0.1
+ F = lambda x: F_true(x) + s * torch.randn(x.size()[0]).view(-1, 1).double()
+ y = F(x)
+
+ # Emb.fit_gp(x,y)
+ EmbBern.fit(x, y)
+ EmbFaber.fit(x, y)
+
+ GP.fit_gp(x, y)
+
+ # mu = Emb.mean_std(xtest)
+ mu_true, _ = GP.mean_std(xtest)
+ mu_bern = EmbBern.mean_std(xtest)
+ mu_faber = EmbFaber.mean_std(xtest)
+
+ plt.plot(xtest, xtest * 0 + b, "k--")
+ # plt.plot(xtest, xtest * 0 + B, 'k--')
+
+ plt.plot(xtest, F_true(xtest), "r", label="true")
+ # plt.plot(xtest,mu_true_nyst,color = 'lightblue', label = 'Nystrom')
+ plt.plot(xtest, mu_true, "b--", label="no-constraints")
+
+ plt.plot(x, y, "ro")
+ # plt.plot(xtest, mu, 'g-x', label = 'splines Bernstein')
+ plt.plot(xtest, mu_bern, "y-o", label="Bernstein basis")
+ plt.plot(xtest, mu_faber, "g-o", label="Faber basis")
+ plt.legend()
+ plt.show()
diff --git a/stpy/embeddings/bump_bases.py b/stpy/embeddings/bump_bases.py
index 3e71555..5d86e2c 100644
--- a/stpy/embeddings/bump_bases.py
+++ b/stpy/embeddings/bump_bases.py
@@ -11,120 +11,119 @@
class TriangleEmbedding(PositiveEmbedding):
- def __init__(self, *args, **kwargs):
-
- super().__init__(*args, **kwargs)
-
- def basis_fun(self, x, j):
- """
- Return the value of basis function \phi_j(x)
-
- :param x: double, need to be in the interval
- :param j: integer, index of hat functions, 0 <= j <= m-1
- :return: \phi_j(x)
- """
-
- dm = (self.interval[1] - self.interval[0]) / (self.m - 1) # delta m
- tj = self.interval[0] + (j) * dm
- res = 1 - torch.abs((x - tj) / dm)
- res[res < 0] = 0
- return res
-
- def integrate_1d(self, a, b, tj, dm):
- """
- :param a: from
- :param b: to
- :param tj: node
- :param dm: width
- :return:
- """
- if a <= tj - dm and b >= tj + dm: # contained
- vol = 1. * dm
-
- elif a >= tj + dm or b <= tj - dm: # outside
- vol = 0.
-
- elif a <= tj - dm and b >= tj and b <= tj + dm: # a out , b inside second half
- res = max(1. - np.abs((b - tj) / dm), 0)
- vol = dm * 0.5 + (b - tj) * (1. + res) / 2.
-
- elif b >= tj + dm and a <= tj and a >= tj - dm: # b out, a inside first half
- res = max(1. - np.abs((a - tj) / dm), 0)
- vol = dm * 0.5 + (tj - a) * (1. + res) / 2.
-
- elif a <= tj - dm and b <= tj and b >= tj - dm: # a out, b inside first half
- res = max(1. - np.abs((b - tj) / dm), 0)
- vol = 0.5 * (b - (tj - dm)) * res
-
- elif b >= tj + dm and a >= tj and a <= tj + dm: # b out, a inside second half
- res = max(1. - np.abs((a - tj) / dm), 0)
- vol = 0.5 * ((tj + dm) - a) * res
-
-
- else: # inside
- resa = max(1. - np.abs((a - tj) / dm), 0)
- resb = max(1. - np.abs((b - tj) / dm), 0)
-
- if b <= tj:
- vol = (b - a) * (resb + resa) / 2.
- elif a >= tj:
- vol = (b - a) * (resa + resb) / 2.
- else:
- vol = (tj - a) * (1 + resa) / 2. + (b - tj) * (resb + 1) / 2.
-
- return vol
-
- def integral(self, S):
- """
- Integrate the Phi(x) over S
- :param S: borel set
- :return:
- """
- if S in self.procomp_integrals.keys():
- return self.procomp_integrals[S]
-
-
- else:
- assert (S.d == self.d)
- psi = torch.zeros(self.get_m()).double()
- if S.type == "box":
- if self.d == 1:
- dm = (self.interval[1] - self.interval[0]) / (self.m - 1) # delta m
- a, b = S.bounds[0, 0], S.bounds[0, 1]
- for j in range(self.get_m()):
- tj = self.interval[0] + j * dm
- vol = self.integrate_1d(a.numpy(), b.numpy(), tj, dm)
- psi[j] = vol
-
- elif self.d == 2:
- dm = (self.interval[1] - self.interval[0]) / (self.m - 1) # delta m
-
- xa, xb = S.bounds[0, 0], S.bounds[0, 1]
- ya, yb = S.bounds[1, 0], S.bounds[1, 1]
-
- for j in range(self.get_m()):
- tj = self.interval[0] + (j % self.m) * dm
- tk = self.interval[0] + (j // self.m) * dm
-
- # triangle center point
- # center_point = torch.Tensor( [tj,tk]).view(-1,1)
- vol = self.integrate_1d(xa.numpy(), xb.numpy(), tk, dm)
- vol2 = self.integrate_1d(ya.numpy(), yb.numpy(), tj, dm)
- psi[j] = vol * vol2
- # if torch.sum(S.is_inside(center_point)):
- # psi[j] = (dm**2)/3.
- else:
- raise ("more than 2D not implemented.")
-
- elif S.type == "round":
- weights, nodes = S.return_legendre_discretization(30)
- vals = self.embed_internal(nodes)
- psi = weights.view(1, -1) @ vals
-
- Gamma_half = self.cov()
- emb = psi @ Gamma_half
- self.procomp_integrals[S] = emb
- return emb
+ def __init__(self, *args, **kwargs):
+
+ super().__init__(*args, **kwargs)
+
+ def basis_fun(self, x, j):
+ r"""
+ Return the value of basis function \phi_j(x)
+
+ :param x: double, needs to be in the interval
+ :param j: integer, index of hat functions, 0 <= j <= m-1
+ :return: \phi_j(x)
+ """
+
+ dm = (self.interval[1] - self.interval[0]) / (self.m - 1) # delta m
+ tj = self.interval[0] + (j) * dm
+ res = 1 - torch.abs((x - tj) / dm)
+ res[res < 0] = 0
+ return res
+
+ def integrate_1d(self, a, b, tj, dm):
+ """
+ :param a: from
+ :param b: to
+ :param tj: node
+ :param dm: width
+ :return:
+ """
+ if a <= tj - dm and b >= tj + dm: # contained
+ vol = 1.0 * dm
+
+ elif a >= tj + dm or b <= tj - dm: # outside
+ vol = 0.0
+
+ elif a <= tj - dm and b >= tj and b <= tj + dm: # a out , b inside second half
+ res = max(1.0 - np.abs((b - tj) / dm), 0)
+ vol = dm * 0.5 + (b - tj) * (1.0 + res) / 2.0
+
+ elif b >= tj + dm and a <= tj and a >= tj - dm: # b out, a inside first half
+ res = max(1.0 - np.abs((a - tj) / dm), 0)
+ vol = dm * 0.5 + (tj - a) * (1.0 + res) / 2.0
+
+ elif a <= tj - dm and b <= tj and b >= tj - dm: # a out, b inside first half
+ res = max(1.0 - np.abs((b - tj) / dm), 0)
+ vol = 0.5 * (b - (tj - dm)) * res
+
+ elif b >= tj + dm and a >= tj and a <= tj + dm: # b out, a inside second half
+ res = max(1.0 - np.abs((a - tj) / dm), 0)
+ vol = 0.5 * ((tj + dm) - a) * res
+
+ else: # inside
+ resa = max(1.0 - np.abs((a - tj) / dm), 0)
+ resb = max(1.0 - np.abs((b - tj) / dm), 0)
+
+ if b <= tj:
+ vol = (b - a) * (resb + resa) / 2.0
+ elif a >= tj:
+ vol = (b - a) * (resa + resb) / 2.0
+ else:
+ vol = (tj - a) * (1 + resa) / 2.0 + (b - tj) * (resb + 1) / 2.0
+
+ return vol
+
+ def integral(self, S):
+ r"""
+ Integrate the Phi(x) over S
+ :param S: borel set
+ :return: $\int_S \Phi(x) dx$
+ """
+ if S in self.precomp_integral.keys():
+ return self.precomp_integral[S]
+
+ else:
+ assert S.d == self.d
+ psi = torch.zeros(self.get_m()).double()
+ if S.type == "box":
+ if self.d == 1:
+ dm = (self.interval[1] - self.interval[0]) / (self.m - 1) # delta m
+ a, b = S.bounds[0, 0], S.bounds[0, 1]
+ for j in range(self.get_m()):
+ tj = self.interval[0] + j * dm
+ vol = self.integrate_1d(a.numpy(), b.numpy(), tj, dm)
+ psi[j] = float(vol)
+
+ elif self.d == 2:
+ dm = (self.interval[1] - self.interval[0]) / (self.m - 1) # delta m
+
+ xa, xb = S.bounds[0, 0], S.bounds[0, 1]
+ ya, yb = S.bounds[1, 0], S.bounds[1, 1]
+
+ for j in range(self.get_m()):
+ tj = self.interval[0] + (j % self.m) * dm
+ tk = self.interval[0] + (j // self.m) * dm
+
+ # triangle center point
+ # center_point = torch.tensor( [tj,tk]).view(-1,1)
+ vol = self.integrate_1d(xa.numpy(), xb.numpy(), tk, dm)
+ vol2 = self.integrate_1d(ya.numpy(), yb.numpy(), tj, dm)
+ psi[j] = float(vol * vol2)
+ # if torch.sum(S.is_inside(center_point)):
+ # psi[j] = (dm**2)/3.
+ else:
+ raise ("more than 2D not implemented.")
+
+ elif S.type == "round":
+ weights, nodes = S.return_legendre_discretization(30)
+ vals = self.embed_internal(nodes)
+ psi = weights.view(1, -1) @ vals
+
+ Gamma_half = self.cov()
+ emb = psi @ Gamma_half
+ self.precomp_integral[S] = emb
+ return emb
+
# def product_integral(self, S):
# assert( S.d == self.d)
@@ -145,286 +144,303 @@ def integral(self, S):
class FaberSchauderEmbedding(TriangleEmbedding):
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
- if int(np.log2(self.m)) != np.log2(self.m):
- raise AssertionError("This basis works only with log_2(n) is integer.")
-
- def basis_fun(self, x, j):
- """
- Return the value of basis function \phi_j(x)
-
- :param x: double, need to be in the interval
- :param j: integer, index of hat functions, 0 <= j <= m-1
- :return: \phi_j(x)
- """
- if j == 0:
- res = x * 0 + 1
- elif j == 1:
- dm = (self.interval[1] - self.interval[0]) / 2 # delta m
- res = 1 - torch.abs((x) / dm)
- res[res < 0] = 0
- else:
- level = np.floor(np.log2(j))
- start = 2 ** level
- dm = (self.interval[1] - self.interval[0]) / (2 * start)
- tj = self.interval[0] + (j - start) * 2 * dm + dm
- res = 1 - torch.abs((x - tj) / dm)
- res[res < 0] = 0
- return res
-
- def integral(self, S):
- assert (S.d == self.d)
- psi = torch.zeros(self.get_m()).double()
-
- if self.d == 1:
- a, b = S.bounds[0, 0], S.bounds[0, 1]
- for j in range(self.get_m()):
- if j == 0:
- vol = (b - a)
- elif j == 1:
- dm = (self.interval[1] - self.interval[0]) / 2 # delta m
- vol = self.integrate_1d(a.numpy(), b.numpy(), 0, dm)
- else:
- level = np.floor(np.log2(j))
- start = 2 ** level
- dm = (self.interval[1] - self.interval[0]) / (2 * start)
- tj = self.interval[0] + (j - start) * 2 * dm + dm
- vol = self.integrate_1d(a.numpy(), b.numpy(), tj, dm)
- psi[j] = vol
- return psi
-
- def hierarchical_mask(self):
- mask = [0]
- for i in range(int(np.log2(self.m))):
- for j in range(2**i):
- mask.append(i+1)
- return torch.Tensor(mask)
-
- def product_integral(self):
- raise NotImplementedError("Not implemented.")
- pass
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ if int(np.log2(self.m)) != np.log2(self.m):
+ raise AssertionError("This basis works only with log_2(n) is integer.")
+
+ def basis_fun(self, x, j):
+ r"""
+ Return the value of basis function \phi_j(x)
+
+ :param x: double, needs to be in the interval
+ :param j: integer, index of hat functions, 0 <= j <= m-1
+ :return: \phi_j(x)
+ """
+ if j == 0:
+ res = x * 0 + 1
+ elif j == 1:
+ dm = (self.interval[1] - self.interval[0]) / 2 # delta m
+ res = 1 - torch.abs((x) / dm)
+ res[res < 0] = 0
+ else:
+ level = np.floor(np.log2(j))
+ start = 2**level
+ dm = (self.interval[1] - self.interval[0]) / (2 * start)
+ tj = self.interval[0] + (j - start) * 2 * dm + dm
+ res = 1 - torch.abs((x - tj) / dm)
+ res[res < 0] = 0
+ return res
+
+ def integral(self, S):
+ assert S.d == self.d
+ psi = torch.zeros(self.get_m()).double()
+
+ if self.d == 1:
+ a, b = S.bounds[0, 0], S.bounds[0, 1]
+ for j in range(self.get_m()):
+ if j == 0:
+ vol = b - a
+ elif j == 1:
+ dm = (self.interval[1] - self.interval[0]) / 2 # delta m
+ vol = self.integrate_1d(a.numpy(), b.numpy(), 0, dm)
+ else:
+ level = np.floor(np.log2(j))
+ start = 2**level
+ dm = (self.interval[1] - self.interval[0]) / (2 * start)
+ tj = self.interval[0] + (j - start) * 2 * dm + dm
+ vol = self.integrate_1d(a.numpy(), b.numpy(), tj, dm)
+ psi[j] = vol
+ return psi
+
+ def hierarchical_mask(self):
+ mask = [0]
+ for i in range(int(np.log2(self.m))):
+ for j in range(2**i):
+ mask.append(i + 1)
+ return torch.tensor(mask)
+
+ def product_integral(self):
+ raise NotImplementedError("Not implemented.")
+ pass
class KuhnExponentialEmbedding(PositiveEmbedding):
- """
- Basis from: Covering numbers of Gaussian reproducing kernel Hilbert spaces
- by Thomas Kuhn
+ """
+ Basis from: Covering numbers of Gaussian reproducing kernel Hilbert spaces
+ by Thomas Kuhn
- """
+ """
- def __init__(self, *args, gamma=0.1, **kwargs):
- super().__init__(self, *args, **kwargs)
- self.gamma = gamma
+ def __init__(self, *args, gamma=0.1, **kwargs):
+ super().__init__(self, *args, **kwargs)
+ self.gamma = gamma
- def basis_fun(self, x, j):
- k = np.exp(j / 2 * np.log(1. / self.gamma) - (j / 2) * scipy.special.gammaln(j + 1))
- res = k * (x ** j) * torch.exp(- (x ** 2) / (2 * self.gamma ** 2))
- mask1 = x < 0
- mask2 = x > 1
- res[mask1] = 0.
- res[mask2] = 0.
- return res
+ def basis_fun(self, x, j):
+ k = np.exp(
+ j / 2 * np.log(1.0 / self.gamma) - (j / 2) * scipy.special.gammaln(j + 1)
+ )
+ res = k * (x**j) * torch.exp(-(x**2) / (2 * self.gamma**2))
+ mask1 = x < 0
+ mask2 = x > 1
+ res[mask1] = 0.0
+ res[mask2] = 0.0
+ return res
class CustomHaarBumps(PositiveEmbedding):
- """
+ """
- Custom Haar basis that cover different sized pockets of domain
+ Custom Haar basis that cover different sized pockets of domain
- """
+ """
- # def __init__(self, *args, **kwargs):
- # super().__init__(self,*args, **kwargs)
- # nodes = None
- # widths = None
- # self.nodes = nodes
- # self.widths = widths
+ # def __init__(self, *args, **kwargs):
+ # super().__init__(self,*args, **kwargs)
+ # nodes = None
+ # widths = None
+ # self.nodes = nodes
+ # self.widths = widths
- def __init__(self, d, m, nodes, widths, weights, **kwargs):
- super().__init__(d, m, **kwargs)
- self.nodes = nodes
- self.widths = widths
- self.weights = weights
+ def __init__(self, d, m, nodes, widths, weights, **kwargs):
+ super().__init__(d, m, **kwargs)
+ self.nodes = nodes
+ self.widths = widths
+ self.weights = weights
- def basis_fun(self, x, j):
+ def basis_fun(self, x, j):
- if self.nodes is None or self.widths is None:
- super().basis_fun(x, j)
- else:
- mask = np.abs(x - self.nodes[j]) < self.widths[j]
- out = x * 0
- out[mask] = self.weights[j]
- return out
+ if self.nodes is None or self.widths is None:
+ super().basis_fun(x, j)
+ else:
+ mask = np.abs(x - self.nodes[j]) < self.widths[j]
+ out = x * 0
+ out[mask] = self.weights[j]
+ return out
class BumpsEmbedding(PositiveEmbedding):
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
- def integrate(self, a, b, j):
- vol = 0.
- return vol
+ def integrate(self, a, b, j):
+ vol = 0.0
+ return vol
- def integral(self, S):
- """
- Integrate the Phi(x) over S
- :param S: borel set
- :return:
- """
- assert (S.d == self.d)
- psi = torch.zeros(self.get_m()).double()
+ def integral(self, S):
+ """
+ Integrate the Phi(x) over S
+ :param S: borel set
+ :return:
+ """
+ assert S.d == self.d
+ psi = torch.zeros(self.get_m()).double()
- a, b = S.bounds[0, 0], S.bounds[0, 1]
- for j in range(self.get_m()):
- vol = self.integrate(a.numpy(), b.numpy(), j)
- psi[j] = vol
+ a, b = S.bounds[0, 0], S.bounds[0, 1]
+ for j in range(self.get_m()):
+ vol = self.integrate(a.numpy(), b.numpy(), j)
+ psi[j] = vol
- def basis_fun(self, x, j): # 1d
- """
- Return the value of basis function \phi_j(x)
+ def basis_fun(self, x, j): # 1d
+ r"""
+ Return the value of basis function \phi_j(x)
- :param x: double, need to be in the interval
- :param j: integer, index of hat functions, 0 <= j <= m-1
- :return: \phi_j(x)
- """
+ :param x: double, needs to be in the interval
+ :param j: integer, index of hat functions, 0 <= j <= m-1
+ :return: \phi_j(x)
+ """
- dm = (self.interval[1] - self.interval[0]) / (self.m - 1) # delta m
- tj = self.interval[0] + (j) * dm
- res = -(x - tj) * (x - (tj + (2 * dm))) * (1. / (dm ** 2))
- res[res < 0] = 0
- return res
+ dm = (self.interval[1] - self.interval[0]) / (self.m - 1) # delta m
+ tj = self.interval[0] + (j) * dm
+ res = -(x - tj) * (x - (tj + (2 * dm))) * (1.0 / (dm**2))
+ res[res < 0] = 0
+ return res
class PositiveNystromEmbeddingBump(PositiveEmbedding):
- def __init__(self, *args, samples=300, **kwargs):
- super().__init__(*args, **kwargs)
- self.samples = np.maximum(samples, self.m)
-
- B = BorelSet(1, torch.Tensor([[self.interval[0], self.interval[1]]]).double())
- x = B.return_discretization(256)
- y = x[:, 0].view(-1, 1) * 0
-
- print("Starting optimal basis construction, with m =", self.m)
- self.new_kernel_object = KernelFunction(kernel_name=self.kernel_object.optkernel,
- gamma=self.kernel_object.gamma)
- self.GP = NystromFeatures(self.new_kernel_object, m=self.m, approx='positive_svd',
- samples=self.samples)
- self.GP.fit_gp(x, y)
- print("Optimal basis constructed.")
- if torch.sum(torch.isnan(self.GP.embed(x))) > 0:
- print("Failed basis? (zero is good):", torch.sum(torch.isnan(self.GP.embed(x))))
-
- self.precomp_integral = {}
-
- def basis_fun(self, x, j):
- return self.GP.embed(x)[:, j].view(-1, 1)
-
- def get_constraints(self):
- s = self.m ** self.d
- l = np.full(s, 0.0).astype(float)
- u = np.full(s, 10e10)
- Lambda = np.identity(s)
- return (l, Lambda, u)
-
- def integral(self, S):
- assert (S.d == self.d)
-
- if S in self.precomp_integral.keys():
- return self.precomp_integral[S]
- else:
- if S.d == 1:
- weights, nodes = S.return_legendre_discretization(n=256)
- psi = torch.sum(torch.diag(weights) @ self.GP.embed(nodes), dim=0)
- Gamma_half = self.cov()
- psi = Gamma_half.T @ psi
- self.precomp_integral[S] = psi
- elif S.d == 2:
- weights, nodes = S.return_legendre_discretization(n=50)
- vals = self.embed_internal(nodes)
- psi = torch.sum(torch.diag(weights) @ vals, dim=0)
- Gamma_half = self.cov()
- psi = Gamma_half.T @ psi
- self.precomp_integral[S] = psi
- if torch.sum(torch.isnan(psi)) > 0:
- print("Failed integrals? (0 is good):", torch.sum(torch.isnan(psi)))
-
- else:
- raise NotImplementedError("Higher dimension not implemented.")
- return psi
-
- def cov(self, inverse=False):
-
- if self.precomp == False:
-
- x = torch.linspace(self.interval[0], self.interval[1], 256)
- vals = self.GP.embed(x)
- indices = torch.argmax(vals, dim=0)
- t = x[indices]
-
- if self.d == 1:
- t = t.view(-1, 1).double()
- elif self.d == 2:
- t = torch.from_numpy(cartesian([t.numpy(), t.numpy()])).double()
- elif self.d == 3:
- t = torch.from_numpy(cartesian([t.numpy(), t.numpy(), t.numpy()])).double()
-
- self.Gamma = self.kernel(t, t)
- Z = self.embed_internal(t)
-
- M = torch.pinverse(Z.T @ Z + (self.s) * torch.eye(self.Gamma.size()[0]))
- self.M = torch.from_numpy(np.real(scipy.linalg.sqrtm(M.numpy())))
-
- # self.Gamma_half = torch.cholesky(Gamma \
- # + self.s * self.s * torch.eye(Gamma.size()[0]).double(), upper = True )
-
- self.Gamma_half = torch.from_numpy(
- np.real(scipy.linalg.sqrtm(self.Gamma.numpy() + (self.s ** 2) * np.eye(self.Gamma.size()[0]))))
- self.Gamma_half = self.M @ self.Gamma_half
- self.invGamma_half = torch.pinverse(self.Gamma_half)
- self.precomp = True
- else:
- pass
-
- if inverse == True:
- return self.Gamma_half, self.invGamma_half
- else:
- return self.Gamma_half
+ def __init__(self, *args, samples=300, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.samples = np.maximum(samples, self.m)
+
+ B = BorelSet(1, torch.tensor([[self.interval[0], self.interval[1]]]).double())
+ x = B.return_discretization(256)
+ y = x[:, 0].view(-1, 1) * 0
+
+ print("Starting optimal basis construction, with m =", self.m)
+ self.new_kernel_object = KernelFunction(
+ kernel_name=self.kernel_object.optkernel, gamma=self.kernel_object.gamma
+ )
+ self.GP = NystromFeatures(
+ self.new_kernel_object,
+ m=self.m,
+ approx="positive_svd",
+ samples=self.samples,
+ )
+ self.GP.fit_gp(x, y)
+ print("Optimal basis constructed.")
+ if torch.sum(torch.isnan(self.GP.embed(x))) > 0:
+ print(
+ "Failed basis? (zero is good):",
+ torch.sum(torch.isnan(self.GP.embed(x))),
+ )
+
+ self.precomp_integral = {}
+
+ def basis_fun(self, x, j):
+ return self.GP.embed(x)[:, j].view(-1, 1)
+
+ def get_constraints(self):
+ s = self.m**self.d
+ l = np.full(s, 0.0).astype(float)
+ u = np.full(s, 10e10)
+ Lambda = np.identity(s)
+ return (l, Lambda, u)
+
+ def integral(self, S):
+ assert S.d == self.d
+
+ if S in self.precomp_integral.keys():
+ return self.precomp_integral[S]
+ else:
+ if S.d == 1:
+ weights, nodes = S.return_legendre_discretization(n=256)
+ psi = torch.sum(torch.diag(weights) @ self.GP.embed(nodes), dim=0)
+ Gamma_half = self.cov()
+ psi = Gamma_half.T @ psi
+ self.precomp_integral[S] = psi
+ elif S.d == 2:
+ weights, nodes = S.return_legendre_discretization(n=50)
+ vals = self.embed_internal(nodes)
+ psi = torch.sum(torch.diag(weights) @ vals, dim=0)
+ Gamma_half = self.cov()
+ psi = Gamma_half.T @ psi
+ self.precomp_integral[S] = psi
+ if torch.sum(torch.isnan(psi)) > 0:
+ print("Failed integrals? (0 is good):", torch.sum(torch.isnan(psi)))
+
+ else:
+ raise NotImplementedError("Higher dimension not implemented.")
+ return psi
+
+ def cov(self, inverse=False):
+
+ if self.precomp == False:
+
+ x = torch.linspace(self.interval[0], self.interval[1], 256)
+ vals = self.GP.embed(x)
+ indices = torch.argmax(vals, dim=0)
+ t = x[indices]
+
+ if self.d == 1:
+ t = t.view(-1, 1).double()
+ elif self.d == 2:
+ t = torch.from_numpy(cartesian([t.numpy(), t.numpy()])).double()
+ elif self.d == 3:
+ t = torch.from_numpy(
+ cartesian([t.numpy(), t.numpy(), t.numpy()])
+ ).double()
+
+ self.Gamma = self.kernel(t, t)
+ Z = self.embed_internal(t)
+
+ M = torch.pinverse(Z.T @ Z + (self.s) * torch.eye(self.Gamma.size()[0]))
+ self.M = torch.from_numpy(np.real(scipy.linalg.sqrtm(M.numpy())))
+
+ # self.Gamma_half = torch.cholesky(Gamma \
+ # + self.s * self.s * torch.eye(Gamma.size()[0]).double(), upper = True )
+
+ self.Gamma_half = torch.from_numpy(
+ np.real(
+ scipy.linalg.sqrtm(
+ self.Gamma.numpy() + (self.s**2) * np.eye(self.Gamma.size()[0])
+ )
+ )
+ )
+ self.Gamma_half = self.M @ self.Gamma_half
+ self.invGamma_half = torch.pinverse(self.Gamma_half)
+ self.precomp = True
+ else:
+ pass
+
+ if inverse == True:
+ return self.Gamma_half, self.invGamma_half
+ else:
+ return self.Gamma_half
if __name__ == "__main__":
- from stpy.continuous_processes.gauss_procc import GaussianProcess
- from stpy.helpers.helper import interval
- import matplotlib.pyplot as plt
-
- d = 1
- m = 32
- n = 64
- N = 20
- sqrtbeta = 2
- s = 0.01
- b = 0
- gamma = 0.1
- k = KernelFunction(gamma=gamma)
-
- Emb = FaberSchauderEmbedding(d, m, offset=0.2, s=s, b=b, B=1000., kernel_object=k)
- GP = GaussianProcess(d=d, s=s)
- xtest = torch.from_numpy(interval(n, d))
-
- x = torch.from_numpy(np.random.uniform(-1, 1, N)).view(-1, 1)
-
- F_true = lambda x: torch.sin(x) ** 2 - 0.1
- F = lambda x: F_true(x) + s * torch.randn(x.size()[0]).view(-1, 1).double()
- y = F(x)
- Emb.fit(x, y)
- GP.fit_gp(x, y)
- mu = Emb.mean_std(xtest)
- mu_true, _ = GP.mean_std(xtest)
- plt.plot(xtest, F_true(xtest), 'b', label='true')
- plt.plot(xtest, mu_true, 'b--', label='GP')
- plt.plot(x, y, 'ro')
- plt.plot(xtest, mu, 'g-', label='positive basis ')
- plt.legend()
- plt.show()
+ from stpy.continuous_processes.gauss_procc import GaussianProcess
+ from stpy.helpers.helper import interval
+ import matplotlib.pyplot as plt
+
+ d = 1
+ m = 32
+ n = 64
+ N = 20
+ sqrtbeta = 2
+ s = 0.01
+ b = 0
+ gamma = 0.1
+ k = KernelFunction(gamma=gamma)
+
+ Emb = FaberSchauderEmbedding(d, m, offset=0.2, s=s, b=b, B=1000.0, kernel_object=k)
+ GP = GaussianProcess(d=d, s=s)
+ xtest = torch.from_numpy(interval(n, d))
+
+ x = torch.from_numpy(np.random.uniform(-1, 1, N)).view(-1, 1)
+
+ F_true = lambda x: torch.sin(x) ** 2 - 0.1
+ F = lambda x: F_true(x) + s * torch.randn(x.size()[0]).view(-1, 1).double()
+ y = F(x)
+ Emb.fit(x, y)
+ GP.fit_gp(x, y)
+ mu = Emb.mean_std(xtest)
+ mu_true, _ = GP.mean_std(xtest)
+ plt.plot(xtest, F_true(xtest), "b", label="true")
+ plt.plot(xtest, mu_true, "b--", label="GP")
+ plt.plot(x, y, "ro")
+ plt.plot(xtest, mu, "g-", label="positive basis ")
+ plt.legend()
+ plt.show()
diff --git a/stpy/embeddings/embedding.py b/stpy/embeddings/embedding.py
index f8a5394..3d2ac08 100755
--- a/stpy/embeddings/embedding.py
+++ b/stpy/embeddings/embedding.py
@@ -6,7 +6,7 @@
__email__ = "mojmir.mutny@inf.ethz.ch"
__status__ = "DEV"
-"""
+r"""
This file implements code used in paper:
Mojmir Mutny & Andreas Krause, "Efficient High Dimensional Bayesian Optimization
@@ -50,83 +50,105 @@
import stpy.helpers.quadrature_helper as quad_help
-class Embedding():
- """
- Base class for Embeddings to approximate kernels with a higher dimensional linear product.
- """
-
- def __init__(self, gamma=0.1, nu=0.5, m=100, d=1, diameter=1.0, groups=None, kappa=1.0,
- kernel="squared_exponential", cosine=False, approx="rff", **kwargs):
- """
- Called to calculate the embedding weights (either via sampling or deterministically)
-
- Args:
- gamma: (positional, 0.1) bandwidth of the squared exponential kernel
- nu: (positional, 0.5) the parameter of Matern family
- m: (positional, 1)
- d: dimension of the
-
- Returns:
- None
- """
- self.gamma = float(gamma)
- self.n = nu
- self.m = int(m)
- self.d = int(d)
- self.nu = nu
- self.kappa = kappa
- self.cosine = cosine
- self.diameter = diameter
- self.groups = groups
- self.kernel = kernel
- self.approx = approx
- self.gradient_avail = 0
- if self.m % 2 == 1:
- raise AssertionError("Number of random features has to be even.")
-
- def sample(self):
- """
- Called to calculate the embedding weights (either via sampling or deterministically)
-
- Args:
- None
-
- Returns:
- None
- """
- raise AttributeError("Only derived classes can call this method.")
-
- def embed(self, x):
- """
- Called to calculate the embedding weights (either via sampling or deterministically)
-
- Args:
- x: numpy array containing the points to be embedded in the format (n,d)
-
- Returns:
- y: numpy array containg the embedded points (n,m), where m is the embedding dimension
- """
-
- raise AttributeError("Only derived classes can call this method.")
-
- def get_m(self):
- """
-
- :return:
-
- """
- return self.m
-
- def integral(self, S):
- a = S.bounds[:, 0]
- b = S.bounds[:, 1]
- psi = torch.zeros(self.m).double()
-
- for i in range(self.m // 2):
- omegas = self.W[i, :].view(-1)
- psi[i] = quad_help.integrate_cos_multidimensional(a.numpy(), b.numpy(), omegas.numpy())
- psi[self.m // 2 + i] = quad_help.integrate_sin_multidimensional(a.numpy(), b.numpy(), omegas.numpy())
- return psi
+class Embedding:
+ """
+ Base class for Embeddings to approximate kernels with a higher dimensional linear product.
+ """
+
+ def __init__(
+ self,
+ gamma=0.1,
+ nu=0.5,
+ m=100,
+ d=1,
+ diameter=1.0,
+ groups=None,
+ kappa=1.0,
+ kernel="squared_exponential",
+ cosine=False,
+ approx="rff",
+ **kwargs
+ ):
+ """
+ Store the kernel and embedding parameters shared by all embeddings (m has to be even)
+
+ Args:
+ gamma: (positional, 0.1) bandwidth of the squared exponential kernel
+ nu: (positional, 0.5) the parameter of Matern family
+ m: (positional, 100) number of features, has to be even
+ d: (positional, 1) dimension of the input points
+
+ Returns:
+ None
+ """
+ self.gamma = float(gamma)
+ self.n = nu
+ self.m = int(m)
+ self.d = int(d)
+ self.nu = nu
+ self.kappa = kappa
+ self.cosine = cosine
+ self.diameter = diameter
+ self.groups = groups
+ self.kernel = kernel
+ self.approx = approx
+ self.gradient_avail = 0
+ if self.m % 2 == 1:
+ raise AssertionError("Number of random features has to be even.")
+
+ def sample(self):
+ """
+ Called to calculate the embedding weights (either via sampling or deterministically)
+
+ Args:
+ None
+
+ Returns:
+ None
+ """
+ raise AttributeError("Only derived classes can call this method.")
+
+ def embed(self, x):
+ """
+ Called to embed the points x into the feature space (implemented by derived classes)
+
+ Args:
+ x: numpy array containing the points to be embedded in the format (n,d)
+
+ Returns:
+ y: numpy array containing the embedded points (n,m), where m is the embedding dimension
+ """
+
+ raise AttributeError("Only derived classes can call this method.")
+
+ def get_m(self):
+ """
+
+ :return:
+
+ """
+ return self.m
+
+ def integral(self, S):
+ """
+ Compute the integral of each basis function (feature) over the set S
+
+ :param S: Borel set
+ :return: array of length self.m of integrals of each basis function over the set S
+ """
+ a = S.bounds[:, 0]
+ b = S.bounds[:, 1]
+ psi = torch.zeros(self.m).double()
+
+ for i in range(self.m // 2):
+ omegas = self.W[i, :].view(-1)
+ psi[i] = quad_help.integrate_cos_multidimensional(
+ a.numpy(), b.numpy(), omegas.numpy()
+ )
+ psi[self.m // 2 + i] = quad_help.integrate_sin_multidimensional(
+ a.numpy(), b.numpy(), omegas.numpy()
+ )
+ return psi
"""
@@ -137,108 +159,124 @@ def integral(self, S):
class RFFEmbedding(Embedding):
- """
- Random Fourier Features emebedding
- """
-
- def __init__(self, biased=False, **kwargs):
- super().__init__(**kwargs)
- self.biased = biased
- self.sample()
-
- def sampler(self, size):
- """
- Defines the sampler object
-
- Args:
- size:
-
- Return:
- """
- if self.kernel == "squared_exponential":
- distribution = lambda size: np.random.normal(size=size) * (1. / self.gamma)
- inv_cum_dist = lambda x: norm.ppf(x) * (1. / self.gamma)
-
- elif self.kernel == "laplace":
- distribution = None
- inv_cum_dist = lambda x: (np.tan(np.pi * x - np.pi) / self.gamma)
-
- elif self.kernel == "modified_matern":
- if self.nu == 2:
- distribution = None
- inv_cum_dist = None
- pdf = lambda x: np.prod(2 * (self.gamma) / (np.power((1. + self.gamma ** 2 * x ** 2), 2) * np.pi),
- axis=1)
- elif self.nu == 3:
- distribution = None
- inv_cum_dist = None
- pdf = lambda x: np.prod((8. * self.gamma) / (np.power((1. + self.gamma ** 2 * x ** 2), 3) * 3 * np.pi),
- axis=1)
- elif self.nu == 4:
- distribution = None
- inv_cum_dist = None
- pdf = lambda x: np.prod((16. * self.gamma) / (np.power((1. + self.gamma ** 2 * x ** 2), 4) * 5 * np.pi),
- axis=1)
-
- # Random Fourier Features
- if self.approx == "rff":
- if distribution == None:
- if inv_cum_dist == None:
- self.W = helper.rejection_sampling(pdf, size=size)
- else:
- self.W = helper.sample_custom(inv_cum_dist, size=size)
- else:
- self.W = distribution(size)
-
- # Quasi Fourier Features
- elif self.approx == "halton":
- if inv_cum_dist != None:
- self.W = helper.sample_qmc_halton(inv_cum_dist, size=size)
- else:
- raise AssertionError("Inverse Cumulative Distribution could not be deduced")
-
- elif self.approx == "orf":
- distribution = lambda size: np.random.normal(size=size) * (1.)
- self.W = distribution(size)
-
- # QR decomposition
- self.Q, _ = np.linalg.qr(self.W)
- # df and size
- self.S = np.diag(chi.rvs(size[1], size=size[0]))
- self.W = np.dot(self.S, self.Q) / self.gamma ** 2
-
- return self.W
-
- def sample(self):
- """
- Samples Random Fourier Features
- """
- self.W = self.sampler(size=(self.m, self.d))
- self.W = torch.from_numpy(self.W)
-
- if self.biased == True:
- self.b = 2. * np.pi * np.random.uniform(size=(self.m))
- self.bs = self.b.reshape(self.m, 1)
- self.b = torch.from_numpy(self.b)
- self.bs = torch.from_numpy(self.bs)
-
- def embed(self, x):
- """
- :param x: torch array
- :return: embeded vector
- """
- (times, d) = x.shape
- if self.biased == True:
- z = np.sqrt(2. / self.m) * torch.t(torch.cos(self.W[:, 0:d].mm(torch.t(x)) + self.b.view(self.m, 1)))
- else:
- q = self.W[:, 0:d].mm(torch.t(x))
- # z[0:int(self.m / 2), :] = \
- z1 = np.sqrt(2. / float(self.m)) * torch.cos(q[0:int(self.m / 2), :])
- # z[int(self.m / 2):self.m, :] = np.sqrt(2. / float(self.m)) * torch.sin(q[int(self.m / 2):self.m, :])
- z2 = np.sqrt(2. / float(self.m)) * torch.sin(q[int(self.m / 2):self.m, :])
- z = torch.cat([z1, z2])
-
- return torch.t(z) * np.sqrt(self.kappa)
+ """
+ Random Fourier Features embedding
+ """
+
+ def __init__(self, biased=False, **kwargs):
+ super().__init__(**kwargs)
+ self.biased = biased
+ self.sample()
+
+ def sampler(self, size):
+ """
+ Defines the sampler object
+
+ Args:
+ size:
+
+ Return:
+ """
+ if self.kernel == "squared_exponential":
+ distribution = lambda size: np.random.normal(size=size) * (1.0 / self.gamma)
+ inv_cum_dist = lambda x: norm.ppf(x) * (1.0 / self.gamma)
+
+ elif self.kernel == "laplace":
+ distribution = None
+ inv_cum_dist = lambda x: (np.tan(np.pi * x - np.pi) / self.gamma)
+
+ elif self.kernel == "modified_matern":
+ if self.nu == 2:
+ distribution = None
+ inv_cum_dist = None
+ pdf = lambda x: np.prod(
+ 2
+ * (self.gamma)
+ / (np.power((1.0 + self.gamma**2 * x**2), 2) * np.pi),
+ axis=1,
+ )
+ elif self.nu == 3:
+ distribution = None
+ inv_cum_dist = None
+ pdf = lambda x: np.prod(
+ (8.0 * self.gamma)
+ / (np.power((1.0 + self.gamma**2 * x**2), 3) * 3 * np.pi),
+ axis=1,
+ )
+ elif self.nu == 4:
+ distribution = None
+ inv_cum_dist = None
+ pdf = lambda x: np.prod(
+ (16.0 * self.gamma)
+ / (np.power((1.0 + self.gamma**2 * x**2), 4) * 5 * np.pi),
+ axis=1,
+ )
+
+ # Random Fourier Features
+ if self.approx == "rff":
+ if distribution == None:
+ if inv_cum_dist == None:
+ self.W = helper.rejection_sampling(pdf, size=size)
+ else:
+ self.W = helper.sample_custom(inv_cum_dist, size=size)
+ else:
+ self.W = distribution(size)
+
+ # Quasi Fourier Features
+ elif self.approx == "halton":
+ if inv_cum_dist != None:
+ self.W = helper.sample_qmc_halton(inv_cum_dist, size=size)
+ else:
+ raise AssertionError(
+ "Inverse Cumulative Distribution could not be deduced"
+ )
+
+ elif self.approx == "orf":
+ distribution = lambda size: np.random.normal(size=size) * (1.0)
+ self.W = distribution(size)
+
+ # QR decomposition
+ self.Q, _ = np.linalg.qr(self.W)
+ # df and size
+ self.S = np.diag(chi.rvs(size[1], size=size[0]))
+ self.W = np.dot(self.S, self.Q) / self.gamma**2
+
+ return self.W
+
+ def sample(self):
+ """
+ Samples Random Fourier Features
+ """
+ self.W = self.sampler(size=(self.m, self.d))
+ self.W = torch.from_numpy(self.W)
+
+ if self.biased == True:
+ self.b = 2.0 * np.pi * np.random.uniform(size=(self.m))
+ self.bs = self.b.reshape(self.m, 1)
+ self.b = torch.from_numpy(self.b)
+ self.bs = torch.from_numpy(self.bs)
+
+ def embed(self, x):
+ """
+ :param x: torch array
+ :return: embedded vector
+ """
+ (times, d) = x.shape
+ if self.biased == True:
+ z = np.sqrt(2.0 / self.m) * torch.t(
+ torch.cos(self.W[:, 0:d].mm(torch.t(x)) + self.b.view(self.m, 1))
+ )
+ else:
+ q = self.W[:, 0:d].mm(torch.t(x))
+ # z[0:int(self.m / 2), :] = \
+ z1 = np.sqrt(2.0 / float(self.m)) * torch.cos(q[0 : int(self.m / 2), :])
+ # z[int(self.m / 2):self.m, :] = np.sqrt(2. / float(self.m)) * torch.sin(q[int(self.m / 2):self.m, :])
+ z2 = np.sqrt(2.0 / float(self.m)) * torch.sin(
+ q[int(self.m / 2) : self.m, :]
+ )
+ z = torch.cat([z1, z2])
+
+ return torch.t(z) * np.sqrt(self.kappa)
"""
@@ -249,523 +287,611 @@ def embed(self, x):
class QuadratureEmbedding(Embedding):
- """
- General quadrature embedding
- """
-
- def __init__(self, scale=1.0, **kwargs):
- Embedding.__init__(self, **kwargs)
- self.scale = scale
- self.compute()
-
- def reorder_complexity(self, omegas, weights):
- abs_omegas = np.abs(omegas)
- order = np.argsort(abs_omegas)
- new_omegas = omegas[order]
- new_weights = weights[order]
- return new_omegas, new_weights
-
- def derivative_1(self, x):
- (times, d) = tuple(x.size())
- # z = torch.from_numpy(np.zeros(shape=(self.m, times),dtype=x.dtype))
- z = torch.zeros(self.d, self.m, times, dtype=x.dtype)
- q = torch.mm(self.W[:, 0:d], torch.t(x)) # (m,d)x(d,n)
-
- omegas = self.W[:, 0:d] # (m,d)
-
- if self.cosine == False:
- z[:, 0:int(self.m / 2), :] = -torch.einsum('ij,ik->jik', omegas,
- torch.sqrt(self.weights.view(-1, 1)) * torch.sin(
- q)) # (m,d) (m,n)
- z[:, int(self.m / 2):self.m, :] = torch.einsum('ij,ik->jik', omegas,
- torch.sqrt(self.weights.view(-1, 1)) * torch.cos(q))
- else:
- raise NotImplementedError("Cosine only features derivative not implemented")
-
- return np.sqrt(self.kappa) * z
-
- def derivative_2(self, x):
- (times, d) = tuple(x.size())
- # z = torch.from_numpy(np.zeros(shape=(self.m, times),dtype=x.dtype))
- z = torch.zeros(self.d, self.d, self.m, times, dtype=x.dtype)
- q = torch.mm(self.W[:, 0:d], torch.t(x)) # (m,d)x(d,n)
-
- omegas = self.W[:, 0:d] # (m,d)
-
- if self.cosine == False:
- z[:, :, 0:int(self.m / 2), :] = -torch.einsum('il,ij,ik->jlik', omegas, omegas,
- torch.sqrt(self.weights.view(-1, 1)) * torch.cos(
- q)) # (m,d) (m,d) (m,n)
- z[:, :, int(self.m / 2):self.m, :] = -torch.einsum('il,ij,ik->jlik', omegas, omegas,
- torch.sqrt(self.weights.view(-1, 1)) * torch.sin(q))
- else:
- raise NotImplementedError("Cosine only features derivative not implemented")
-
- return np.sqrt(self.kappa) * z
-
- def product_integral(self, S):
- """
- Compute the outer product integral
- :param S: Borel set
- :return: m times m matrix with integrate entries
- """
- assert S.d == 1 or S.d == 2
- if S.d == 1:
- a = S.bounds[0, 0]
- b = S.bounds[0, 1]
- h = self.m // 2
- Psi = torch.zeros(size=(self.m, self.m)).double()
- for i in range(h):
- for j in range(h):
- Psi[i, j] = torch.sqrt(self.weights[i] * self.weights[j]) * quad_help.integrate_cos_cos(a, b,
- self.W[
- i, 0],
- self.W[
- j, 0]) # cos cos
- Psi[i, j + h] = torch.sqrt(self.weights[i] * self.weights[j]) * quad_help.integrate_sin_cos(a, b,
- self.W[
- i, 0],
- self.W[
- j, 0]) # cos sin
- Psi[i + h, j] = torch.sqrt(self.weights[j] * self.weights[i]) * quad_help.integrate_sin_cos(a, b,
- self.W[
- j, 0],
- self.W[
- i, 0]) # sin cos
- Psi[i + h, j + h] = torch.sqrt(self.weights[i] * self.weights[j]) * quad_help.integrate_sin_sin(a,
- b,
- self.W[
- i, 0],
- self.W[
- j, 0]) # sin sin
- elif S.d == 2:
- xa = S.bounds[0, 0]
- xb = S.bounds[0, 1]
- ya = S.bounds[1, 0]
- yb = S.bounds[1, 1]
- h = self.m // 2
- Psi = torch.zeros(size=(self.m, self.m)).double()
- for i in range(h):
- for j in range(h):
- Psi[i, j] = torch.sqrt(self.weights[i] * self.weights[j]) \
- * quad_help.integrate2d_cos_cos(xa, ya, xb, yb, self.W[i, 0], self.W[i, 1],
- self.W[j, 0], self.W[j, 1]) # cos cos
- Psi[i, j + h] = torch.sqrt(self.weights[i] * self.weights[j]) \
- * quad_help.integrate2d_sin_cos(xa, ya, xb, yb, self.W[i, 0], self.W[i, 1],
- self.W[j, 0], self.W[j, 1]) # cos cos
- Psi[i + h, j] = torch.sqrt(self.weights[j] * self.weights[i]) \
- * quad_help.integrate2d_sin_cos(xa, ya, xb, yb, self.W[j, 0], self.W[j, 1],
- self.W[i, 0], self.W[i, 1]) # cos cos
- Psi[i + h, j + h] = torch.sqrt(self.weights[i] * self.weights[j]) \
- * quad_help.integrate2d_sin_sin(xa, ya, xb, yb, self.W[i, 0], self.W[i, 1],
- self.W[j, 0], self.W[j, 1]) # cos cos
- return self.kappa * Psi
-
- def compute(self, complexity_reorder=True):
- """
- Computes the tensor grid for Fourier features
- :return:
- """
-
- if self.cosine == False:
- self.q = int(np.power(self.m // 2, 1. / self.d))
- self.m = self.q ** self.d
- else:
- self.q = int(np.power(self.m, 1. / self.d))
- self.m = self.q ** self.d
-
- (omegas, weights) = self.nodesAndWeights(self.q)
-
- if complexity_reorder == True:
- (omegas, weights) = self.reorder_complexity(omegas, weights)
-
- self.weights = helper.cartesian([weights for weight in range(self.d)])
- self.weights = np.prod(self.weights, axis=1)
-
- v = [omegas for omega in range(self.d)]
- self.W = helper.cartesian(v)
-
- if self.cosine == False:
- self.m = self.m * 2
- else:
- pass
-
- self.W = torch.from_numpy(self.W)
- self.weights = torch.from_numpy(self.weights)
-
- def transform(self):
- """
-
- :return: spectral density of a kernel
- """
- if self.kernel == "squared_exponential":
- p = lambda omega: np.exp(-np.sum(omega ** 2, axis=1).reshape(-1, 1) / 2 * (self.gamma ** 2)) * np.power(
- (self.gamma / np.sqrt(2 * np.pi)), 1.) * np.power(np.pi / 2, 1.)
-
- elif self.kernel == "laplace":
- p = lambda omega: np.prod(1. / ((self.gamma ** 2) * (omega ** 2) + 1.), axis=1).reshape(-1, 1) * np.power(
- self.gamma / 2., 1.)
-
- elif self.kernel == "modified_matern":
- if self.nu == 2:
- p = lambda omega: np.prod(1. / ((self.gamma ** 2) * (omega ** 2) + 1.) ** self.nu, axis=1).reshape(-1,
- 1) * np.power(
- self.gamma * 1, 1.)
- elif self.nu == 3:
- p = lambda omega: np.prod(1. / ((self.gamma ** 2) * (omega ** 2) + 1.) ** self.nu, axis=1).reshape(-1,
- 1) * np.power(
- self.gamma * 4 / 3, 1.)
- elif self.nu == 4:
- p = lambda omega: np.prod(1. / ((self.gamma ** 2) * (omega ** 2) + 1.) ** self.nu, axis=1).reshape(-1,
- 1) * np.power(
- self.gamma * 8 / 5, 1.)
-
- return p
-
- def nodesAndWeights(self, q):
- """
- Compute nodes and weights of the quadrature scheme in 1D
-
- :param q: degree of quadrature
- :return: tuple of (nodes, weights)
- """
-
- # For osciallatory integrands even this has good properties.
- # weights = np.ones(self.q) * self.scale * np.pi / (self.q + 1)
- # omegas = (np.linspace(0, self.q - 1, self.q)) + 1
- # omegas = omegas * (np.pi / (self.q + 1))
-
- (omegas, weights) = np.polynomial.legendre.leggauss(2 * q)
-
- omegas = omegas[q:]
- weights = 2 * weights[q:]
-
- omegas = ((omegas + 1.) / 2.) * np.pi
- sine_scale = (1. / (np.sin(omegas) ** 2))
- omegas = self.scale / np.tan(omegas)
- prob = self.transform()
- weights = self.scale * sine_scale * weights * prob(omegas.reshape(-1, 1)).flatten()
- return (omegas, weights)
-
- def embed(self, x):
- """
- :param x: torch array
- :return: embeding of the x
- """
- (times, d) = tuple(x.size())
- # z = torch.from_numpy(np.zeros(shape=(self.m, times),dtype=x.dtype))
- z = torch.zeros(self.m, times, dtype=x.dtype)
- q = torch.mm(self.W[:, 0:d], torch.t(x))
-
- if self.cosine == False:
- z[0:int(self.m / 2), :] = torch.sqrt(self.weights.view(-1, 1)) * torch.cos(q)
- z[int(self.m / 2):self.m, :] = torch.sqrt(self.weights.view(-1, 1)) * torch.sin(q)
- else:
- z = torch.sqrt(self.weights.view(-1, 1)) * torch.cos(q)
-
- return torch.t(z) * np.sqrt(self.kappa)
-
- def get_sub_indices(self, group):
- """
- :param group: group part of the embeding to embed
- :return: embeding of x in group
- """
- m2 = self.m
- mhalf = int(np.power(self.m // 2, 1. / self.d))
-
- m = 2 * mhalf
- mquater = mhalf // 2
-
- if group == 0:
- ind = np.arange(mquater * mhalf, (mquater + 1) * mhalf, 1).tolist() + np.arange(m2 // 2 + (mquater * mhalf),
- m2 // 2 + (
- mquater + 1) * mhalf,
- 1).tolist()
- return ind
- else:
- ind = np.arange(mquater, m2 // 2, mhalf).tolist() + np.arange(m2 // 2 + mquater, m2, mhalf).tolist()
- return ind
-
- def get_sum_sub_indices(self, group):
-
- # idenitfy unique values
- arr = self.W[:, group]
- values = np.unique(arr)
- # find indices of each unique value
- ind = []
- for value in values:
- ind_inside = []
- for index, elem in enumerate(arr):
- if elem == value:
- ind_inside.append(index)
- ind.append(ind_inside)
- ind_inside2 = [i + self.m // 2 for i in ind_inside]
- ind.append(ind_inside2)
- return ind
+ """
+ General quadrature embedding
+ """
+
+ def __init__(self, scale=1.0, **kwargs):
+ Embedding.__init__(self, **kwargs)
+ self.scale = scale
+ self.compute()
+
+ def reorder_complexity(self, omegas, weights):
+ abs_omegas = np.abs(omegas)
+ order = np.argsort(abs_omegas)
+ new_omegas = omegas[order]
+ new_weights = weights[order]
+ return new_omegas, new_weights
+
+ def derivative_1(self, x):
+ (times, d) = tuple(x.size())
+ # z = torch.from_numpy(np.zeros(shape=(self.m, times),dtype=x.dtype))
+ z = torch.zeros(self.d, self.m, times, dtype=x.dtype)
+ q = torch.mm(self.W[:, 0:d], torch.t(x)) # (m,d)x(d,n)
+
+ omegas = self.W[:, 0:d] # (m,d)
+
+ if self.cosine == False:
+ z[:, 0 : int(self.m / 2), :] = -torch.einsum(
+ "ij,ik->jik",
+ omegas,
+ torch.sqrt(self.weights.view(-1, 1)) * torch.sin(q),
+ ) # (m,d) (m,n)
+ z[:, int(self.m / 2) : self.m, :] = torch.einsum(
+ "ij,ik->jik",
+ omegas,
+ torch.sqrt(self.weights.view(-1, 1)) * torch.cos(q),
+ )
+ else:
+ raise NotImplementedError("Cosine only features derivative not implemented")
+
+ return np.sqrt(self.kappa) * z
+
+ def derivative_2(self, x):
+ (times, d) = tuple(x.size())
+ # z = torch.from_numpy(np.zeros(shape=(self.m, times),dtype=x.dtype))
+ z = torch.zeros(self.d, self.d, self.m, times, dtype=x.dtype)
+ q = torch.mm(self.W[:, 0:d], torch.t(x)) # (m,d)x(d,n)
+
+ omegas = self.W[:, 0:d] # (m,d)
+
+ if self.cosine == False:
+ z[:, :, 0 : int(self.m / 2), :] = -torch.einsum(
+ "il,ij,ik->jlik",
+ omegas,
+ omegas,
+ torch.sqrt(self.weights.view(-1, 1)) * torch.cos(q),
+ ) # (m,d) (m,d) (m,n)
+ z[:, :, int(self.m / 2) : self.m, :] = -torch.einsum(
+ "il,ij,ik->jlik",
+ omegas,
+ omegas,
+ torch.sqrt(self.weights.view(-1, 1)) * torch.sin(q),
+ )
+ else:
+ raise NotImplementedError("Cosine only features derivative not implemented")
+
+ return np.sqrt(self.kappa) * z
+
+ def product_integral(self, S):
+ """
+ Compute the outer product integral
+ :param S: Borel set
+ :return: m times m matrix with integrate entries
+ """
+ assert S.d == 1 or S.d == 2
+ if S.d == 1:
+ a = S.bounds[0, 0]
+ b = S.bounds[0, 1]
+ h = self.m // 2
+ Psi = torch.zeros(size=(self.m, self.m)).double()
+ for i in range(h):
+ for j in range(h):
+ Psi[i, j] = torch.sqrt(
+ self.weights[i] * self.weights[j]
+ ) * quad_help.integrate_cos_cos(
+ a, b, self.W[i, 0], self.W[j, 0]
+ ) # cos cos
+ Psi[i, j + h] = torch.sqrt(
+ self.weights[i] * self.weights[j]
+ ) * quad_help.integrate_sin_cos(
+ a, b, self.W[i, 0], self.W[j, 0]
+ ) # cos sin
+ Psi[i + h, j] = torch.sqrt(
+ self.weights[j] * self.weights[i]
+ ) * quad_help.integrate_sin_cos(
+ a, b, self.W[j, 0], self.W[i, 0]
+ ) # sin cos
+ Psi[i + h, j + h] = torch.sqrt(
+ self.weights[i] * self.weights[j]
+ ) * quad_help.integrate_sin_sin(
+ a, b, self.W[i, 0], self.W[j, 0]
+ ) # sin sin
+ elif S.d == 2:
+ xa = S.bounds[0, 0]
+ xb = S.bounds[0, 1]
+ ya = S.bounds[1, 0]
+ yb = S.bounds[1, 1]
+ h = self.m // 2
+ Psi = torch.zeros(size=(self.m, self.m)).double()
+ for i in range(h):
+ for j in range(h):
+ Psi[i, j] = torch.sqrt(
+ self.weights[i] * self.weights[j]
+ ) * quad_help.integrate2d_cos_cos(
+ xa,
+ ya,
+ xb,
+ yb,
+ self.W[i, 0],
+ self.W[i, 1],
+ self.W[j, 0],
+ self.W[j, 1],
+ ) # cos cos
+ Psi[i, j + h] = torch.sqrt(
+ self.weights[i] * self.weights[j]
+ ) * quad_help.integrate2d_sin_cos(
+ xa,
+ ya,
+ xb,
+ yb,
+ self.W[i, 0],
+ self.W[i, 1],
+ self.W[j, 0],
+ self.W[j, 1],
+ ) # cos cos
+ Psi[i + h, j] = torch.sqrt(
+ self.weights[j] * self.weights[i]
+ ) * quad_help.integrate2d_sin_cos(
+ xa,
+ ya,
+ xb,
+ yb,
+ self.W[j, 0],
+ self.W[j, 1],
+ self.W[i, 0],
+ self.W[i, 1],
+ ) # cos cos
+ Psi[i + h, j + h] = torch.sqrt(
+ self.weights[i] * self.weights[j]
+ ) * quad_help.integrate2d_sin_sin(
+ xa,
+ ya,
+ xb,
+ yb,
+ self.W[i, 0],
+ self.W[i, 1],
+ self.W[j, 0],
+ self.W[j, 1],
+ ) # cos cos
+ return self.kappa * Psi
+
+ def compute(self, complexity_reorder=True):
+ """
+ Computes the tensor grid for Fourier features
+ :return:
+ """
+
+ if self.cosine == False:
+ self.q = int(np.power(self.m // 2, 1.0 / self.d))
+ self.m = self.q**self.d
+ else:
+ self.q = int(np.power(self.m, 1.0 / self.d))
+ self.m = self.q**self.d
+
+ (omegas, weights) = self.nodesAndWeights(self.q)
+
+ if complexity_reorder == True:
+ (omegas, weights) = self.reorder_complexity(omegas, weights)
+
+ self.weights = helper.cartesian([weights for weight in range(self.d)])
+ self.weights = np.prod(self.weights, axis=1)
+
+ v = [omegas for omega in range(self.d)]
+ self.W = helper.cartesian(v)
+
+ if self.cosine == False:
+ self.m = self.m * 2
+ else:
+ pass
+
+ self.W = torch.from_numpy(self.W)
+ self.weights = torch.from_numpy(self.weights)
+
+ def transform(self):
+ """
+
+ :return: spectral density of a kernel
+ """
+ if self.kernel == "squared_exponential":
+ p = (
+ lambda omega: np.exp(
+ -np.sum(omega**2, axis=1).reshape(-1, 1) / 2 * (self.gamma**2)
+ )
+ * np.power((self.gamma / np.sqrt(2 * np.pi)), 1.0)
+ * np.power(np.pi / 2, 1.0)
+ )
+
+ elif self.kernel == "laplace":
+ p = lambda omega: np.prod(
+ 1.0 / ((self.gamma**2) * (omega**2) + 1.0), axis=1
+ ).reshape(-1, 1) * np.power(self.gamma / 2.0, 1.0)
+
+ elif self.kernel == "modified_matern":
+ if self.nu == 2:
+ p = lambda omega: np.prod(
+ 1.0 / ((self.gamma**2) * (omega**2) + 1.0) ** self.nu, axis=1
+ ).reshape(-1, 1) * np.power(self.gamma * 1, 1.0)
+ elif self.nu == 3:
+ p = lambda omega: np.prod(
+ 1.0 / ((self.gamma**2) * (omega**2) + 1.0) ** self.nu, axis=1
+ ).reshape(-1, 1) * np.power(self.gamma * 4 / 3, 1.0)
+ elif self.nu == 4:
+ p = lambda omega: np.prod(
+ 1.0 / ((self.gamma**2) * (omega**2) + 1.0) ** self.nu, axis=1
+ ).reshape(-1, 1) * np.power(self.gamma * 8 / 5, 1.0)
+
+ return p
+
+ def nodesAndWeights(self, q):
+ """
+ Compute nodes and weights of the quadrature scheme in 1D
+
+ :param q: degree of quadrature
+ :return: tuple of (nodes, weights)
+ """
+
+ # For osciallatory integrands even this has good properties.
+ # weights = np.ones(self.q) * self.scale * np.pi / (self.q + 1)
+ # omegas = (np.linspace(0, self.q - 1, self.q)) + 1
+ # omegas = omegas * (np.pi / (self.q + 1))
+
+ (omegas, weights) = np.polynomial.legendre.leggauss(2 * q)
+
+ omegas = omegas[q:]
+ weights = 2 * weights[q:]
+
+ omegas = ((omegas + 1.0) / 2.0) * np.pi
+ sine_scale = 1.0 / (np.sin(omegas) ** 2)
+ omegas = self.scale / np.tan(omegas)
+ prob = self.transform()
+ weights = (
+ self.scale * sine_scale * weights * prob(omegas.reshape(-1, 1)).flatten()
+ )
+ return (omegas, weights)
+
+ def embed(self, x):
+ """
+ :param x: torch array
+ :return: embeding of the x
+ """
+ (times, d) = tuple(x.size())
+ # z = torch.from_numpy(np.zeros(shape=(self.m, times),dtype=x.dtype))
+ z = torch.zeros(self.m, times, dtype=x.dtype)
+ q = torch.mm(self.W[:, 0:d], torch.t(x))
+
+ if self.cosine == False:
+ z[0 : int(self.m / 2), :] = torch.sqrt(
+ self.weights.view(-1, 1)
+ ) * torch.cos(q)
+ z[int(self.m / 2) : self.m, :] = torch.sqrt(
+ self.weights.view(-1, 1)
+ ) * torch.sin(q)
+ else:
+ z = torch.sqrt(self.weights.view(-1, 1)) * torch.cos(q)
+
+ return torch.t(z) * np.sqrt(self.kappa)
+
+ def get_sub_indices(self, group):
+ """
+ :param group: group part of the embeding to embed
+ :return: embeding of x in group
+ """
+ m2 = self.m
+ mhalf = int(np.power(self.m // 2, 1.0 / self.d))
+
+ m = 2 * mhalf
+ mquater = mhalf // 2
+
+ if group == 0:
+ ind = (
+ np.arange(mquater * mhalf, (mquater + 1) * mhalf, 1).tolist()
+ + np.arange(
+ m2 // 2 + (mquater * mhalf), m2 // 2 + (mquater + 1) * mhalf, 1
+ ).tolist()
+ )
+ return ind
+ else:
+ ind = (
+ np.arange(mquater, m2 // 2, mhalf).tolist()
+ + np.arange(m2 // 2 + mquater, m2, mhalf).tolist()
+ )
+ return ind
+
+ def get_sum_sub_indices(self, group):
+
+ # idenitfy unique values
+ arr = self.W[:, group]
+ values = np.unique(arr)
+ # find indices of each unique value
+ ind = []
+ for value in values:
+ ind_inside = []
+ for index, elem in enumerate(arr):
+ if elem == value:
+ ind_inside.append(index)
+ ind.append(ind_inside)
+ ind_inside2 = [i + self.m // 2 for i in ind_inside]
+ ind.append(ind_inside2)
+ return ind
class TrapezoidalEmbedding(QuadratureEmbedding):
- def __init__(self, **kwargs):
- QuadratureEmbedding.__init__(self, **kwargs)
- if self.kernel != "squared_exponential":
- raise AssertionError("This embeding is allowed only with Squared Exponential Kernel")
+ def __init__(self, **kwargs):
+ QuadratureEmbedding.__init__(self, **kwargs)
+ if self.kernel != "squared_exponential":
+ raise AssertionError(
+ "This embeding is allowed only with Squared Exponential Kernel"
+ )
- def nodesAndWeights(self, q):
- prob = self.transform()
- # prob = lambda x:
- h = np.sqrt(np.pi / q) / self.gamma ** 2
+ def nodesAndWeights(self, q):
+ prob = self.transform()
+ # prob = lambda x:
+ h = np.sqrt(np.pi / q) / self.gamma**2
- nodes = np.linspace(-q // 2, q // 2, q) * h
- # print (nodes)
+ nodes = np.linspace(-q // 2, q // 2, q) * h
+ # print (nodes)
- weights = h * prob(nodes.reshape(-1, 1)).flatten() * (2 / np.pi)
+ weights = h * prob(nodes.reshape(-1, 1)).flatten() * (2 / np.pi)
- # nodes = np.sqrt(2) * nodes / self.gamma
+ # nodes = np.sqrt(2) * nodes / self.gamma
- return (nodes, weights)
+ return (nodes, weights)
class ClenshawCurtisEmbedding(QuadratureEmbedding):
- def __init__(self, **kwargs):
- QuadratureEmbedding.__init__(self, **kwargs)
- if self.kernel != "squared_exponential":
- raise AssertionError("This embeding is allowed only with Squared Exponential Kernel")
+ def __init__(self, **kwargs):
+ QuadratureEmbedding.__init__(self, **kwargs)
+ if self.kernel != "squared_exponential":
+ raise AssertionError(
+ "This embeding is allowed only with Squared Exponential Kernel"
+ )
- def nodesAndWeights(self, q):
- L = 1. / self.gamma
- prob = self.transform()
- # prob = lambda x:
+ def nodesAndWeights(self, q):
+ L = 1.0 / self.gamma
+ prob = self.transform()
+ # prob = lambda x:
- nodes_0 = np.linspace(0, q + 1, q + 2)
- nodes_0 = np.pi * nodes_0[1:-1] / (q + 2)
- nodes = L / np.tan(nodes_0)
+ nodes_0 = np.linspace(0, q + 1, q + 2)
+ nodes_0 = np.pi * nodes_0[1:-1] / (q + 2)
+ nodes = L / np.tan(nodes_0)
- weights = L * (np.pi / (q + 2)) * (1. / np.sin(nodes_0) ** 2)
- weights = weights * prob(nodes.reshape(-1, 1)).flatten() * (2. / np.pi)
+ weights = L * (np.pi / (q + 2)) * (1.0 / np.sin(nodes_0) ** 2)
+ weights = weights * prob(nodes.reshape(-1, 1)).flatten() * (2.0 / np.pi)
- return (nodes, weights)
+ return (nodes, weights)
- def nodesAndWeights2(self, q):
- prob = self.transform()
+ def nodesAndWeights2(self, q):
+ prob = self.transform()
- nodes_0 = np.linspace(0, q + 1, q + 2)
- nodes_0 = nodes_0[1:-1] / (q + 2) * np.pi
+ nodes_0 = np.linspace(0, q + 1, q + 2)
+ nodes_0 = nodes_0[1:-1] / (q + 2) * np.pi
- nodes = np.sqrt(-np.log(np.sin(nodes_0[0:q // 2])))
- nodes2 = -np.sqrt(-np.log(np.sin(nodes_0[q // 2:])))
+ nodes = np.sqrt(-np.log(np.sin(nodes_0[0 : q // 2])))
+ nodes2 = -np.sqrt(-np.log(np.sin(nodes_0[q // 2 :])))
- n1 = nodes_0[0:q // 2]
- n2 = nodes_0[q // 2:]
+ n1 = nodes_0[0 : q // 2]
+ n2 = nodes_0[q // 2 :]
- weights = (1. / np.tan(n1)) * (1. / np.sqrt(-np.log(np.sin(n1)))) * prob(
- nodes.reshape(-1, 1)).flatten() * np.pi / (q + 2)
- weights2 = -(1. / np.tan(n2)) * (1. / np.sqrt(-np.log(np.sin(n2)))) * prob(
- nodes.reshape(-1, 1)).flatten() * np.pi / (q + 2)
+ weights = (
+ (1.0 / np.tan(n1))
+ * (1.0 / np.sqrt(-np.log(np.sin(n1))))
+ * prob(nodes.reshape(-1, 1)).flatten()
+ * np.pi
+ / (q + 2)
+ )
+ weights2 = (
+ -(1.0 / np.tan(n2))
+ * (1.0 / np.sqrt(-np.log(np.sin(n2))))
+ * prob(nodes.reshape(-1, 1)).flatten()
+ * np.pi
+ / (q + 2)
+ )
- nodes = np.concatenate((nodes, nodes2))
- weights = np.concatenate((weights, weights2))
+ nodes = np.concatenate((nodes, nodes2))
+ weights = np.concatenate((weights, weights2))
- return (nodes, weights)
+ return (nodes, weights)
class HermiteEmbedding(QuadratureEmbedding):
- """
- Hermite Quadrature Fourier Features for squared exponential kernel
- """
-
- def __init__(self, ones=False, cosine=False, **kwargs):
- self.ones = ones
- self.cosine = cosine
- QuadratureEmbedding.__init__(self, **kwargs)
- if self.kernel != "squared_exponential":
- raise AssertionError("Hermite Embedding is allowed only with Squared Exponential Kernel")
-
- def nodesAndWeights(self, q):
- """
- Compute nodes and weights of the quadrature scheme in 1D
-
- :param q: degree of quadrature
- :return: tuple of (nodes, weights)
- """
- (nodes, weights) = np.polynomial.hermite.hermgauss(2 * q)
- # print (nodes)
- nodes = nodes[q:]
- weights = 2 * weights[q:]
-
- if self.ones == True:
- weights = np.ones(q)
-
- nodes = np.sqrt(2) * nodes / self.gamma
- weights = weights / np.sqrt(np.pi)
- return (nodes, weights)
+ """
+ Hermite Quadrature Fourier Features for squared exponential kernel
+ """
+
+ def __init__(self, ones=False, cosine=False, **kwargs):
+ self.ones = ones
+ self.cosine = cosine
+ QuadratureEmbedding.__init__(self, **kwargs)
+ if self.kernel != "squared_exponential":
+ raise AssertionError(
+ "Hermite Embedding is allowed only with Squared Exponential Kernel"
+ )
+
+ def nodesAndWeights(self, q):
+ """
+ Compute nodes and weights of the quadrature scheme in 1D
+
+ :param q: degree of quadrature
+ :return: tuple of (nodes, weights)
+ """
+ (nodes, weights) = np.polynomial.hermite.hermgauss(2 * q)
+ # print (nodes)
+ nodes = nodes[q:]
+ weights = 2 * weights[q:]
+
+ if self.ones == True:
+ weights = np.ones(q)
+
+ nodes = np.sqrt(2) * nodes / self.gamma
+ weights = weights / np.sqrt(np.pi)
+ return (nodes, weights)
class OverCompleteHermiteEmbedding(HermiteEmbedding):
- def nodesAndWeights(self, q):
- """
- Compute nodes and weights of the quadrature scheme in 1D
+ def nodesAndWeights(self, q):
+ """
+ Compute nodes and weights of the quadrature scheme in 1D
- :param q: degree of quadrature
- :return: tuple of (nodes, weights)
- """
- (nodes, weights) = np.polynomial.hermite.hermgauss(q)
- nodes = nodes
- weights = weights
+ :param q: degree of quadrature
+ :return: tuple of (nodes, weights)
+ """
+ (nodes, weights) = np.polynomial.hermite.hermgauss(q)
+ nodes = nodes
+ weights = weights
- nodes = np.sqrt(2) * nodes / self.gamma
- weights = weights / np.sqrt(np.pi)
- return (nodes, weights)
+ nodes = np.sqrt(2) * nodes / self.gamma
+ weights = weights / np.sqrt(np.pi)
+ return (nodes, weights)
class MaternEmbedding(QuadratureEmbedding):
- """
- Matern specific quadrature based Fourier Features
- """
+ """
+ Matern specific quadrature based Fourier Features
+ """
- def __init__(self, **kwargs):
- super().__init__(**kwargs)
- if self.kernel != "modified_matern" and self.kernel != "laplace":
- raise AssertionError("Matern Embedding is allowed only with Matern Kernel")
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+ if self.kernel != "modified_matern" and self.kernel != "laplace":
+ raise AssertionError("Matern Embedding is allowed only with Matern Kernel")
- def nodesAndWeights(self, q):
- """
- Compute nodes and weights of the quadrature scheme in 1D
+ def nodesAndWeights(self, q):
+ """
+ Compute nodes and weights of the quadrature scheme in 1D
- :param q: degree of quadrature
- :return: tuple of (nodes, weights)
- """
- (nodes, weights) = np.polynomial.hermite.hermgauss(q)
- nodes = np.sqrt(2) * nodes / self.gamma
- weights = weights / np.sqrt(np.pi)
- return (nodes, weights)
+ :param q: degree of quadrature
+ :return: tuple of (nodes, weights)
+ """
+ (nodes, weights) = np.polynomial.hermite.hermgauss(q)
+ nodes = np.sqrt(2) * nodes / self.gamma
+ weights = weights / np.sqrt(np.pi)
+ return (nodes, weights)
class QuadPeriodicEmbedding(QuadratureEmbedding):
- """
- General class implementing
- """
+ """
+ General class implementing
+ """
- def __init__(self, **kwargs):
- super().__init__(**kwargs)
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
- def nodesAndWeights(self, q):
- """
- Compute nodes and weights of the quadrature scheme in 1D
+ def nodesAndWeights(self, q):
+ """
+ Compute nodes and weights of the quadrature scheme in 1D
- :param q: degree of quadrature
- :return: tuple of (nodes, weights)
- """
- weights = np.ones(self.q) * self.scale * 2 / (self.q + 1)
- omegas = (np.linspace(0, self.q - 1, self.q)) + 1
- omegas = omegas * (np.pi / (self.q + 1))
+ :param q: degree of quadrature
+ :return: tuple of (nodes, weights)
+ """
+ weights = np.ones(self.q) * self.scale * 2 / (self.q + 1)
+ omegas = (np.linspace(0, self.q - 1, self.q)) + 1
+ omegas = omegas * (np.pi / (self.q + 1))
- sine_scale = (1. / (np.sin(omegas) ** 2))
- omegas = self.scale / np.tan(omegas)
- prob = self.transform()
- weights = self.scale * sine_scale * weights * prob(omegas.reshape(-1, 1)).flatten()
- return (omegas, weights)
+ sine_scale = 1.0 / (np.sin(omegas) ** 2)
+ omegas = self.scale / np.tan(omegas)
+ prob = self.transform()
+ weights = (
+ self.scale * sine_scale * weights * prob(omegas.reshape(-1, 1)).flatten()
+ )
+ return (omegas, weights)
class KLEmbedding(QuadratureEmbedding):
- """
- General class implementing Karhunen-Loeve expansion
- """
+ """
+ General class implementing Karhunen-Loeve expansion
+ """
- def __init__(self, **kwargs):
- super().__init__(**kwargs)
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
class LatticeEmbedding(QuadratureEmbedding):
- """
- Class for standard basis indexed by natural numbers
- """
+ """
+ Class for standard basis indexed by natural numbers
+ """
- def __init__(self, **kwargs):
- super().__init__(**kwargs)
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
- # if self.kernel != "modified_matern" and self.kernel !="laplace":
- # raise AssertionError("Matern Embedding is allowed only with Matern Kernel")
+ # if self.kernel != "modified_matern" and self.kernel !="laplace":
+ # raise AssertionError("Matern Embedding is allowed only with Matern Kernel")
- def nodesAndWeights(self, q):
- """
- Compute nodes and weights of the quadrature scheme in 1D
+ def nodesAndWeights(self, q):
+ """
+ Compute nodes and weights of the quadrature scheme in 1D
- :param q: degree of quadrature
- :return: tuple of (nodes, weights)
- """
- nodes = np.arange(1, q + 1, 1)
- nodes = np.sqrt(2) * nodes / self.gamma
- weights = np.ones(q) / (2 * q)
- return (nodes, weights)
+ :param q: degree of quadrature
+ :return: tuple of (nodes, weights)
+ """
+ nodes = np.arange(1, q + 1, 1)
+ nodes = np.sqrt(2) * nodes / self.gamma
+ weights = np.ones(q) / (2 * q)
+ return (nodes, weights)
class ConcatEmbedding(Embedding):
- def __init__(self, embeddings: List[Embedding]):
+ def __init__(self, embeddings: List[Embedding]):
- self.embeddings = embeddings
- self.m = sum([emb.get_m() for emb in embeddings])
+ self.embeddings = embeddings
+ self.m = sum([emb.get_m() for emb in embeddings])
- def embed(self, xtest):
- return torch.hstack([emb.embed(xtest) for emb in self.embeddings])
+ def embed(self, xtest):
+ return torch.hstack([emb.embed(xtest) for emb in self.embeddings])
class MaskedEmbedding(Embedding):
- def __init__(self, embedding: Embedding, mask: Callable):
- self.embedding = embedding
- self.m = self.embedding.get_m()
- self.mask = mask
-
- def embed(self, xtest):
- return torch.diag(self.mask(xtest))@self.embedding.embed(xtest)
-
-
-class AdditiveEmbeddings():
-
- def __init__(self, embeddings, ms, groups=None, scaling=None, additive=True):
- self.emebeddings = embeddings
- if scaling is None:
- self.scaling = torch.ones(len(self.emebeddings)).double() # /np.sqrt(len(self.emebeddings))
- else:
- self.scaling = scaling
- self.additive = additive
-
- if groups is not None:
- self.groups = groups
- else:
- self.groups = [[i] for i in range(len(self.emebeddings))]
-
- try:
- self.ms = torch.Tensor(ms)
- except:
- self.ms = ms
-
- self.no_emb = len(self.emebeddings)
- self.m = torch.sum(self.ms)
-
- def embed(self, x):
- if self.additive:
- r = torch.zeros(size=(x.size()[0], int(torch.sum(self.ms)))).double()
- count = 0
- for index, embedding in enumerate(self.emebeddings):
- r[:, count:count + int(self.ms[index])] = \
- embedding.embed(x[:, self.groups[index]].view(-1, len(self.groups[index]))) * self.scaling[index]
- count = count + int(self.ms[index])
- return r
- else:
- pass
-
-
-class ProjectiveEmbeddings():
-
- def __init__(self, embedding, project):
- self.embedding = embedding
- self.project = project
-
- def embed(self, x):
- r = self.embedding.embed(self.project(x))
- return r
+ def __init__(self, embedding: Embedding, mask: Callable):
+ self.embedding = embedding
+ self.m = self.embedding.get_m()
+ self.mask = mask
+
+ def embed(self, xtest):
+ return torch.diag(self.mask(xtest)) @ self.embedding.embed(xtest)
+
+
+class AdditiveEmbeddings:
+
+ def __init__(self, embeddings, ms, groups=None, scaling=None, additive=True):
+ self.emebeddings = embeddings
+ if scaling is None:
+ self.scaling = torch.ones(
+ len(self.emebeddings)
+ ).double() # /np.sqrt(len(self.emebeddings))
+ else:
+ self.scaling = scaling
+ self.additive = additive
+
+ if groups is not None:
+ self.groups = groups
+ else:
+ self.groups = [[i] for i in range(len(self.emebeddings))]
+
+ try:
+ self.ms = torch.tensor(ms)
+ except:
+ self.ms = ms
+
+ self.no_emb = len(self.emebeddings)
+ self.m = torch.sum(self.ms)
+
+ def embed(self, x):
+ if self.additive:
+ r = torch.zeros(size=(x.size()[0], int(torch.sum(self.ms)))).double()
+ count = 0
+ for index, embedding in enumerate(self.emebeddings):
+ r[:, count : count + int(self.ms[index])] = (
+ embedding.embed(
+ x[:, self.groups[index]].view(-1, len(self.groups[index]))
+ )
+ * self.scaling[index]
+ )
+ count = count + int(self.ms[index])
+ return r
+ else:
+ pass
+
+
+class ProjectiveEmbeddings:
+
+ def __init__(self, embedding, project):
+ self.embedding = embedding
+ self.project = project
+
+ def embed(self, x):
+ r = self.embedding.embed(self.project(x))
+ return r
diff --git a/stpy/embeddings/onehot_embedding.py b/stpy/embeddings/onehot_embedding.py
index cdfdda7..1b32a63 100644
--- a/stpy/embeddings/onehot_embedding.py
+++ b/stpy/embeddings/onehot_embedding.py
@@ -6,30 +6,34 @@
class OnehotEmbedding(Embedding):
- def __init__(self, p, d):
- self.p = p # max value
- self.d = d # sites
- self.m = p*d
+ def __init__(self, p, d):
+ self.p = p # max value
+ self.d = d # sites
+ self.m = p * d
- def get_m(self):
- return self.p*self.d
+ def get_m(self):
+ return self.p * self.d
+ def apply(self, x, f):
+ return torch.stack(
+ [f(x_i) for i, x_i in enumerate(torch.unbind(x, dim=0), 0)], dim=0
+ )
- def apply(self,x,f):
- return torch.stack([f(x_i) for i, x_i in enumerate(torch.unbind(x, dim=0), 0)], dim=0)
+ def embed(self, x):
+ n, d = x.size()
+ out = torch.zeros(n, self.p * self.d).double()
- def embed(self, x):
- n,d = x.size()
- out = torch.zeros(n,self.p*self.d).double()
+ f = lambda x: torch.from_numpy(
+ np.array([x[i] + 20 * i for i in range(self.d)])
+ ).int()
+ indices = self.apply(x, f).long()
+ for i in range(n):
+ out[i, indices[i]] = 1.0
- f = lambda x: torch.from_numpy(np.array([x[i]+20*i for i in range(self.d)])).int()
- indices = self.apply(x,f).long()
- for i in range(n):
- out[i,indices[i]] = 1.
+ return out
- return out
if __name__ == "__main__":
- emb = OnehotEmbedding(20,2)
- x = torch.Tensor([[2,3],[4,5],[10,19]])
- print (emb.embed(x))
\ No newline at end of file
+ emb = OnehotEmbedding(20, 2)
+ x = torch.tensor([[2, 3], [4, 5], [10, 19]])
+ print(emb.embed(x))
diff --git a/stpy/embeddings/optimal_positive_basis.py b/stpy/embeddings/optimal_positive_basis.py
index 170d018..2768527 100644
--- a/stpy/embeddings/optimal_positive_basis.py
+++ b/stpy/embeddings/optimal_positive_basis.py
@@ -1,187 +1,430 @@
-import pickle
+from typing import Literal
import numpy as np
import scipy
+from stpy.helpers.voxel_grid import voxel_grid
+from stpy.helpers.parallel_interpolation import InterpolatorArray
import torch
from stpy.borel_set import BorelSet
-from stpy.continuous_processes.nystrom_fea import NystromFeatures
from stpy.embeddings.positive_embedding import PositiveEmbedding
from stpy.kernels import KernelFunction
+from sklearn.decomposition import NMF
+from nmf import run_nmf
+from stpy.helpers.posterior_sampling import tmg
+from fast_pytorch_kmeans import KMeans
class OptimalPositiveBasis(PositiveEmbedding):
- def __init__(self, *args, samples=300, discretization_size=30, saved=False, **kwargs):
- super().__init__(*args, **kwargs)
- self.samples = np.maximum(samples, self.m)
-
- B = BorelSet(self.d, torch.Tensor([[self.interval[0], self.interval[1]] for _ in range(self.d)]).double())
- self.discretized_domain = B.return_discretization(discretization_size)
-
- y = self.discretized_domain[:, 0].view(-1, 1) * 0
-
- print("Optimal basis with arbitrary dimension, namely d =", self.d)
- print("Starting optimal basis construction, with m =", self.m)
- # self.new_kernel_object = KernelFunction(kernel_name=self.kernel_object.optkernel,
- # gamma = self.kernel_object.gamma, d = self.kernel_object.d)
-
- self.new_kernel_object = self.kernel_object
- if saved == True:
- print("Did not load GP object, it needs to loaded")
- else:
- self.GP = NystromFeatures(self.new_kernel_object, m=self.m, approx='positive_svd',
- samples=self.samples)
- self.GP.fit_gp(self.discretized_domain, y)
- print("Optimal basis constructed.")
- if torch.sum(torch.isnan(self.GP.embed(self.discretized_domain))) > 0:
- print("Failed basis? (zero is good):", torch.sum(torch.isnan(self.GP.embed(self.discretized_domain))))
- self.precomp_integral = {}
-
- def get_m(self):
- return self.m
-
- def basis_fun(self, x, j):
- return self.GP.embed(x)[:, j].view(-1, 1)
-
- def embed_internal(self, x):
- out = torch.zeros(size=(x.size()[0], self.m), dtype=torch.float64)
- for j in range(self.m):
- out[:, j] = self.basis_fun(x, j).view(-1)
- return out
-
- def save_embedding(self, filename):
- filehandler = open(filename, 'w')
- pickle.dump(self.GP, filehandler)
-
- def load_embedding(self, filename):
- file_pi2 = open(filename, 'r')
- self.GP = pickle.load(file_pi2)
-
- def get_constraints(self):
- s = self.get_m()
- l = np.full(s, 0.0).astype(float)
- u = np.full(s, 10e10)
- Lambda = np.identity(s)
- return (l, Lambda, u)
-
- def integral(self, S):
- assert (S.d == self.d)
-
- if S in self.precomp_integral.keys():
- return self.precomp_integral[S]
- else:
- if S.d == 1:
- weights, nodes = S.return_legendre_discretization(n=256)
- psi = torch.sum(torch.diag(weights) @ self.GP.embed(nodes), dim=0)
- Gamma_half = self.cov()
- psi = Gamma_half.T @ psi
- self.precomp_integral[S] = psi
- elif S.d == 2:
- weights, nodes = S.return_legendre_discretization(n=50)
- vals = self.embed_internal(nodes)
- psi = torch.sum(torch.diag(weights) @ vals, dim=0)
- Gamma_half = self.cov()
- psi = Gamma_half.T @ psi
- self.precomp_integral[S] = psi
- if torch.sum(torch.isnan(psi)) > 0:
- print("Failed integrals? (0 is good):", torch.sum(torch.isnan(psi)))
-
- else:
- raise NotImplementedError("Higher dimension not implemented.")
- return psi
-
- def cov(self, inverse=False):
-
- if self.precomp == False:
-
- x = self.discretized_domain
- vals = self.GP.embed(x)
- indices = torch.argmax(vals, dim=0) # the nodes are the maxima of the bump functions
- t = x[indices]
- print("nodes of functions", t.size())
-
- self.Gamma = self.kernel(t, t)
- Z = self.embed_internal(t)
-
- M = torch.pinverse(Z.T @ Z + (self.s) * torch.eye(self.Gamma.size()[0]))
- self.M = torch.from_numpy(np.real(scipy.linalg.sqrtm(M.numpy())))
-
- self.Gamma_half = torch.from_numpy(
- np.real(scipy.linalg.sqrtm(self.Gamma.numpy() + (self.s ** 2) * np.eye(self.Gamma.size()[0]))))
- self.Gamma_half = self.M @ self.Gamma_half
- self.invGamma_half = torch.pinverse(self.Gamma_half)
- self.precomp = True
- else:
- pass
-
- if inverse == True:
- return self.Gamma_half, self.invGamma_half
- else:
- return self.Gamma_half
+ def __init__(
+ self,
+ *args,
+ samples=300,
+ discretization_size=30,
+ data: torch.Tensor | BorelSet | None = None, # None -> discretize the whole domain
+ fast_sampling=True, # sample squared Gaussians instead of truncated Gaussians
+ memory_limit=5, # budget (times 1e9) that caps the points used for basis construction
+ sample_algorithm: Literal[
+ "grid", "kmeans"
+ ] = "grid", # How to subsample if points are limited
+ **kwargs,
+ ):
+ """Discretize the domain and fit the positive basis on the resulting points.
+
+ ``data`` is either the tensor of points the basis is optimal for, or a
+ BorelSet region that is discretized with ``discretization_size``; if it is
+ not given, the whole domain spanned by ``self.interval`` is discretized.
+ """
+ super().__init__(*args, **kwargs)
+ self.sample_algorithm = sample_algorithm
+ self.num_samples = np.maximum(samples, self.m)
+ self.fast = fast_sampling
+ self.memory_limit = memory_limit if memory_limit is not None else 40
+ self.interpolators = None
+
+ if data is None:
+ B = BorelSet(
+ self.d,
+ torch.tensor(
+ [[self.interval[0], self.interval[1]] for _ in range(self.d)]
+ ).double(),
+ )
+ self.discretized_domain = B.return_discretization(discretization_size)
+ elif isinstance(data, BorelSet):
+ self.discretized_domain = data.return_discretization(discretization_size)
+ else:
+ self.discretized_domain = data
+
+ print("Optimal basis with arbitrary dimension, namely d =", self.d)
+ print("Starting optimal basis construction, with m =", self.m)
+
+ self.new_kernel_object = self.kernel_object
+ # Fit on the discretized points, not on the raw `data` argument: `data` may be
+ # None or a BorelSet, while _subsample_if_necessary calls len() and
+ # element_size() on its input. For tensor input both are the same object.
+ self._fit_data(data=self.discretized_domain)
+ print("Optimal basis constructed.")
+ n_nan = torch.sum(torch.isnan(self.embed_internal(self.discretized_domain)))
+ if n_nan > 0:
+ print("Failed basis? (zero is good):", n_nan)
+ self.precomp_integral = {}
+
+ def get_m(self):
+ return self.m
+
+ def embed_internal(self, x):
+ out = torch.zeros([len(x), self.m], dtype=torch.float64)
+ for j in range(self.m):
+ out[:, j] = self.basis_fun(x, j).view(-1)
+ return out
+
+ def basis_fun(self, x, j):  # NOTE: shadowed by the second basis_fun defined later in this class body
+ raise Exception("Fit on data before using")  # placeholder: always raises
+
+ def get_constraints(self):
+ s = self.get_m()
+ l = np.full(s, 0.0).astype(float)
+ u = np.full(s, 10e10)
+ Lambda = np.identity(s)
+ return (l, Lambda, u)
+
+ def integral(self, S):
+ assert S.d == self.d
+
+ if S in self.precomp_integral.keys():
+ return self.precomp_integral[S]
+ else:
+ if S.d == 1:
+ weights, nodes = S.return_legendre_discretization(n=256)
+ psi = torch.sum(torch.diag(weights) @ self.embed_internal(nodes), dim=0)
+ Gamma_half = self.cov()
+ psi = Gamma_half.T @ psi
+ self.precomp_integral[S] = psi
+ elif S.d == 2:
+ weights, nodes = S.return_legendre_discretization(n=50)
+ vals = self.embed_internal(nodes)
+ psi = torch.sum(torch.diag(weights) @ vals, dim=0)
+ Gamma_half = self.cov()
+ psi = Gamma_half.T @ psi
+ self.precomp_integral[S] = psi
+ if torch.sum(torch.isnan(psi)) > 0:
+ print("Failed integrals? (0 is good):", torch.sum(torch.isnan(psi)))
+
+ else:
+ raise NotImplementedError("Higher dimension not implemented.")
+ return psi
+
+ def cov(self, inverse=False):
+
+ if self.precomp == False:
+
+ x = self.discretized_domain
+ vals = self.embed_internal(x)
+ indices = torch.argmax(
+ vals, dim=0
+ ) # the nodes are the maxima of the bump functions
+ t = x[indices]
+ print("nodes of functions", t.size())
+
+ self.Gamma = self.kernel(t, t)
+ Z = self.embed_internal(t)
+
+ M = torch.pinverse(Z.T @ Z + (self.s) * torch.eye(self.Gamma.size()[0]))
+ self.M = torch.tensor(np.real(scipy.linalg.sqrtm(M.cpu().numpy())))
+
+ self.Gamma_half = torch.tensor(
+ np.real(
+ scipy.linalg.sqrtm(
+ self.Gamma.cpu().numpy()
+ + (self.s**2) * np.eye(self.Gamma.size()[0])
+ )
+ )
+ )
+ self.Gamma_half = self.M @ self.Gamma_half
+ self.invGamma_half = torch.pinverse(self.Gamma_half)
+ self.precomp = True
+ else:
+ pass
+
+ if inverse == True:
+ return self.Gamma_half, self.invGamma_half
+ else:
+ return self.Gamma_half
+
+ def _sample_gaussian_prior(self, x: torch.Tensor):
+ n = self.num_samples
+ dim = len(x)
+ Cov = self.kernel_object.kernel(x, x) + 10e-7 * torch.eye(
+ dim, dtype=torch.float64
+ )
+ L = torch.linalg.cholesky(Cov)
+ if self.fast:
+ random_vector = torch.normal(
+ mean=torch.zeros(dim, n, dtype=torch.float64), std=1.0
+ )
+ y = torch.mm(L, random_vector) ** 2
+ else:
+ y = torch.tensor(
+ tmg(
+ n,
+ np.zeros([dim], dtype=np.float64),
+ Cov.cpu().numpy(),
+ np.ones([dim], dtype=np.float64),
+ np.eye(dim, dtype=np.float64),
+ np.zeros(dim, dtype=np.float64),
+ verbose=True,
+ ),
+ dtype=torch.float64,
+ )
+ return y, L
+
+ def _sample_gaussian_conditional(self, x_old, L_old, y_old, x):
+ dim = len(x) # dimensionality of input
+ n = y_old.size(1) # number of samples
+
+ K_new_new = self.kernel_object.kernel(x, x) + 1e-7 * torch.eye(
+ dim, dtype=torch.float64
+ )
+ K_new_old = self.kernel_object.kernel(x_old, x)
+
+ alpha = torch.linalg.solve_triangular(L_old, y_old, upper=False)
+ alpha = torch.linalg.solve_triangular(L_old.T, alpha, upper=True)
+
+ mu_star = K_new_old @ alpha # shape (dim, n)
+ # TODO check if kernel is always symmetric
+ K_old_new = K_new_old.T # shape (dim_old, dim)
+
+ tmp = torch.linalg.solve_triangular(L_old, K_old_new, upper=False)
+ tmp2 = torch.linalg.solve_triangular(L_old.T, tmp, upper=True)
+
+ Sigma_star = (
+ K_new_new - (K_new_old @ tmp2) + 1e-7 * torch.eye(dim, dtype=torch.float64)
+ )
+
+ L_star = torch.linalg.cholesky(Sigma_star)
+ if self.fast:
+ random_vector_new = torch.normal(
+ mean=torch.zeros(dim, n, dtype=torch.float64), std=1.0
+ )
+ y_new = (mu_star + L_star @ random_vector_new) ** 2
+ else:
+ y_new = torch.tensor(
+ tmg(
+ n,
+ mu_star.cpu().numpy(),
+ Sigma_star.cpu().numpy(),
+ np.ones([dim], dtype=np.float64),
+ np.eye(dim, dtype=np.float64),
+ np.zeros(dim, dtype=np.float64),
+ verbose=True,
+ ),
+ dtype=torch.float64,
+ )
+
+ return y_new
+
+ def _subsample_if_necessary(self, x: torch.Tensor):
+ # Calculate number of clusters
+ n_clusters = (self.memory_limit * 1_000_000_000) / x.element_size()
+ # Since we want to calculate the cholesky decomp of the cov matrix of the data plus roi (expected to be 1% of data)
+ n_clusters = int(np.sqrt(n_clusters) * 0.99 / 2.0)
+
+ if len(x) > n_clusters:
+ if self.sample_algorithm == "grid":
+ centroids = voxel_grid(x, max_n_voxels=n_clusters)
+ print(
+ f"Approximated data set with {len(centroids)} points for optimal"
+ " basis."
+ )
+ return centroids
+ elif self.sample_algorithm == "kmeans":
+ # Calculate maximum size of mini batch
+ n_samples, n_features = x.shape
+ SAFETY_FACTOR = 1.5
+ max_batch_size = int(
+ (
+ self.memory_limit * 1_000_000_000
+ - 0.8 * n_samples
+ - 2 * n_clusters * n_features * x.element_size()
+ )
+ // (
+ (
+ n_features * n_clusters * x.element_size()
+ + n_features * x.element_size()
+ )
+ * SAFETY_FACTOR
+ )
+ )
+ if max_batch_size >= n_samples:
+ max_batch_size = None
+
+ print(
+ f"Approximating data set with {n_clusters} points from"
+ f" {len(x)} points for optimal basis."
+ + (
+ f"Using batch size {max_batch_size}"
+ if max_batch_size is not None
+ else ""
+ )
+ )
+ kmeans = KMeans(
+ n_clusters=n_clusters,
+ mode="euclidean",
+ verbose=1,
+ minibatch=max_batch_size,
+ )
+ kmeans.fit_predict(x)
+ centroids = kmeans.centroids
+
+ return centroids
+ else:
+ print("No subsampling necessary because data fits into memory")
+ return x
+
+ def _fit_data(self, data):  # fit the basis on `data`: subsample, draw samples, factorize with NMF, build interpolators
+ self.data_m = self.m  # remember the component count used for this fit
+ data = self._subsample_if_necessary(data)
+ self.F_data, self.L_data = self._sample_gaussian_prior(data)
+ self.F_data = self.F_data**2  # NOTE(review): with fast sampling the draws are already squared in _sample_gaussian_prior, so this squares again - confirm intended
+ self.W_data, self.H_data, err = run_nmf(
+ self.F_data,
+ n_components=self.m,
+ tol=1e-12,
+ use_gpu=self.F_data.is_cuda,
+ batch_max_iter=2000,
+ fp_precision=self.F_data.dtype,
+ )
+ self.W_data = torch.tensor(self.W_data)
+ self.H_data = torch.tensor(self.H_data)
+ self.W_data = self.W_data / torch.linalg.norm(self.W_data, dim=0)  # divide each column of W by its norm
+ self.data = data  # the (possibly subsampled) points; reused by fit() and add_new_functions()
+ W_norm = self.W_data
+ self._set_interpolators(data, W_norm)
+
+ def basis_fun(self, q: torch.Tensor, j: int):
+ if self.interpolators is None:
+ raise Exception("Fit on data before using")
+
+ return self.interpolators(j, q)
+
+ def _set_interpolators(self, x: torch.Tensor, phi: torch.Tensor):
+ assert x.dtype == phi.dtype
+ self.interpolators = InterpolatorArray(x, phi, self.m)
+
+ def fit(self, roi: torch.Tensor):
+ assert self.data is not None, "Data must be given first"
+ print("Refitting optimal basis")
+ self.precomp = False
+ x = torch.cat((self.data, roi), dim=0)
+ F, _ = self._sample_gaussian_prior(x)
+ F = F**2
+ # Note: using cpu based NMF here since run_nmf has no way to pass initialization
+ model = NMF(n_components=self.data_m, max_iter=200, tol=1e-8, init="custom")
+ phi_roi_init = torch.zeros([len(roi), self.data_m], dtype=torch.float64)
+ W_start = torch.cat((self.W_data, phi_roi_init), dim=0)
+ W = torch.tensor(
+ model.fit_transform(
+ F.cpu().numpy(),
+ W=W_start.cpu().numpy(),
+ H=self.H_data.cpu().numpy(),
+ )
+ )
+ self.Phi = W / torch.linalg.norm(W, dim=0)
+ self.m = self.data_m
+ self._set_interpolators(x, self.Phi)
+ self.precomp = False
+ self.precomp_integral = {}
+
+ def add_new_functions(self, roi: torch.Tensor, n: int):
+ x = torch.cat((self.data, roi), dim=0)
+ F_new = self._sample_gaussian_conditional(
+ self.data, self.L_data, self.F_data, roi
+ )
+ F = torch.cat([self.F_data, F_new])
+ Phi_old = (
+ torch.stack([self.basis_fun(x, j) for j in range(self.data_m)]).squeeze(2).T
+ )
+ Theta_old = self.H_data
+ # TODO: theoretically this is wrong and we would have to solve over both Phi_old and Phi_new;
+ # also, capping at 0 has no theoretical underpinning
+ objective = torch.clamp(F - Phi_old @ Theta_old, min=0)
+ Phi_new, Theta_new, err = run_nmf(
+ objective,
+ n_components=n,
+ tol=1e-7,
+ use_gpu=True,
+ batch_max_iter=100,
+ fp_precision=objective.dtype,
+ )
+ Phi_new = torch.tensor(Phi_new)
+ self.Phi = Phi_new / torch.linalg.norm(Phi_new, dim=0)
+ self.m = self.data_m + n
+ self.interpolators.set(1, x, self.Phi, n)
+ self.precomp = False
+ self.precomp_integral = {}
if __name__ == "__main__":
- from stpy.continuous_processes.gauss_procc import GaussianProcess
- from stpy.helpers.helper import interval
- import matplotlib.pyplot as plt
- from scipy.interpolate import griddata
-
- d = 2
- m = 64
- n = 64
- N = 20
- sqrtbeta = 2
- s = 0.01
- b = 0
- gamma = 0.5
- k = KernelFunction(gamma=gamma, d=2)
-
- Emb = OptimalPositiveBasis(d, m, offset=0.2, s=s, b=b, discretization_size=n, B=1000., kernel_object=k)
-
- GP = GaussianProcess(d=d, s=s)
- xtest = torch.from_numpy(interval(n, d))
-
- x = torch.from_numpy(np.random.uniform(-1, 1, size=(N, d)))
-
- F_true = lambda x: torch.sum(torch.sin(x) ** 2 - 0.1, dim=1).view(-1, 1)
- F = lambda x: F_true(x) + s * torch.randn(x.size()[0]).view(-1, 1).double()
- y = F(x)
-
- # Try to plot the basis functions
- msqrt = int(np.sqrt(m))
- fig, axs = plt.subplots(msqrt, msqrt, figsize=(15, 7))
- for i in range(m):
- f_i = Emb.basis_fun(xtest, i) ## basis function
- xx = xtest[:, 0].numpy()
- yy = xtest[:, 1].numpy()
- ax = axs[int(i // msqrt), (i % msqrt)]
- grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j]
- grid_z_f = griddata((xx, yy), f_i[:, 0].detach().numpy(), (grid_x, grid_y), method='linear')
- cs = ax.contourf(grid_x, grid_y, grid_z_f, levels=10)
- ax.contour(cs, colors='k')
- # cbar = fig.colorbar(cs)
- # if self.x is not None:
- # ax.scatter(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), c='r', s=100, marker="o")
- ax.grid(c='k', ls='-', alpha=0.1)
-
- plt.savefig("positive.png")
- plt.show()
-
- Emb.fit(x, y)
- GP.fit_gp(x, y)
-
- mu, _ = Emb.mean_std(xtest)
- mu_true, _ = GP.mean_std(xtest)
-
- Emb.visualize_function(xtest, [F_true, lambda x: GP.mean_std(x)[0], lambda x: Emb.mean_std(x)[0]])
- # Emb.visualize_function(xtest,GP.mean_std)
- # Emb.visualize_function(xtest,Emb.mean_std)
-
- # plt.plot(xtest,mu_true,'b--', label = 'GP')
-
- # plt.plot(x,y,'ro')
- # plt.plot(xtest, mu, 'g-', label = 'positive basis ')
- # plt.legend()
- plt.show()
+ from stpy.continuous_processes.gauss_procc import GaussianProcess
+ from stpy.helpers.helper import interval
+ import matplotlib.pyplot as plt
+ from scipy.interpolate import griddata
+
+ d = 2
+ m = 5
+ n = 64
+ s = 0.01
+ b = 0
+ gamma = 0.5
+ k = KernelFunction(gamma=gamma, d=2)
+
+ xtest = torch.tensor(interval(n, d))
+
+ xnew = xtest[:1000]
+
+ xtest = xtest[1000:]
+
+ Emb = OptimalPositiveBasis(
+ d,
+ m,
+ offset=0.2,
+ s=s,
+ b=b,
+ discretization_size=n,
+ B=1000.0,
+ kernel_object=k,
+ data=xtest,
+ )
+
+ y, L = Emb._sample_prior(xtest, 1)
+
+ fig, ax = plt.subplots(figsize=(10, 6))
+ xx = xtest[:, 0].cpu().numpy()
+ yy = xtest[:, 1].cpu().numpy()
+ sc = ax.scatter(xx, yy, c=y.detach().numpy().reshape(-1), cmap="viridis")
+ ax.grid(c="k", ls="-", alpha=0.1)
+ plt.colorbar(sc)
+ plt.title("Interpolated plot of y over xtest")
+ plt.xlabel("x1")
+ plt.ylabel("x2")
+ plt.show()
+
+ ynew = Emb._sample_conditional(xtest, L, y, xnew)
+
+ xtest = torch.cat([xtest, xnew])
+ y = torch.cat([y, ynew])
+
+ fig, ax = plt.subplots(figsize=(10, 6))
+ xx = xtest[:, 0].cpu().numpy()
+ yy = xtest[:, 1].cpu().numpy()
+ sc = ax.scatter(xx, yy, c=y.detach().numpy().reshape(-1), cmap="viridis")
+ ax.grid(c="k", ls="-", alpha=0.1)
+ plt.colorbar(sc)
+ plt.title("Interpolated plot of y over xtest")
+ plt.xlabel("x1")
+ plt.ylabel("x2")
+ plt.show()
+
+ print("hi")
diff --git a/stpy/embeddings/packing_embedding.py b/stpy/embeddings/packing_embedding.py
index a08d2a7..ce6c77a 100755
--- a/stpy/embeddings/packing_embedding.py
+++ b/stpy/embeddings/packing_embedding.py
@@ -10,111 +10,116 @@
class PackingEmbedding(Embedding):
- def __init__(self, d, m, kernel_object, interval=[-1, 1], n=100, method='svd'):
- self.d = d
- self.m = m
- self.interval = interval
- self.size = self.get_m()
- self.kernel_object = kernel_object
-
- self.kernel = kernel_object.kernel
- self.n = n
- self.method = method
- self.construct()
-
- def construct(self):
- xtest = interval_torch(self.n, self.d, offset=[self.interval for _ in range(self.d)])
- y = xtest[:, 0].view(-1, 1) * 0
-
- self.new_kernel_object = KernelFunction(kernel_name=self.kernel_object.optkernel,
- gamma=self.kernel_object.gamma, d=self.d)
- self.GP = NystromFeatures(self.new_kernel_object, m=self.m, approx=self.method)
- self.GP.fit_gp(xtest, y)
-
- def basis_fun(self, x, j):
- return self.GP.embed(x)[:, j].view(-1, 1)
-
- def embed(self, x):
- return self.GP.embed(x)
-
- def _derivative_1(self, x):
- dphi = batch_jacobian(self.embed, x).transpose(0, 1)
- return dphi
-
- def _derivative_2(self, x):
- d2phi = batch_hessian(self.embed, x).transpose(0, 1).transpose(0, 2)
- return d2phi
-
- def derivative_1(self, x):
- if self.kernel_object.optkernel == "squared_exponential":
- xs = self.GP.xs
- M = self.GP.M
- derivative = self.kernel_object.derivative_1(xs, x)
- res = torch.einsum('ij,kil->kjl', M, derivative)
- return res
- else:
- dphi = self._derivative_1(x)
- return dphi
-
- def derivative_2(self, x):
- if self.kernel_object.optkernel == "squared_exponential":
- xs = self.GP.xs
- M = self.GP.M
- derivative = self.kernel_object.derivative_2(xs, x)
- res = torch.einsum('ij,kilm->kjlm', M, derivative)
- return res
- else:
- d2phi = self._derivative_2(x)
- return d2phi
+ def __init__(self, d, m, kernel_object, interval=[-1, 1], n=100, method="svd"):
+ self.d = d
+ self.m = m
+ self.interval = interval
+ self.size = self.get_m()
+ self.kernel_object = kernel_object
+
+ self.kernel = kernel_object.kernel
+ self.n = n
+ self.method = method
+ self.construct()
+
+ def construct(self):
+ xtest = interval_torch(
+ self.n, self.d, offset=[self.interval for _ in range(self.d)]
+ )
+ y = xtest[:, 0].view(-1, 1) * 0
+
+ self.new_kernel_object = KernelFunction(
+ kernel_name=self.kernel_object.optkernel,
+ gamma=self.kernel_object.gamma,
+ d=self.d,
+ )
+ self.GP = NystromFeatures(self.new_kernel_object, m=self.m, approx=self.method)
+ self.GP.fit_gp(xtest, y)
+
+ def basis_fun(self, x, j):
+ return self.GP.embed(x)[:, j].view(-1, 1)
+
+ def embed(self, x):
+ return self.GP.embed(x)
+
+ def _derivative_1(self, x):
+ dphi = batch_jacobian(self.embed, x).transpose(0, 1)
+ return dphi
+
+ def _derivative_2(self, x):
+ d2phi = batch_hessian(self.embed, x).transpose(0, 1).transpose(0, 2)
+ return d2phi
+
+ def derivative_1(self, x):
+ if self.kernel_object.optkernel == "squared_exponential":
+ xs = self.GP.xs
+ M = self.GP.M
+ derivative = self.kernel_object.derivative_1(xs, x)
+ res = torch.einsum("ij,kil->kjl", M, derivative)
+ return res
+ else:
+ dphi = self._derivative_1(x)
+ return dphi
+
+ def derivative_2(self, x):
+ if self.kernel_object.optkernel == "squared_exponential":
+ xs = self.GP.xs
+ M = self.GP.M
+ derivative = self.kernel_object.derivative_2(xs, x)
+ res = torch.einsum("ij,kilm->kjlm", M, derivative)
+ return res
+ else:
+ d2phi = self._derivative_2(x)
+ return d2phi
if __name__ == "__main__":
- from stpy.continuous_processes.kernelized_features import KernelizedFeatures
+ from stpy.continuous_processes.kernelized_features import KernelizedFeatures
- d = 1
- m = 200
- n = 128
- N = 10
+ d = 1
+ m = 200
+ n = 128
+ N = 10
- lam = 1.
+ lam = 1.0
- s = 0.0001
- gamma = 0.1
+ s = 0.0001
+ gamma = 0.1
- xtest = torch.from_numpy(interval(n, d))
- x = torch.from_numpy(interval(N, d))
+ xtest = torch.from_numpy(interval(n, d))
+ x = torch.from_numpy(interval(N, d))
- kernel_object = KernelFunction(gamma=gamma)
- Emb = PackingEmbedding(d, m, kernel_object=kernel_object, n=256, method='nothing')
- print(Emb.GP.M.size())
- GP = KernelizedFeatures(embedding=Emb, m=m, s=s, lam=lam, d=d)
- y = GP.sample(x) * 0
- y[5, 0] = 0.5
+ kernel_object = KernelFunction(gamma=gamma)
+ Emb = PackingEmbedding(d, m, kernel_object=kernel_object, n=256, method="nothing")
+ print(Emb.GP.M.size())
+ GP = KernelizedFeatures(embedding=Emb, m=m, s=s, lam=lam, d=d)
+ y = GP.sample(x) * 0
+ y[5, 0] = 0.5
- GP.fit_gp(x, y)
- mu, std = GP.mean_std(xtest)
+ GP.fit_gp(x, y)
+ mu, std = GP.mean_std(xtest)
- der = Emb.derivative_1(xtest)[:, :, 0]
- der_comp = Emb._derivative_1(xtest)[:, :, 0]
+ der = Emb.derivative_1(xtest)[:, :, 0]
+ der_comp = Emb._derivative_1(xtest)[:, :, 0]
- print(torch.norm(der - der_comp))
+ print(torch.norm(der - der_comp))
- der = der @ GP.theta_mean()
- der_comp = der_comp @ GP.theta_mean()
+ der = der @ GP.theta_mean()
+ der_comp = der_comp @ GP.theta_mean()
- der2 = Emb.derivative_2(xtest)[:, :, 0, 0]
- der2_comp = Emb._derivative_2(xtest)[:, :, 0, 0]
+ der2 = Emb.derivative_2(xtest)[:, :, 0, 0]
+ der2_comp = Emb._derivative_2(xtest)[:, :, 0, 0]
- print(torch.norm(der2 - der2_comp))
+ print(torch.norm(der2 - der2_comp))
- der2 = der2 @ GP.theta_mean()
- der2_comp = der2_comp @ GP.theta_mean()
+ der2 = der2 @ GP.theta_mean()
+ der2_comp = der2_comp @ GP.theta_mean()
- plt.plot(xtest, mu)
- plt.plot(xtest, der)
- plt.plot(xtest, der_comp, '--')
- plt.plot(xtest, der2)
- plt.plot(xtest, der2_comp, '--')
- plt.plot(x, y, 'bo')
- plt.grid()
- plt.show()
+ plt.plot(xtest, mu)
+ plt.plot(xtest, der)
+ plt.plot(xtest, der_comp, "--")
+ plt.plot(xtest, der2)
+ plt.plot(xtest, der2_comp, "--")
+ plt.plot(x, y, "bo")
+ plt.grid()
+ plt.show()
diff --git a/stpy/embeddings/polynomial_embedding.py b/stpy/embeddings/polynomial_embedding.py
index eba9a74..07be852 100755
--- a/stpy/embeddings/polynomial_embedding.py
+++ b/stpy/embeddings/polynomial_embedding.py
@@ -6,7 +6,7 @@
__email__ = "mojmir.mutny@inf.ethz.ch"
__status__ = "DEV"
-"""
+r"""
This file implements a polynomial embedding
k(x,y) = \Phi(x)^\top \Phi(y)
for kernels of the form (x^\top y + 1)^p
@@ -41,174 +41,189 @@
from sklearn.preprocessing import PolynomialFeatures
-class CustomEmbedding():
- def __init__(self, d, embedding_function, m, groups=None, quadrature="fixed"):
- self.d = d
- self.groups = groups
- self.embedding_function = embedding_function
- self.m = m
- self.quadrature = quadrature
-
- def embed(self, x):
- return self.embedding_function(x)
-
- def get_m(self):
- return self.m
-
- def integral(self, S):
- varphi = torch.zeros(size=(self.m, 1)).double()
-
- if self.quadrature == "fixed":
- if S.d == 1:
- weights, nodes = S.return_legendre_discretization(n=512)
- Z = self.embed(nodes)
- varphi = torch.einsum('i,ij->j', weights, Z)
- return varphi.view(-1, 1)
- elif S.d == 2:
- weights, nodes = S.return_legendre_discretization(n=50)
- Z = self.embed(nodes)
- varphi = torch.einsum('i,ij->j', weights, Z)
- return varphi.view(-1, 1)
- else:
- if S.d == 1:
- for i in range(self.m):
- Fi = lambda x: self.embed(torch.from_numpy(np.array(x)).view(1, -1)).view(-1).numpy()
- val, status = integrate.quad(Fi, float(S.bounds[0, 0]), float(S.bounds[0, 1]))
- varphi[i] = val
- elif S.d == 2:
- for i in range(self.m):
- Fi = lambda x: self.embed(x).view(-1)[i]
- integrand = lambda x, y: Fi(torch.Tensor([x, y]).view(1, 2).double()).numpy()
- val, status = integrate.dblquad(integrand, float(S.bounds[0, 0]), float(S.bounds[0, 1]),
- lambda x: float(S.bounds[1, 0]),
- lambda x: float(S.bounds[1, 1]), epsabs=1.49e-03, epsrel=1.49e-03)
- varphi[i] = val
- return varphi
-
-
-class PolynomialEmbedding():
-
- def __init__(self, d, p, kappa=1., groups=None, include_bias=True):
- self.d = d
- self.p = p
- self.kappa = kappa
- self.groups = groups
- self.compute(include_bias=include_bias)
- self.include_bias = include_bias
-
- def compute(self, include_bias=True):
- self.poly = PolynomialFeatures(degree=self.p, include_bias=include_bias)
- if self.groups is None:
- self.poly.fit_transform(np.random.randn(1, self.d))
- self.degrees = torch.from_numpy(self.poly.powers_).double()
- self.size = self.degrees.size()[0]
- else:
- self.degrees = []
- self.size = 0
- self.sizes = []
- for group in self.groups:
- self.poly.fit_transform(np.random.randn(1, len(group)))
- z = torch.from_numpy(self.poly.powers_).double()
- self.degrees.append(z)
- self.sizes.append(z.size()[0])
- self.size += z.size()[0]
-
- def embed_group(self, x, j):
- (n, d) = x.size()
- x = x.view(n, -1)
- Phi = torch.zeros(size=(n, self.sizes[j]), dtype=torch.float64)
- group = self.groups[j]
- for i in range(n):
- y = x[i, :]
- z = y.view(1, len(group))
- Phi[i, :] = torch.prod(torch.pow(z, self.degrees[j]), dim=1).view(-1)
- return Phi
-
- def get_sub_indices(self, group):
- ind = []
- for index, elem in enumerate(self.degrees):
- z = torch.sum(elem[0:group - 2]) + torch.sum(elem[group + 1:])
- if (elem[group] >= 0.0) and (z <= 0.):
- ind.append(index)
- return ind
-
- def embed(self, x):
- (n, d) = x.size()
- # zero = torch.pow(x[0,:] * 0, self.degrees)
- Phi = torch.zeros(size=(n, self.size), dtype=torch.float64)
-
- if self.groups is None:
- for i in range(n):
- y = x[i, :]
- Phi[i, :] = torch.prod(torch.pow(y, self.degrees), dim=1)
- else:
- for i in range(n):
- y = x[i, :]
- for j, group in enumerate(self.groups):
- z = y[group].view(1, len(group))
- start = int(np.sum(self.sizes[0:j]))
- end = np.sum(self.sizes[0:j + 1])
- Phi[i, start:end] = torch.prod(torch.pow(z, self.degrees[j]), dim=1).view(-1)
- return np.sqrt(self.kappa) * Phi
-
- def derivative_1(self, x):
- pass
-
- def derivative_2(self, x):
- pass
-
-
-class ChebyschevEmbedding():
-
-
- def get_m(self):
- return self.m
-
- def __init__(self, d, p, groups=None, include_bias=True):
- self.d = d
- self.p = p
- self.groups = groups
- self.c = np.ones(self.p)
- self.poly = cheb.Chebyshev(self.c)
- self.size = self.p
- self.m = self.p
-
- def embed(self, x):
- out = np.zeros(shape=(int(x.size()[0]), self.p))
- z = None
- for p in np.arange(1, self.p + 1, 1):
- c = np.ones(p)
- if p > 1:
- zold = z
- z = cheb.chebval(x.numpy(), c)
- out[:, p - 1] = (z - zold).reshape(-1)
- else:
- z = cheb.chebval(x.numpy(), c)
- out[:, p - 1] = z.reshape(-1)
- return torch.from_numpy(out)
-
- def derivative_1(self, x):
- pass
-
- def derivative_2(self, x):
- pass
+class CustomEmbedding:
+ def __init__(self, d, embedding_function, m, groups=None, quadrature="fixed"):
+ self.d = d
+ self.groups = groups
+ self.embedding_function = embedding_function
+ self.m = m
+ self.quadrature = quadrature
+
+ def embed(self, x):
+ return self.embedding_function(x)
+
+ def get_m(self):
+ return self.m
+
+ def integral(self, S):
+ varphi = torch.zeros(size=(self.m, 1)).double()
+
+ if self.quadrature == "fixed":
+ if S.d == 1:
+ weights, nodes = S.return_legendre_discretization(n=512)
+ Z = self.embed(nodes)
+ varphi = torch.einsum("i,ij->j", weights, Z)
+ return varphi.view(-1, 1)
+ elif S.d == 2:
+ weights, nodes = S.return_legendre_discretization(n=50)
+ Z = self.embed(nodes)
+ varphi = torch.einsum("i,ij->j", weights, Z)
+ return varphi.view(-1, 1)
+ else:
+ if S.d == 1:
+ for i in range(self.m):
+ Fi = (
+ lambda x: self.embed(torch.from_numpy(np.array(x)).view(1, -1))
+ .view(-1)
+ .numpy()
+ )
+ val, status = integrate.quad(
+ Fi, float(S.bounds[0, 0]), float(S.bounds[0, 1])
+ )
+ varphi[i] = val
+ elif S.d == 2:
+ for i in range(self.m):
+ Fi = lambda x: self.embed(x).view(-1)[i]
+ integrand = lambda x, y: Fi(
+ torch.tensor([x, y]).view(1, 2).double()
+ ).numpy()
+ val, status = integrate.dblquad(
+ integrand,
+ float(S.bounds[0, 0]),
+ float(S.bounds[0, 1]),
+ lambda x: float(S.bounds[1, 0]),
+ lambda x: float(S.bounds[1, 1]),
+ epsabs=1.49e-03,
+ epsrel=1.49e-03,
+ )
+ varphi[i] = val
+ return varphi
+
+
+class PolynomialEmbedding:
+
+ def __init__(self, d, p, kappa=1.0, groups=None, include_bias=True):
+ self.d = d
+ self.p = p
+ self.kappa = kappa
+ self.groups = groups
+ self.compute(include_bias=include_bias)
+ self.include_bias = include_bias
+
+ def compute(self, include_bias=True):
+ self.poly = PolynomialFeatures(degree=self.p, include_bias=include_bias)
+ if self.groups is None:
+ self.poly.fit_transform(np.random.randn(1, self.d))
+ self.degrees = torch.from_numpy(self.poly.powers_).double()
+ self.size = self.degrees.size()[0]
+ else:
+ self.degrees = []
+ self.size = 0
+ self.sizes = []
+ for group in self.groups:
+ self.poly.fit_transform(np.random.randn(1, len(group)))
+ z = torch.from_numpy(self.poly.powers_).double()
+ self.degrees.append(z)
+ self.sizes.append(z.size()[0])
+ self.size += z.size()[0]
+
+ def embed_group(self, x, j):
+ (n, d) = x.size()
+ x = x.view(n, -1)
+ Phi = torch.zeros(size=(n, self.sizes[j]), dtype=torch.float64)
+ group = self.groups[j]
+ for i in range(n):
+ y = x[i, :]
+ z = y.view(1, len(group))
+ Phi[i, :] = torch.prod(torch.pow(z, self.degrees[j]), dim=1).view(-1)
+ return Phi
+
+ def get_sub_indices(self, group):
+ ind = []
+ for index, elem in enumerate(self.degrees):
+ z = torch.sum(elem[0 : group - 2]) + torch.sum(elem[group + 1 :])
+ if (elem[group] >= 0.0) and (z <= 0.0):
+ ind.append(index)
+ return ind
+
+ def embed(self, x):
+ (n, d) = x.size()
+ # zero = torch.pow(x[0,:] * 0, self.degrees)
+ Phi = torch.zeros(size=(n, self.size), dtype=torch.float64)
+
+ if self.groups is None:
+ for i in range(n):
+ y = x[i, :]
+ Phi[i, :] = torch.prod(torch.pow(y, self.degrees), dim=1)
+ else:
+ for i in range(n):
+ y = x[i, :]
+ for j, group in enumerate(self.groups):
+ z = y[group].view(1, len(group))
+ start = int(np.sum(self.sizes[0:j]))
+ end = np.sum(self.sizes[0 : j + 1])
+ Phi[i, start:end] = torch.prod(
+ torch.pow(z, self.degrees[j]), dim=1
+ ).view(-1)
+ return np.sqrt(self.kappa) * Phi
+
+ def derivative_1(self, x):
+ pass
+
+ def derivative_2(self, x):
+ pass
+
+
+class ChebyschevEmbedding:
+
+ def get_m(self):
+ return self.m
+
+ def __init__(self, d, p, groups=None, include_bias=True):
+ self.d = d
+ self.p = p
+ self.groups = groups
+ self.c = np.ones(self.p)
+ self.poly = cheb.Chebyshev(self.c)
+ self.size = self.p
+ self.m = self.p
+
+ def embed(self, x):
+ out = np.zeros(shape=(int(x.size()[0]), self.p))
+ z = None
+ for p in np.arange(1, self.p + 1, 1):
+ c = np.ones(p)
+ if p > 1:
+ zold = z
+ z = cheb.chebval(x.numpy(), c)
+ out[:, p - 1] = (z - zold).reshape(-1)
+ else:
+ z = cheb.chebval(x.numpy(), c)
+ out[:, p - 1] = z.reshape(-1)
+ return torch.from_numpy(out)
+
+ def derivative_1(self, x):
+ pass
+
+ def derivative_2(self, x):
+ pass
if __name__ == "__main__":
- d = 2
- p = 4
- emb = PolynomialEmbedding(d, p, groups=[[0], [1]])
- x1 = torch.randn(size=(1, d), dtype=torch.float64)
- x2 = torch.randn(size=(1, d), dtype=torch.float64)
- xc = torch.cat((x1, x2))
-
- print(emb.embed(x1).size())
- print(emb.embed(x2).size())
- print(emb.embed(xc).size())
-
- print("--------")
- emb = PolynomialEmbedding(d, p)
- print(emb.get_sub_indices(0))
+ d = 2
+ p = 4
+ emb = PolynomialEmbedding(d, p, groups=[[0], [1]])
+ x1 = torch.randn(size=(1, d), dtype=torch.float64)
+ x2 = torch.randn(size=(1, d), dtype=torch.float64)
+ xc = torch.cat((x1, x2))
+
+ print(emb.embed(x1).size())
+ print(emb.embed(x2).size())
+ print(emb.embed(xc).size())
+
+ print("--------")
+ emb = PolynomialEmbedding(d, p)
+ print(emb.get_sub_indices(0))
# d = 1
# emb = ChebyschevEmbedding(d,3)
# x1 = torch.randn(size = (1,d), dtype = torch.float64)
diff --git a/stpy/embeddings/positive_embedding.py b/stpy/embeddings/positive_embedding.py
index 7899ad0..bef991b 100644
--- a/stpy/embeddings/positive_embedding.py
+++ b/stpy/embeddings/positive_embedding.py
@@ -1,7 +1,9 @@
+from typing import Optional
import cvxpy as cp
import mosek
import numpy as np
import scipy
+from stpy.kernels import KernelFunction
import torch
from stpy.borel_set import BorelSet
@@ -11,160 +13,246 @@
class PositiveEmbedding(Embedding):
- def __init__(self, d, m, kernel_object=None, interval=(-1, 1), B=1, b=0, s=0.001, offset=0.):
- self.d = d
- self.m = m
- self.b = b
- self.size = self.get_m()
- self.interval = interval
- if kernel_object is None:
- #self.kernel_object = KernelFunction()
- #self.kernel = lambda x, y: self.kernel_object.kernel(x, y)
- self.kernel = None
- else:
- self.kernel_object = kernel_object
- self.kernel = self.kernel_object.kernel
- self.B = B
- self.s = s
- self.offset = offset
-
- self.interval = (self.interval[0] - offset, self.interval[1] + offset)
-
- self.borel_set = BorelSet(d=1, bounds=torch.Tensor([[self.interval[0], self.interval[1]]]).double())
- self.mu = None
- self.precomp = False
- self.procomp_integrals = {}
-
- def get_size(self):
- return self.m ** self.d
-
- def integral(self, S):
- pass
-
- def basis_fun(self, x, j):
- pass
-
- def get_constraints(self):
- s = self.m ** self.d
- l = torch.from_numpy(np.full(s, self.b))
- u = torch.from_numpy(np.full(s, self.B))
- Lambda = torch.from_numpy(np.identity(s))
- return (l, Lambda, u)
-
- def cov(self, inverse=False):
- if self.precomp == False:
- dm = (self.interval[1] - self.interval[0]) / (self.m - 1) # delta m
- t = self.interval[0] + torch.linspace(0, self.m - 1, self.m) * dm
-
- if self.d == 1:
- t = t.view(-1, 1).double()
- elif self.d == 2:
- t = torch.from_numpy(cartesian([t.numpy(), t.numpy()])).double()
- elif self.d == 3:
- t = torch.from_numpy(cartesian([t.numpy(), t.numpy(), t.numpy()])).double()
- if self.kernel is not None:
- self.Gamma = self.kernel(t, t)
- Z = self.embed_internal(t)
- M = torch.pinverse(Z.T @ Z + (self.s) * torch.eye(self.Gamma.size()[0]))
- self.M = torch.from_numpy(np.real(scipy.linalg.sqrtm(M.numpy())))
- self.Gamma_half = torch.from_numpy(
- np.real(scipy.linalg.sqrtm(self.Gamma.numpy() + 1e-5 * (self.s ** 2) * np.eye(self.Gamma.size()[0]))))
- self.Gamma_half = self.M @ self.Gamma_half
- self.invGamma_half = torch.pinverse(self.Gamma_half)
- else:
- self.Gamma_half = torch.eye(self.m).double()
- self.precomp = True
- else:
- pass
-
- if inverse == True:
- return self.Gamma_half, self.invGamma_half
- else:
- return self.Gamma_half
-
- def embed_internal(self, x):
- if self.d == 1:
- out = torch.zeros(size=(x.size()[0], self.m), dtype=torch.float64)
- for j in range(self.m):
- out[:, j] = self.basis_fun(x, j).view(-1)
- return out
-
- elif self.d == 2:
- phi_1 = torch.cat([self.basis_fun(x[:, 0].view(-1, 1), j) for j in range(0, self.m)], dim=1)
- phi_2 = torch.cat([self.basis_fun(x[:, 1].view(-1, 1), j) for j in range(0, self.m)], dim=1)
- n = x.size()[0]
- out = []
- for i in range(n):
- out.append(torch.from_numpy(np.kron(phi_1[i, :].numpy(), phi_2[i, :].numpy())).view(1, -1))
- out = torch.cat(out, dim=0)
- return out
- elif self.d == 3:
- phi_1 = torch.cat([self.basis_fun(x[:, 0].view(-1, 1), j) for j in range(0, self.m)], dim=1)
- phi_2 = torch.cat([self.basis_fun(x[:, 1].view(-1, 1), j) for j in range(0, self.m)], dim=1)
- phi_3 = torch.cat([self.basis_fun(x[:, 2].view(-1, 1), j) for j in range(0, self.m)], dim=1)
-
- n = x.size()[0]
- out = []
- for i in range(n):
- out.append(
- torch.from_numpy(np.kron(phi_3[i, :], np.kron(phi_1[i, :].numpy(), phi_2[i, :].numpy()))).view(1,
- -1))
- out = torch.cat(out, dim=0)
- return out
-
- def fit(self, x, y, already_embeded=False):
- m = self.get_m()
-
- l, Lambda, u = self.get_constraints()
- Gamma_half = self.cov()
-
- if already_embeded == False:
- Phi = self.embed(x).numpy()
- else:
- Phi = x.numpy()
-
- xi = cp.Variable(m)
- obj = cp.Minimize(self.s ** 2 * cp.norm2(xi) + cp.sum_squares(Phi @ xi - y.numpy().reshape(-1)))
-
- constraints = []
- Lambda = Lambda @ Gamma_half.numpy()
- if not np.all(l == -np.inf):
- constraints.append(Lambda[l != -np.inf] @ xi >= l[l != -np.inf])
- if not np.all(u == np.inf):
- constraints.append(Lambda[u != np.inf] @ xi <= u[u != np.inf])
-
- prob = cp.Problem(obj, constraints)
- prob.solve(solver=cp.MOSEK, warm_start=False,
- verbose=False, mosek_params={mosek.iparam.intpnt_solve_form: mosek.solveform.dual})
-
- if prob.status != "optimal":
- raise ValueError('cannot compute the mode')
-
- mode = xi.value
- self.mode = torch.from_numpy(mode).view(-1, 1)
- self.mu = self.mode
- return mode
-
- def embed(self, x):
- Gamma_half = self.cov()
- return self.embed_internal(x) @ Gamma_half
-
- def mean(self, xtest):
- embeding = self.embed(xtest)
- mean = embeding @ self.mu
- return mean
-
- def mean_std(self, xtest):
- embeding = self.embed(xtest)
- mean = embeding @ self.mu
- return mean, None
-
- def sample_theta(self):
- self.mu = torch.randn(size=(self.get_m(), 1))
- return self.mu
-
- def sample(self, xtest, size=1):
- return self.embed(xtest) @ self.sample_theta()
-
- def get_m(self):
- return self.m ** self.d
+ def __init__(
+ self,
+ d,
+ m,
+ kernel_object: Optional[KernelFunction] = None,
+ interval=(-1, 1),
+ B=1.0,
+ b=0.0,
+ s=0.001,
+ offset=0.0,
+ ):
+ """
+
+ Parameters
+ ----------
+ d
+ Dimension of the embedding
+ m
+ Number of basis functions
+ b, optional
+ Minimal value of the rate function, by default 0
+ B, optional
+ Maximal value of the rate function, by default 1
+ """
+ self.d = d
+ """ Dimension of the embedding """
+ self.m = m
+ """ Number of basis functions """
+ self.b = b
+ """ Minimal value of the rate function """
+ self.size = self.get_m()
+        """ Total number of basis functions over all dimensions, m**d (see get_m) """
+ self.interval = interval
+ if kernel_object is None:
+ # self.kernel_object = KernelFunction()
+ # self.kernel = lambda x, y: self.kernel_object.kernel(x, y)
+ self.kernel = None
+ else:
+ self.kernel_object = kernel_object
+ self.kernel = self.kernel_object.kernel
+ self.B = B
+ self.s = s
+ self.offset = offset
+
+ self.interval = (self.interval[0] - offset, self.interval[1] + offset)
+
+ self.borel_set = BorelSet(
+ d=1, bounds=torch.tensor([[self.interval[0], self.interval[1]]]).double()
+ )
+ self.mu = None
+ self.precomp = False
+ self.precomp_integral = {}
+
+ def get_size(self):
+ return self.m**self.d
+
+ def integral(self, S) -> torch.Tensor:
+ raise NotImplementedError("Subclasses should implement this method.")
+
+ def basis_fun(self, x, j):
+ r"""
+ Return the value of basis function \phi_j(x)
+
+ :param x: double, need to be in the interval
+ :param j: integer, index of hat functions, 0 <= j <= m-1
+ :return: \phi_j(x)
+ """
+ pass
+
+ def get_constraints(self):
+ s = self.m**self.d
+ l = torch.tensor(np.full(s, self.b))
+ u = torch.tensor(np.full(s, self.B))
+ Lambda = torch.tensor(np.identity(s))
+ return (l, Lambda, u)
+
+    def cov(self, inverse=False):
+        r"""Return the cached mixing matrix that is applied to the raw basis functions.
+
+        On the first call the matrix is computed on an equally spaced grid
+        $t_1, \ldots, t_{m^d}$ over the d-fold cartesian product of
+        `self.interval` and stored in `self.Gamma_half`:
+
+        $\Gamma^{1/2} = \sqrt{(V^T V + s I)^{+}} \, \sqrt{K + 10^{-5} s^2 I}$
+
+        where $V_{ij} = \phi_j(t_i)$ (see `embed_internal`),
+        $K_{ij} = k(t_i, t_j)$ and $(\cdot)^{+}$ is the pseudo-inverse.
+        Without a kernel the identity of size `get_m()` is used for both the
+        matrix and its inverse.
+
+        :param inverse: if true, also return the pseudo-inverse of the matrix
+        :return: `Gamma_half`, or `(Gamma_half, invGamma_half)` when `inverse` is set
+        """
+        if not self.precomp:
+            dm = (self.interval[1] - self.interval[0]) / (self.m - 1)  # grid spacing
+            t = self.interval[0] + torch.linspace(0, self.m - 1, self.m) * dm
+
+            if self.d == 1:
+                t = t.view(-1, 1).double()
+            elif self.d == 2:
+                t = torch.tensor(cartesian([t.cpu().numpy(), t.cpu().numpy()])).double()
+            elif self.d == 3:
+                t = torch.tensor(
+                    cartesian([t.cpu().numpy(), t.cpu().numpy(), t.cpu().numpy()])
+                ).double()
+            if self.kernel is not None:
+                self.Gamma = self.kernel(t, t)
+                Z = self.embed_internal(t)
+                M = torch.pinverse(Z.T @ Z + (self.s) * torch.eye(self.Gamma.size()[0]))
+                self.M = torch.tensor(np.real(scipy.linalg.sqrtm(M.cpu().numpy())))
+                self.Gamma_half = torch.tensor(
+                    np.real(
+                        scipy.linalg.sqrtm(
+                            self.Gamma.cpu().numpy()
+                            + 1e-5 * (self.s**2) * np.eye(self.Gamma.size()[0])
+                        )
+                    )
+                )
+                self.Gamma_half = self.M @ self.Gamma_half
+                self.invGamma_half = torch.pinverse(self.Gamma_half)
+            else:
+                # embed_internal yields m**d columns, so the identity must be
+                # m**d x m**d; the inverse is set too so cov(inverse=True) works.
+                self.Gamma_half = torch.eye(self.get_m()).double()
+                self.invGamma_half = self.Gamma_half.clone()
+            self.precomp = True
+
+        if inverse:
+            return self.Gamma_half, self.invGamma_half
+        return self.Gamma_half
+
+ def embed_internal(self, x):
+ r"""Returns a tensor $T$ where $T_{i,j} = \phi_j(x_i)$."""
+ if self.d == 1:
+ out = torch.zeros(size=(x.size()[0], self.m), dtype=torch.float64)
+ for j in range(self.m):
+ out[:, j] = self.basis_fun(x, j).view(-1)
+ return out
+
+ elif self.d == 2:
+ phi_1 = torch.cat(
+ [self.basis_fun(x[:, 0].view(-1, 1), j) for j in range(0, self.m)],
+ dim=1,
+ )
+ phi_2 = torch.cat(
+ [self.basis_fun(x[:, 1].view(-1, 1), j) for j in range(0, self.m)],
+ dim=1,
+ )
+ n = x.size()[0]
+ out = []
+ for i in range(n):
+ out.append(
+ torch.tensor(
+ np.kron(phi_1[i, :].cpu().numpy(), phi_2[i, :].cpu().numpy()),
+ ).view(1, -1)
+ )
+ out = torch.cat(out, dim=0)
+ return out
+ elif self.d == 3:
+ phi_1 = torch.cat(
+ [self.basis_fun(x[:, 0].view(-1, 1), j) for j in range(0, self.m)],
+ dim=1,
+ )
+ phi_2 = torch.cat(
+ [self.basis_fun(x[:, 1].view(-1, 1), j) for j in range(0, self.m)],
+ dim=1,
+ )
+ phi_3 = torch.cat(
+ [self.basis_fun(x[:, 2].view(-1, 1), j) for j in range(0, self.m)],
+ dim=1,
+ )
+
+ n = x.size()[0]
+ out = []
+ for i in range(n):
+ out.append(
+ torch.tensor(
+ np.kron(
+ phi_3[i, :],
+ np.kron(
+ phi_1[i, :].cpu().numpy(), phi_2[i, :].cpu().numpy()
+ ),
+ )
+ ).view(1, -1)
+ )
+ out = torch.cat(out, dim=0)
+ return out
+
+ def fit(self, x, y, already_embeded=False):
+ m = self.get_m()
+
+ l, Lambda, u = self.get_constraints()
+ Gamma_half = self.cov()
+
+ if already_embeded == False:
+ Phi = self.embed(x).numpy()
+ else:
+ Phi = x.cpu().numpy()
+
+ xi = cp.Variable(m)
+ obj = cp.Minimize(
+ self.s**2 * cp.norm2(xi)
+ + cp.sum_squares(Phi @ xi - y.cpu().numpy().reshape(-1))
+ )
+
+ constraints = []
+ Lambda = Lambda @ Gamma_half.cpu().numpy()
+ if not np.all(l == -np.inf):
+ constraints.append(Lambda[l != -np.inf] @ xi >= l[l != -np.inf])
+ if not np.all(u == np.inf):
+ constraints.append(Lambda[u != np.inf] @ xi <= u[u != np.inf])
+
+ prob = cp.Problem(obj, constraints)
+ prob.solve(
+ solver=cp.MOSEK,
+ warm_start=False,
+ verbose=False,
+ mosek_params={mosek.iparam.intpnt_solve_form: mosek.solveform.dual},
+ )
+
+ if prob.status != "optimal":
+ raise ValueError("cannot compute the mode")
+
+ mode = xi.value
+ self.mode = torch.tensor(mode).view(-1, 1)
+ self.mu = self.mode
+ return mode
+
+ def embed(self, x):
+ r"""Calculates $\Phi(x)^T = \phi(x)^T \Gamma^T$"""
+ Gamma_half = self.cov()
+ return self.embed_internal(x) @ Gamma_half
+
+ def mean(self, xtest):
+ embeding = self.embed(xtest)
+ mean = embeding @ self.mu
+ return mean
+
+ def mean_std(self, xtest):
+ embeding = self.embed(xtest)
+ mean = embeding @ self.mu
+ return mean, None
+
+ def sample_theta(self):
+ self.mu = torch.randn(size=(self.get_m(), 1))
+ return self.mu
+
+ def sample(self, xtest, size=1):
+ return self.embed(xtest) @ self.sample_theta()
+
+ def get_m(self):
+ return self.m**self.d
diff --git a/stpy/embeddings/random_nn.py b/stpy/embeddings/random_nn.py
index bf5c57a..b94ae50 100755
--- a/stpy/embeddings/random_nn.py
+++ b/stpy/embeddings/random_nn.py
@@ -5,184 +5,216 @@
class RandomMap(nn.Module):
- def __init__(self, d, m, fun, output=2):
- super(RandomMap, self).__init__()
- self.W = torch.normal(mean=torch.zeros(m, d, dtype=torch.float64), std=1. / np.sqrt(d * m) ** 2)
- self.W.requires_grad_(True)
- self.w = torch.normal(mean=torch.zeros(m, output, dtype=torch.float64), std=1. / np.sqrt(d * m) ** 2)
- self.w.requires_grad_(True)
- self.b = torch.normal(mean=torch.zeros(output, dtype=torch.float64), std=1. / np.sqrt(d * m) ** 2)
- self.b.requires_grad_(True)
- self.fun = fun
- self.output = output
-
- def map(self, x):
- y = self.fun(torch.mm(self.W, torch.t(x)))
- return y
-
- def forward(self, x):
- z = self.map(x)
- z = torch.mm(torch.t(z), self.w)
- return z
-
- def get_params(self):
- return [self.W, self.w]
-
- def get_params_last(self):
- return [self.w]
-
- def fit_map(self, x, y, epochs=1000, verbose=False, reg=0.1, lr=0.1):
- criterion = nn.MSELoss()
-
- import torch.optim as optim
- optimizer = optim.SGD([self.W, self.w], lr=lr)
-
- batch_size = 100
-
- for i in range(epochs):
- for j in range(x.size()[0] // batch_size):
- optimizer.zero_grad() # zero the gradient buffers
- output = self.forward(x[j * batch_size:(j + 1) * batch_size])
- loss = criterion(output, y[j * batch_size:(j + 1) * batch_size])
- loss.backward(retain_graph=True)
- optimizer.step() # Does the update
-
- if verbose == True or i % verbose == 0:
- output = self.forward(x)
- loss_full = criterion(output, y)
- print(i, loss_full)
- optimizer.step() # Does the update
-
- def fit_map_lasso(self, x, y, epochs=1000, verbose=False, reg=0.1, lr=0.1, l1=0.1):
- criterion = nn.MSELoss()
-
- import torch.optim as optim
- optimizer = optim.SGD([self.W, self.w], lr=lr)
-
- batch_size = 100
-
- for i in range(epochs):
- for j in range(x.size()[0] // batch_size):
- optimizer.zero_grad() # zero the gradient buffers
- output = self.forward(x[j * batch_size:(j + 1) * batch_size])
- loss = criterion(output, y[j * batch_size:(j + 1) * batch_size]) + l1 * torch.norm(self.W, 2)
- loss.backward(retain_graph=True)
- optimizer.step() # Does the update
-
- if verbose == True or i % verbose == 0:
- output = self.forward(x)
- loss_full = criterion(output, y)
- print(i, loss_full)
- optimizer.step() # Does the update
-
- def loss(self, x, y):
- criterion = nn.MSELoss()
- output = self.forward(x)
- loss = criterion(output, y)
-
- return loss
-
- def fit_last_layer(self):
- # same as before but different parameters
- pass
+ def __init__(self, d, m, fun, output=2):
+ super(RandomMap, self).__init__()
+ self.W = torch.normal(
+ mean=torch.zeros(m, d, dtype=torch.float64), std=1.0 / np.sqrt(d * m) ** 2
+ )
+ self.W.requires_grad_(True)
+ self.w = torch.normal(
+ mean=torch.zeros(m, output, dtype=torch.float64),
+ std=1.0 / np.sqrt(d * m) ** 2,
+ )
+ self.w.requires_grad_(True)
+ self.b = torch.normal(
+ mean=torch.zeros(output, dtype=torch.float64), std=1.0 / np.sqrt(d * m) ** 2
+ )
+ self.b.requires_grad_(True)
+ self.fun = fun
+ self.output = output
+
+ def map(self, x):
+ y = self.fun(torch.mm(self.W, torch.t(x)))
+ return y
+
+ def forward(self, x):
+ z = self.map(x)
+ z = torch.mm(torch.t(z), self.w)
+ return z
+
+ def get_params(self):
+ return [self.W, self.w]
+
+ def get_params_last(self):
+ return [self.w]
+
+    def fit_map(self, x, y, epochs=1000, verbose=False, reg=0.1, lr=0.1):
+        """Fit `self.W` and `self.w` to (x, y) with mini-batch SGD on the MSE loss.
+
+        :param x: inputs; rows are consumed in consecutive batches of 100 and a
+            trailing partial batch is not used
+        :param y: targets, sliced with the same batch indices as `x`
+        :param epochs: number of passes over the data
+        :param verbose: False prints nothing, True prints the full-data loss
+            after every epoch, an integer k prints it every k-th epoch
+        :param reg: not used by this method
+        :param lr: SGD learning rate
+        """
+        criterion = nn.MSELoss()
+
+        import torch.optim as optim
+
+        optimizer = optim.SGD([self.W, self.w], lr=lr)
+
+        batch_size = 100
+
+        for i in range(epochs):
+            for j in range(x.size()[0] // batch_size):
+                optimizer.zero_grad()  # zero the gradient buffers
+                output = self.forward(x[j * batch_size : (j + 1) * batch_size])
+                loss = criterion(output, y[j * batch_size : (j + 1) * batch_size])
+                loss.backward(retain_graph=True)
+                optimizer.step()  # Does the update
+
+            # Test truthiness first: `i % verbose` divides by zero when verbose
+            # is False/0. True behaves like 1, i.e. report every epoch.
+            if verbose and i % verbose == 0:
+                output = self.forward(x)
+                loss_full = criterion(output, y)
+                print(i, loss_full)
+            # No optimizer.step() here: no backward pass ran since the last
+            # batch, so a step would only re-apply that batch's stale gradient.
+
+    def fit_map_lasso(self, x, y, epochs=1000, verbose=False, reg=0.1, lr=0.1, l1=0.1):
+        """Fit `self.W` and `self.w` with mini-batch SGD on a penalised MSE loss.
+
+        The per-batch objective is the MSE loss plus `l1 * torch.norm(self.W, 2)`.
+
+        :param x: inputs; rows are consumed in consecutive batches of 100 and a
+            trailing partial batch is not used
+        :param y: targets, sliced with the same batch indices as `x`
+        :param epochs: number of passes over the data
+        :param verbose: False prints nothing, True prints the unpenalised
+            full-data loss after every epoch, an integer k every k-th epoch
+        :param reg: not used by this method
+        :param lr: SGD learning rate
+        :param l1: weight of the norm penalty on `self.W`
+        """
+        criterion = nn.MSELoss()
+
+        import torch.optim as optim
+
+        optimizer = optim.SGD([self.W, self.w], lr=lr)
+
+        batch_size = 100
+
+        for i in range(epochs):
+            for j in range(x.size()[0] // batch_size):
+                optimizer.zero_grad()  # zero the gradient buffers
+                output = self.forward(x[j * batch_size : (j + 1) * batch_size])
+                loss = criterion(
+                    output, y[j * batch_size : (j + 1) * batch_size]
+                ) + l1 * torch.norm(self.W, 2)
+                loss.backward(retain_graph=True)
+                optimizer.step()  # Does the update
+
+            # Test truthiness first: `i % verbose` divides by zero when verbose
+            # is False/0. True behaves like 1, i.e. report every epoch.
+            if verbose and i % verbose == 0:
+                output = self.forward(x)
+                loss_full = criterion(output, y)
+                print(i, loss_full)
+            # No optimizer.step() here: no backward pass ran since the last
+            # batch, so a step would only re-apply that batch's stale gradient.
+
+ def loss(self, x, y):
+ criterion = nn.MSELoss()
+ output = self.forward(x)
+ loss = criterion(output, y)
+
+ return loss
+
+ def fit_last_layer(self):
+ # same as before but different parameters
+ pass
class SpecificMap(RandomMap):
- def __init__(self, d, m, fun, map, output=2):
- super(SpecificMap, self).__init__(d, m, fun, output=2)
- self.map = map
+ def __init__(self, d, m, fun, map, output=2):
+ super(SpecificMap, self).__init__(d, m, fun, output=2)
+ self.map = map
- def forward(self, x):
- z = self.map(x)
- z = torch.mm(torch.t(z), self.w)
- return z
+ def forward(self, x):
+ z = self.map(x)
+ z = torch.mm(torch.t(z), self.w)
+ return z
- def get_params(self):
- return [self.w]
+ def get_params(self):
+ return [self.w]
def RandomMapStacked(RandomMap):
- def __init__(self, d, m, fun, output=2):
- super(RandomMap, self).__init__()
- self.W = torch.normal(mean=torch.zeros(m, d, dtype=torch.float64), std=1. / np.sqrt(d * m) ** 2)
- self.W.requires_grad_(True)
- self.w = torch.normal(mean=torch.zeros(m, output, dtype=torch.float64), std=1. / np.sqrt(d * m) ** 2)
- self.w.requires_grad_(True)
- self.b = torch.normal(mean=torch.zeros(m, 1, dtype=torch.float64), std=1. / np.sqrt(d * m) ** 2)
- self.b.requires_grad_(True)
- self.fun = fun
- self.output = output
-
- def map(self, x):
- y = self.fun(torch.mm(self.W, torch.t(x)) + self.b)
- return y
-
- def fit_map(self, x, y):
- pass
+ def __init__(self, d, m, fun, output=2):
+ super(RandomMap, self).__init__()
+ self.W = torch.normal(
+ mean=torch.zeros(m, d, dtype=torch.float64), std=1.0 / np.sqrt(d * m) ** 2
+ )
+ self.W.requires_grad_(True)
+ self.w = torch.normal(
+ mean=torch.zeros(m, output, dtype=torch.float64),
+ std=1.0 / np.sqrt(d * m) ** 2,
+ )
+ self.w.requires_grad_(True)
+ self.b = torch.normal(
+ mean=torch.zeros(m, 1, dtype=torch.float64), std=1.0 / np.sqrt(d * m) ** 2
+ )
+ self.b.requires_grad_(True)
+ self.fun = fun
+ self.output = output
+
+ def map(self, x):
+ y = self.fun(torch.mm(self.W, torch.t(x)) + self.b)
+ return y
+
+ def fit_map(self, x, y):
+ pass
class RandomOrthogonalMap(RandomMap):
- def __init__(self, d, m, fun, output=1):
- super(RandomMap, self).__init__()
- self.m = m
+ def __init__(self, d, m, fun, output=1):
+ super(RandomMap, self).__init__()
+ self.m = m
- self.R = torch.normal(mean=torch.zeros(m, d, dtype=torch.float64), std=1. / np.sqrt(d * m) ** 2)
- self.R = nn.init.orthogonal_(self.R)
- self.R.requires_grad_(True)
+ self.R = torch.normal(
+ mean=torch.zeros(m, d, dtype=torch.float64), std=1.0 / np.sqrt(d * m) ** 2
+ )
+ self.R = nn.init.orthogonal_(self.R)
+ self.R.requires_grad_(True)
- self.w = torch.normal(mean=torch.zeros(m, output, dtype=torch.float64), std=1. / np.sqrt(d * m) ** 2)
- self.w.requires_grad_(True)
+ self.w = torch.normal(
+ mean=torch.zeros(m, output, dtype=torch.float64),
+ std=1.0 / np.sqrt(d * m) ** 2,
+ )
+ self.w.requires_grad_(True)
- self.fun = fun
- self.output = output
+ self.fun = fun
+ self.output = output
- def map(self, x):
- y = self.fun(torch.mm(self.R, torch.t(x)))
- return y
+ def map(self, x):
+ y = self.fun(torch.mm(self.R, torch.t(x)))
+ return y
- def fit_map(self, x, y, epochs=1000, verbose=False, reg=0.1, lr=0.1):
- criterion = nn.MSELoss()
+ def fit_map(self, x, y, epochs=1000, verbose=False, reg=0.1, lr=0.1):
+ criterion = nn.MSELoss()
- import torch.optim as optim
+ import torch.optim as optim
- optimizer = optim.SGD([self.R, self.w], lr=lr)
- orth_loss = torch.norm(torch.mm(self.R, torch.t(self.R)) - torch.eye(self.m, self.m, dtype=torch.float64)) ** 2
+ optimizer = optim.SGD([self.R, self.w], lr=lr)
+ orth_loss = (
+ torch.norm(
+ torch.mm(self.R, torch.t(self.R))
+ - torch.eye(self.m, self.m, dtype=torch.float64)
+ )
+ ** 2
+ )
- batch_size = 100
+ batch_size = 100
- for i in range(epochs):
- for j in range(x.size()[0] // batch_size):
- optimizer.zero_grad() # zero the gradient buffers
- output = self.forward(x[j * batch_size:(j + 1) * batch_size])
- loss = criterion(output, y[j * batch_size:(j + 1) * batch_size]) + reg * orth_loss
- loss.backward(retain_graph=True)
- optimizer.step() # Does the update
+ for i in range(epochs):
+ for j in range(x.size()[0] // batch_size):
+ optimizer.zero_grad() # zero the gradient buffers
+ output = self.forward(x[j * batch_size : (j + 1) * batch_size])
+ loss = (
+ criterion(output, y[j * batch_size : (j + 1) * batch_size])
+ + reg * orth_loss
+ )
+ loss.backward(retain_graph=True)
+ optimizer.step() # Does the update
- if verbose == True or i % verbose == 0:
- output = self.forward(x)
- loss_full = criterion(output, y) + reg * orth_loss
- print(i, loss_full)
+ if verbose == True or i % verbose == 0:
+ output = self.forward(x)
+ loss_full = criterion(output, y) + reg * orth_loss
+ print(i, loss_full)
-class RandomNestedMap():
+class RandomNestedMap:
- def __init__(self):
- pass
+ def __init__(self):
+ pass
if __name__ == "__main__":
- ridge = lambda x: torch.tanh(x)
+ ridge = lambda x: torch.tanh(x)
- N = 1000
- d = 10
- m = 2
+ N = 1000
+ d = 10
+ m = 2
- NetOriginal = RandomMap(d, m, ridge)
+ NetOriginal = RandomMap(d, m, ridge)
- x = 10 * torch.normal(mean=torch.zeros(N, d, dtype=torch.float64) + 2, std=100.)
- y = NetOriginal.forward(x)
+ x = 10 * torch.normal(mean=torch.zeros(N, d, dtype=torch.float64) + 2, std=100.0)
+ y = NetOriginal.forward(x)
- Net = RandomMap(d, m, ridge)
- Net.fit_map(x, y)
+ Net = RandomMap(d, m, ridge)
+ Net.fit_map(x, y)
diff --git a/stpy/embeddings/transformations.py b/stpy/embeddings/transformations.py
index 84f58a0..75cc74b 100755
--- a/stpy/embeddings/transformations.py
+++ b/stpy/embeddings/transformations.py
@@ -9,66 +9,68 @@
class Transformation(Embedding):
- def __init__(self):
- pass
+ def __init__(self):
+ pass
- def embed(self, x):
- pass
+ def embed(self, x):
+ pass
- def linear_embedding(self):
- embed = lambda x: x
- return embed
+ def linear_embedding(self):
+ embed = lambda x: x
+ return embed
- def create_polynomial_embeding(self, degree, d, kappa=1., bias=False):
- """
- create polynomial embeding
+ def create_polynomial_embeding(self, degree, d, kappa=1.0, bias=False):
+ """
+ create polynomial embeding
- :param degree:
- :param d:
- :return:
- """
- m = int(comb(degree + d - 1, degree - 1)) + int(bias)
- poly = PolynomialFeatures(degree, include_bias=bias)
- embed = lambda x: kappa * torch.from_numpy(poly.fit_transform(x.numpy()))
- return embed, m
- return (nodes, weights)
+ :param degree:
+ :param d:
+ :return:
+ """
+ m = int(comb(degree + d - 1, degree - 1)) + int(bias)
+ poly = PolynomialFeatures(degree, include_bias=bias)
+ embed = lambda x: kappa * torch.from_numpy(poly.fit_transform(x.numpy()))
+ return embed, m
+ return (nodes, weights)
- def embed(self, x):
- (times, d) = tuple(x.size())
- # z = torch.from_numpy(np.zeros(shape=(self.m, times),dtype=x.dtype))
- z = torch.zeros(self.m, times, dtype=x.dtype)
- q = torch.mm(self.W[:, 0:d], torch.t(x))
- z[0:int(self.m / 2), :] = torch.cos(q)
- z[int(self.m / 2):self.m, :] = torch.sin(q)
- return torch.t(z)
+ def embed(self, x):
+ (times, d) = tuple(x.size())
+ # z = torch.from_numpy(np.zeros(shape=(self.m, times),dtype=x.dtype))
+ z = torch.zeros(self.m, times, dtype=x.dtype)
+ q = torch.mm(self.W[:, 0:d], torch.t(x))
+ z[0 : int(self.m / 2), :] = torch.cos(q)
+ z[int(self.m / 2) : self.m, :] = torch.sin(q)
+ return torch.t(z)
- def create_fourier_embeding(self, cutoff, d, domain, bias=False):
- self.m = 2 * cutoff - 2 * int(bias)
- self.d = d
- omegas = np.arange(int(bias), cutoff, 1) * 2. * np.pi / (2 * domain)
- print(omegas)
- v = [omegas for omega in range(self.d)]
- self.W = torch.from_numpy(helper.cartesian(v))
- embed = lambda x: self.embed(x)
- return embed, self.m
+ def create_fourier_embeding(self, cutoff, d, domain, bias=False):
+ self.m = 2 * cutoff - 2 * int(bias)
+ self.d = d
+ omegas = np.arange(int(bias), cutoff, 1) * 2.0 * np.pi / (2 * domain)
+ print(omegas)
+ v = [omegas for omega in range(self.d)]
+ self.W = torch.from_numpy(helper.cartesian(v))
+ embed = lambda x: self.embed(x)
+ return embed, self.m
- def create_cosine_embeding(self, cutoff, d, domain):
- self.m = cutoff
- self.d = d
- omegas = np.arange(0, cutoff, 1) * 2. * np.pi / (2 * domain)
- print(omegas)
- v = [omegas for omega in range(self.d)]
- self.W = torch.from_numpy(helper.cartesian(v))
- embed = lambda x: torch.t(torch.cos(torch.mm(self.W[:, 0:d], torch.t(x))))
- return embed, self.m
+ def create_cosine_embeding(self, cutoff, d, domain):
+ self.m = cutoff
+ self.d = d
+ omegas = np.arange(0, cutoff, 1) * 2.0 * np.pi / (2 * domain)
+ print(omegas)
+ v = [omegas for omega in range(self.d)]
+ self.W = torch.from_numpy(helper.cartesian(v))
+ embed = lambda x: torch.t(torch.cos(torch.mm(self.W[:, 0:d], torch.t(x))))
+ return embed, self.m
- def create_cosine_power_embeding(self, cutoff, d, domain):
- self.m = cutoff + 1
- self.d = d
- print(np.logspace(0, cutoff, num=cutoff + 1, base=2))
- omegas = np.logspace(0, cutoff, num=cutoff + 1, base=2) * 2. * np.pi / (2 * domain)
- print(omegas)
- v = [omegas for omega in range(self.d)]
- self.W = torch.from_numpy(helper.cartesian(v))
- embed = lambda x: torch.t(torch.cos(torch.mm(self.W[:, 0:d], torch.t(x))))
- return embed, self.m
+ def create_cosine_power_embeding(self, cutoff, d, domain):
+ self.m = cutoff + 1
+ self.d = d
+ print(np.logspace(0, cutoff, num=cutoff + 1, base=2))
+ omegas = (
+ np.logspace(0, cutoff, num=cutoff + 1, base=2) * 2.0 * np.pi / (2 * domain)
+ )
+ print(omegas)
+ v = [omegas for omega in range(self.d)]
+ self.W = torch.from_numpy(helper.cartesian(v))
+ embed = lambda x: torch.t(torch.cos(torch.mm(self.W[:, 0:d], torch.t(x))))
+ return embed, self.m
diff --git a/stpy/embeddings/triangle_base.py b/stpy/embeddings/triangle_base.py
new file mode 100644
index 0000000..4b040ad
--- /dev/null
+++ b/stpy/embeddings/triangle_base.py
@@ -0,0 +1,85 @@
+import numpy as np
+import scipy
+import torch
+
+from stpy.borel_set import BorelSet
+from stpy.continuous_processes.nystrom_fea import NystromFeatures
+from stpy.embeddings.positive_embedding import PositiveEmbedding
+from stpy.kernels import KernelFunction
+
+
+class EfficientTriangleEmbedding(PositiveEmbedding):
+
+ def __init__(self, *args, **kwargs):
+
+ super().__init__(*args, **kwargs)
+
+ self._t = torch.linspace(
+ self.interval[0], self.interval[1], steps=self.m, dtype=torch.float64
+ )
+ self._dm = (self.interval[1] - self.interval[0]) / (self.m - 1)
+
+ def basis_fun(self, x: torch.Tensor, j: int):
+ r"""
+ Return the value of 1d basis function $\phi_{j}$
+ over all dimensions of x
+
+ :param x: double, need to be in the interval
+ :param j: integer, index of hat functions, 0 <= j <= m-1
+        :return: $\{\phi_j(x_1), \ldots, \phi_j(x_n)\}$
+ """
+ res = torch.clamp(1 - torch.abs((x - self._t[j]) / self._dm), min=0)
+ return res
+
+ def integrate_1d(self, a: torch.Tensor, b: torch.Tensor, t: torch.Tensor):
+ """
+        :param a: lower integration limit
+        :param b: upper integration limit
+        :param t: tensor of triangle centers
+        :return: 1d integral over triangle basis functions given by centers and self._dm
+ """
+
+ def rising_integral(x):
+ return (x - t + self._dm) ** 2 / (self._dm * 2.0)
+
+ def falling_integral(x):
+ return -((x - t - self._dm) ** 2) / (self._dm * 2.0)
+
+ i = rising_integral(torch.clamp(b, t - self._dm, t)) - rising_integral(
+ torch.clamp(a, t - self._dm, t)
+ )
+ i += falling_integral(torch.clamp(b, t, t + self._dm)) - falling_integral(
+ torch.clamp(a, t, t + self._dm)
+ )
+
+ return i
+
+ def integral(self, S):
+ r"""
+ Integrate the Phi(x) over S
+ :param S: borel set
+ :return: $\int_S \Phi(x) dx$
+ """
+ if S in self.precomp_integral.keys():
+ return self.precomp_integral[S]
+
+ else:
+ assert S.d == self.d
+ psi = torch.ones(self.m).double()
+ if S.type == "box":
+ psi = torch.tensor([1.0]).double()
+ for i in range(self.d):
+ a, b = S.bounds[i, 0].double(), S.bounds[i, 1].double()
+ p = self.integrate_1d(a, b, self._t)
+ # multiply each with each element and flatten
+ psi = torch.outer(psi, p).flatten()
+
+ elif S.type == "round":
+ weights, nodes = S.return_legendre_discretization(30)
+ vals = self.embed_internal(nodes)
+ psi = weights.view(1, -1) @ vals
+
+ Gamma_half = self.cov()
+ emb = psi @ Gamma_half
+ self.precomp_integral[S] = emb
+ return emb
diff --git a/stpy/embeddings/weighted_embedding.py b/stpy/embeddings/weighted_embedding.py
index 0c03c99..6ff5a85 100644
--- a/stpy/embeddings/weighted_embedding.py
+++ b/stpy/embeddings/weighted_embedding.py
@@ -5,11 +5,7 @@
class WeightedEmbedding(Embedding):
- def __init__(self,
- embedding: Embedding,
- weights = None,
- weight_function = None
- ):
+ def __init__(self, embedding: Embedding, weights=None, weight_function=None):
self.base_embedding = embedding
self.m = self.base_embedding.get_m()
self.weights = weights
@@ -29,8 +25,3 @@ def embed(self, xtest):
return Phi @ np.diag(self.weights)
else:
return Phi @ np.diag(self.weight_function(self.base_embedding))
-
-
-
-
-
diff --git a/stpy/estimator.py b/stpy/estimator.py
index ec00812..d107bf4 100755
--- a/stpy/estimator.py
+++ b/stpy/estimator.py
@@ -12,632 +12,902 @@
from stpy.helpers import helper
from stpy.optim.custom_optimizers import bisection
+
class Estimator(ABC):
- def fit(self):
- pass
-
- @abstractmethod
- def ucb(self, x):
- pass
-
- @abstractmethod
- def lcb(self, x):
- pass
-
- def load_data(self,d):
- self.x = d[0]
- self.y = d[1]
-
- def log_marginal(self, kernel, X, weight):
- func = kernel.get_kernel()
- K = func(self.x, self.x, **X) + torch.eye(self.n, dtype=torch.float64) * self.s * self.s
- L = torch.linalg.cholesky(K)
- logdet = -0.5 * 2 * torch.sum(torch.log(torch.diag(L))) * weight
- alpha = torch.cholesky_solve(self.y, L)
- logprob = -0.5 * torch.mm(torch.t(self.y), alpha) + logdet
- logprob = -logprob
- return logprob
-
- def optimize_params_general(self, params={}, restarts=2,
- optimizer="pymanopt", maxiter=1000,
- mingradnorm=1e-4, regularizer_func=None,
- verbose=False, scale=1., weight=1., save = False,
- save_name = 'model.np', parallel = False, cores = None):
- """
-
- :param params:
- :param restarts:
- :param optimizer:
- :param maxiter:
- :param mingradnorm:
- :param regularizer_func:
- :param verbose:
- :return:
- """
- manifolds = []
- bounds = []
- init_values = []
-
- for key, dict_params in params.items():
- for var_name, value in dict_params.items():
- init_value, manifold, bound = value
- manifolds.append(manifold)
- bounds.append(bound)
- init_values.append(init_value)
-
- if optimizer == "pymanopt":
-
- manifold = Product(tuple(manifolds))
-
- @pymanopt.function.pytorch(manifold)
- def cost(*args):
- # print (args)
- input_dict = {}
- i = 0
- for key, dict_params in params.items():
- small_param = {}
- for var_name, value in dict_params.items():
- small_param[var_name] = args[i]
- i = i + 1
- input_dict[key] = small_param
-
- if regularizer_func is not None:
- f = self.log_marginal(self.kernel_object, input_dict, weight) + regularizer_func(args)
- else:
- f = self.log_marginal(self.kernel_object, input_dict, weight)
- return f
-
- problem = pymanopt.Problem(manifold, cost=cost)
- solver = SteepestDescent(verbosity = verbose , max_iterations=maxiter, min_gradient_norm=mingradnorm)
-
- # get initial point
- objective_values = []
- objective_params = []
-
- for rep in range(restarts):
- x_init = []
- for index, man in enumerate(manifolds):
- if init_values[index] is None:
- x_sub = man.random_point() * scale
- else:
- x_sub = np.array([init_values[index]])
- x_init.append(x_sub)
- # try:
- res = solver.run(problem, initial_point=x_init)
-
- objective_params.append(res.point)
- objective_values.append(res.cost)#log['final_values']['f(x)'])
- # except Exception as e:
- # print (e)
- # print ("Optimization restart failed:", x_init)
- # pick the smallest objective
- best_index = np.argmin(objective_values)
- x_opt = [torch.from_numpy(j) for j in objective_params[best_index]]
-
- elif optimizer == "scipy":
- cost_numpy = lambda x: cost(x).detach.numpy()
- egrad_numpy = lambda x: egrad(x).detach().numpy()
-
- elif optimizer == "bisection":
-
- def cost(x):
- input_dict = self.kernel_object.params_dict
- counter = 0
- for key, dict_params in params.items():
- for var_name, value in dict_params.items():
- input_dict[key][var_name] = x
- counter += 1
-
- if regularizer_func is not None:
- f = self.log_marginal(self.kernel_object, input_dict, weight) + regularizer_func(x)
- else:
- f = self.log_marginal(self.kernel_object, input_dict, weight)
- return f
-
- a,b = bounds[0]
- x_opt = [bisection(cost,a,b,100)]
-
- elif optimizer == "pytorch-minimize":
- var_names = []
- dims = [0,]
- for key, dict_params in params.items():
- for var_name, value in dict_params.items():
- init_value, manifold, bound = value
-
- manifolds.append(manifold)
- bounds.append(bound)
- init_values.append(init_value)
- var_names.append(var_name)
- dims.append(manifold.dim)
-
- dims = np.cumsum(dims).astype(int)
-
- def cost(x):
- input_dict = self.kernel_object.params_dict
- counter = 0
- for key, dict_params in params.items():
- for var_name, value in dict_params.items():
- if key != "likelihood":
- input_dict[key][var_name] = x[dims[counter]:dims[counter+1]]
- else:
- self.s = x[dims[counter]:dims[counter+1]]
- counter += 1
-
- if regularizer_func is not None:
- f = self.log_marginal(self.kernel_object, input_dict, weight) + regularizer_func(x)
- else:
- f = self.log_marginal(self.kernel_object, input_dict, weight)
- return f
-
- objective_values = []
- objective_params = []
- x_opt = []
-
- dim = dims[-1]
- self.prepared_log_marginal = False
- for rep in range(restarts):
- #try:
- if init_values[0] is None:
- x_init = torch.randn(size=(dim, 1)).double().view(-1)**2 * scale
- else:
- x_init = init_values[0](dim)
-
- if bounds[0] is None:
- res = minimize_torch(cost, x_init, method='l-bfgs', tol=1e-10, disp=verbose + 1,
- options={'max_iter': maxiter, 'gtol':mingradnorm})
- objective_params.append(res.x)
- objective_values.append(res.fun)
- else:
- print ("Constrained optimization with bounds", bounds[0])
- res = minimize(cost, x_init.numpy(), backend='torch', method='L-BFGS-B',
- bounds=bounds[0], precision='float64', tol=1e-8,
- options={'ftol': 1e-10,
- 'gtol': mingradnorm, 'eps': 1e-08,
- 'maxfun': 15000, 'maxiter': maxiter,
- 'maxls': 20, 'disp' : verbose + 1})
-
- objective_params.append(torch.from_numpy(res.x))
- objective_values.append(torch.from_numpy(res.fun))
- #except Exception as e:
- # print(e)
- # save models
-
- if save:
- vals = {'params': objective_params,
- 'evidence':objective_values,
- 'repeats':restarts,
- 'dim':dims,
- 'param_names':params}
-
- with open(save_name, 'wb') as f:
- pickle.dump(vals, f)
-
-
- best_index = np.argmin(objective_values)
-
- counter = 0
- for key, dict_params in params.items():
- for var_name, value in dict_params.items():
- x_opt.append(objective_params[best_index][dims[counter]:dims[counter+1]])
- counter += 1
-
- elif optimizer == "discrete":
- values = []
- configurations = manifolds[0]
- for config in manifolds[0]:
- values.append(cost(config))
-
- best_index = np.argmin(values)
- x_opt = [configurations[best_index]]
- else:
- raise AssertionError("Optimizer not implemented.")
-
- # put back into default dic
- i = 0
- for key, dict_params in params.items():
- for var_name, value in dict_params.items():
- if key == "likelihood":
- self.s = x_opt[i]
-
- else:
- self.kernel_object.params_dict[key][var_name] = x_opt[i]
- i = i + 1
-
- # print ("--------- Finished. ------------")
- # print (self.kernel_object.params_dict)
-
- # disable back_prop
- self.back_prop = False
-
- # refit the model
- self.fitted = False
- print(self.description())
- self.fit_gp(self.x, self.y)
- return True
-
- def load_params(self, objective_params, params, dims):
- self.fig = False
- self.back_prop = False
- x_opt = []
- counter = 0
- for key, dict_params in params.items():
- for var_name, value in dict_params.items():
- x_opt.append(objective_params[dims[counter]:dims[counter + 1]])
- counter += 1
-
- counter = 0
- for key, dict_params in params.items():
- for var_name, value in dict_params.items():
- self.kernel_object.params_dict[key][var_name] = x_opt[counter]
- counter += 1
-
- print(self.description())
-
-
-
- def visualize_function(self, xtest, f_trues, filename=None, colors=None, figsize = (15, 7)):
- d = xtest.size()[1]
- if d == 1:
- if isinstance(f_trues, list):
- for f_true in f_trues:
- plt.plot(xtest, f_true(xtest))
- else:
- plt.plot(xtest, f_trues(xtest))
- elif d == 2:
- from scipy.interpolate import griddata
- plt.figure(figsize=figsize)
- plt.clf()
- ax = plt.axes(projection='3d')
- xx = xtest[:, 0].numpy()
- yy = xtest[:, 1].numpy()
- grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j]
-
-
- if isinstance(f_trues, list):
- for index, f_true in enumerate(f_trues):
- grid_z = griddata((xx, yy), f_true(xtest)[:, 0].numpy(), (grid_x, grid_y), method='linear')
- if colors is not None:
- color = colors[index]
- ax.plot_surface(grid_x, grid_y, grid_z, alpha=0.4, color=color)
- else:
- grid_z = griddata((xx, yy), f_trues(xtest)[:, 0].numpy(), (grid_x, grid_y), method='linear')
- ax.plot_surface(grid_x, grid_y, grid_z, alpha=0.4)
-
- if filename is not None:
- plt.xticks(fontsize=20, rotation=0)
- plt.yticks(fontsize=20, rotation=0)
- plt.savefig(filename, dpi=300)
-
- def visualize_function_contour(self, xtest, f_true,
- filename=None, levels=10, figsize=(15, 7),
- alpha = 1., colorbar = True, cmap = 'hot',
- mean_point = None, point_color = 'tab:red', ax = None,
- fig = None):
- d = xtest.size()[1]
- if d == 1:
- pass
- elif d == 2:
- from scipy.interpolate import griddata
- xx = xtest[:, 0].numpy()
- yy = xtest[:, 1].numpy()
- grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j]
- f = f_true(xtest)
- grid_z_f = griddata((xx, yy), f[:, 0].detach().numpy(), (grid_x, grid_y), method='linear')
- if ax is None:
- fig, ax = plt.subplots(figsize=figsize)
-
- cs = ax.contourf(grid_x, grid_y, grid_z_f, alpha = 0.5, cmap = cmap, linewidths=1, levels = [0,1])
- ax.contour(cs, colors='k', levels = [0.5], alpha = 0.5)
- if colorbar:
- cbar = fig.colorbar(cs)
- # if self.x is not None:
- # ax.scatter(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), c='r', s=100, marker="o")
- ax.grid(c='k', ls='-', alpha=0.1)
- if mean_point is not None:
- plt.plot(mean_point[0],mean_point[1], 'o', ms = 10, color = point_color)
-
- if filename is not None:
- plt.xticks(fontsize=24, rotation=0)
- plt.yticks(fontsize=24, rotation=0)
- plt.savefig(filename, dpi=300)
- return fig, ax
- # plt.show()
-
- def visualize(self, xtest,bounds = False, f_true=None, points=True, show=True, size=2,
- norm=1, fig=True, sqrtbeta=2, constrained=None, d=None,
- matheron_kernel=None, color = None, label = "", visualize_point = None):
-
- if not bounds:
- [mu, std] = self.mean_std(xtest)
- lcb = mu - sqrtbeta *std
- ucb = mu + sqrtbeta *std
- else:
- print ("using bounds")
- lcb = self.lcb(xtest)
- ucb = self.ucb(xtest)
- mu = self.mean(xtest)
-
- if d is None:
- d = self.d
-
-
-
- if d == 1:
- if fig == True:
- plt.figure(figsize=(15, 7))
- plt.clf()
- if self.x is not None:
- plt.plot(self.x.detach().numpy(), self.y.detach().numpy(), 'ro', ms=10)
-
- if visualize_point is not None:
- [x, y] = visualize_point
- plt.plot(x, y, 'go', ms = 10)
-
- if size > 0:
-
- if matheron_kernel is not None:
- z = self.sample_matheron(xtest, matheron_kernel, size=size).numpy().T
- else:
- z = self.sample(xtest, size=size).numpy().T
-
- for z_arr, label in zip(z, ['sample'] + [None for _ in range(size - 1)]):
- plt.plot(xtest.view(-1).numpy(), z_arr, 'k--', lw=2, label=label)
-
- plt.fill_between(xtest.view(-1).numpy(), lcb.view(-1).numpy(), ucb.view(-1).numpy(),
- color="#dddddd")
-
- if f_true is not None:
- plt.plot(xtest.numpy(), f_true(xtest).numpy(), 'b-', lw=2, label="truth")
-
- if color is None:
- plt.plot(xtest.numpy(), mu.numpy(), 'r-', lw=2, label="posterior mean")
- else:
- plt.plot(xtest.numpy(), mu.numpy(), linestyle = '-', lw=2, label="posterior mean"+label, color = color)
-
- plt.legend()
- if show == True:
- plt.show()
-
- elif d == 2:
- from scipy.interpolate import griddata
- plt.figure(figsize=(15, 7))
- plt.clf()
- ax = plt.axes(projection='3d')
- xx = xtest[:, 0].numpy()
- yy = xtest[:, 1].numpy()
- grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j]
- grid_z_mu = griddata((xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method='linear')
- ax.plot_surface(grid_x, grid_y, grid_z_mu, color='r', alpha=0.4, label="mu")
-
- if f_true is not None:
- grid_z = griddata((xx, yy), f_true(xtest)[:, 0].numpy(), (grid_x, grid_y), method='linear')
- ax.plot_surface(grid_x, grid_y, grid_z, color='b', alpha=0.4, label="truth")
-
- if points == True and self.fitted == True:
- ax.scatter(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), self.y[:, 0].detach().numpy(),
- c='r', s=100, marker="o", depthshade=False)
-
- if hasattr(self,"beta"):
- if self.beta is not None:
- beta = self.beta(norm=norm)
- grid_z2 = griddata((xx, yy), (mu.detach() + beta * std.detach())[:, 0].detach().numpy(),
- (grid_x, grid_y), method='linear')
- ax.plot_surface(grid_x, grid_y, grid_z2, color='gray', alpha=0.2)
- grid_z3 = griddata((xx, yy), (mu.detach() - beta * std.detach())[:, 0].detach().numpy(),
- (grid_x, grid_y), method='linear')
- ax.plot_surface(grid_x, grid_y, grid_z3, color='gray', alpha=0.2)
-
- ax.plot_surface(grid_x, grid_y, grid_z_mu, color='r', alpha=0.4)
- # plt.title('Posterior mean prediction plus 2 st.deviation')
- plt.show()
-
- else:
- print("Visualization not implemented")
-
- def visualize_subopt(self, xtest, f_true=None, points=True, show=True, size=2, norm=1, fig=True, beta=2):
- [mu, std] = self.mean_std(xtest)
-
- print("Visualizing in: ", self.d, "dimensions...")
-
- if self.d == 1:
- if fig == True:
- plt.figure(figsize=(15, 7))
- plt.clf()
- if self.x is not None:
- plt.plot(self.x.detach().numpy(), self.y.detach().numpy(), 'r+', ms=10, marker="o")
- plt.plot(xtest.numpy(), self.sample(xtest, size=size).numpy(), 'k--', lw=2, label="sample")
- plt.fill_between(xtest.numpy().flat, (mu - 2 * std).numpy().flat, (mu + 2 * std).numpy().flat,
- color="#dddddd")
- if f_true is not None:
- plt.plot(xtest.numpy(), f_true(xtest).numpy(), 'b-', lw=2, label="truth")
- plt.plot(xtest.numpy(), mu.numpy(), 'r-', lw=2, label="posterior mean")
-
- min = torch.max(mu - beta * std)
- mask = (mu + beta * std < min)
- v = torch.min(mu - beta * std).numpy() - 1
- plt.plot(xtest.numpy()[mask], 0 * xtest.numpy()[mask] + v, 'ko', lw=6, label="Discarted Region")
-
- plt.title('Posterior mean prediction plus 2 st.deviation')
- plt.legend()
-
- if show == True:
- plt.show()
-
- def visualize_slice(self, xtest, slice, show=True, eps=None, size=1, beta=2):
- append = torch.ones(size=(xtest.size()[0], 1), dtype=torch.float64) * slice
- xtest2 = torch.cat((xtest, append), dim=1)
-
- [mu, std] = self.mean_std(xtest2)
-
- plt.figure(figsize=(15, 7))
- plt.clf()
- plt.plot(xtest.numpy(), self.sample(xtest, size=size).numpy(), 'k--', lw=2, label="sample")
- print(std.size(), mu.size())
- if self.x is not None:
- plt.plot(self.x[:, 0].detach().numpy(), self.y.detach().numpy(), 'r+', ms=10, marker="o")
- plt.fill_between(xtest.numpy().flat, (mu - 2 * std).numpy().flat, (mu + 2 * std).numpy().flat, color="#dddddd")
- plt.fill_between(xtest.numpy().flat, (mu + 2 * std).numpy().flat, (mu + 2 * std + 2 * self.s).numpy().flat,
- color="#bbdefb")
- plt.fill_between(xtest.numpy().flat, (mu - 2 * std - 2 * self.s).numpy().flat, (mu - 2 * std).numpy().flat,
- color="#bbdefb")
-
- if eps is not None:
- mask = (beta * std < eps)
- v = torch.min(mu - beta * std - 2 * self.s).numpy()
- plt.plot(xtest.numpy()[mask], 0 * xtest.numpy()[mask] + v, 'k', lw=6,
- label="$\\mathcal{D}_E$ - $\\epsilon$ accurate domain in a subspace")
-
- plt.plot(xtest.numpy(), mu.numpy(), 'r-', lw=2, label="posterior mean")
- plt.title('Posterior mean prediction plus 2 st.deviation')
- plt.legend()
- if show == True:
- plt.show()
-
- def visualize_contour_with_gap(self, xtest, f_true=None, gap=None, show=False):
- [mu, _] = self.mean_std(xtest)
-
- if self.d == 2:
- from scipy.interpolate import griddata
- xx = xtest[:, 0].detach().numpy()
- yy = xtest[:, 1].detach().numpy()
- grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j]
- grid_z_mu = griddata((xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method='linear')
-
- fig, ax = plt.subplots(figsize=(15, 7))
- cs = ax.contourf(grid_x, grid_y, grid_z_mu)
- ax.contour(cs, colors='k')
-
- ax.plot(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), 'ro', ms=10)
- cbar = fig.colorbar(cs)
-
- ax.grid(c='k', ls='-', alpha=0.1)
-
- if f_true is not None:
- f = f_true(xtest)
- grid_z_f = griddata((xx, yy), f[:, 0].detach().numpy(), (grid_x, grid_y), method='linear')
- fig, ax = plt.subplots(figsize=(15, 7))
- cs = ax.contourf(grid_x, grid_y, grid_z_f)
- ax.contour(cs, colors='k')
- cbar = fig.colorbar(cs)
- ax.grid(c='k', ls='-', alpha=0.1)
- if show == True:
- plt.show()
-
- def visualize_contour(self, xtest, f_true=None, show=True, points=True, ms=5, levels=20):
- [mu, _] = self.mean_std(xtest)
-
- if self.d == 2:
- from scipy.interpolate import griddata
- xx = xtest[:, 0].detach().numpy()
- yy = xtest[:, 1].detach().numpy()
-
- grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j]
- grid_z_mu = griddata((xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method='linear')
-
- fig, ax = plt.subplots(figsize=(15, 7))
- cs = ax.contourf(grid_x, grid_y, grid_z_mu)
- ax.contour(cs, colors='k')
-
- if points == True:
- ax.plot(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), 'wo', ms=ms, alpha=0.5)
- cbar = fig.colorbar(cs)
- ax.grid(c='k', ls='-', alpha=0.1)
-
- if f_true is not None:
- f = f_true(xtest)
- grid_z_f = griddata((xx, yy), f[:, 0].detach().numpy(), (grid_x, grid_y), method='linear')
- fig, ax = plt.subplots(figsize=(15, 7))
- cs = ax.contourf(grid_x, grid_y, grid_z_f, levels=levels)
- ax.contour(cs, colors='k')
- cbar = fig.colorbar(cs)
- ax.grid(c='k', ls='-', alpha=0.1)
- if show == True:
- plt.show()
- return ax
-
- def visualize_quiver(self, xtest, size=2, norm=1):
- [mu, std] = self.mean_std(xtest)
- if self.d == 2:
- from scipy.interpolate import griddata
- plt.figure(figsize=(15, 7))
- plt.clf()
- ax = plt.axes(projection='3d')
- xx = xtest[:, 0].detach().numpy()
- yy = xtest[:, 1].detach().numpy()
- grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j]
- grid_z_mu = griddata((xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method='linear')
- #
-
- ax.scatter(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), self.y[:, 0].detach().numpy(),
- c='r', s=100, marker="o", depthshade=False)
-
- if self.beta is not None:
- beta = self.beta(norm=norm)
- grid_z2 = griddata((xx, yy), (mu.detach() + beta * std.detach())[:, 0].detach().numpy(),
- (grid_x, grid_y), method='linear')
- ax.plot_surface(grid_x, grid_y, grid_z2, color='gray', alpha=0.2)
- grid_z3 = griddata((xx, yy), (mu.detach() - beta * std.detach())[:, 0].detach().numpy(),
- (grid_x, grid_y), method='linear')
- ax.plot_surface(grid_x, grid_y, grid_z3, color='gray', alpha=0.2)
-
- ax.plot_surface(grid_x, grid_y, grid_z_mu, color='r', alpha=0.4)
- plt.title('Posterior mean prediction plus 2 st.deviation')
-
- derivatives = torch.zeros(xtest.size()[0], 2)
- for index, point in enumerate(xtest):
- derivatives[index, :] = self.mean_gradient_hessian(point.view(-1, 2))
- print(derivatives[index, :])
-
- print(derivatives.size())
-
- grid_der_x_mu = griddata((xx, yy), derivatives[:, 0].detach().numpy(), (grid_x, grid_y), method='linear')
- grid_der_y_mu = griddata((xx, yy), derivatives[:, 1].detach().numpy(), (grid_x, grid_y), method='linear')
-
- fig, ax = plt.subplots(figsize=(15, 7))
- cs = ax.contourf(grid_x, grid_y, grid_z_mu)
-
- ax.contour(cs, colors='k')
-
- # Plot grid.
- ax.grid(c='k', ls='-', alpha=0.1)
- ax.quiver(grid_x, grid_y, grid_der_x_mu, grid_der_y_mu)
-
- plt.show()
-
- else:
- print("Visualization not implemented")
+ def fit(self):  # default implementation does nothing
+ pass
+
+ @abstractmethod
+ def ucb(self, x):  # abstract; visualize() uses its value as the upper edge of the band when bounds=True
+ pass
+
+ @abstractmethod
+ def lcb(self, x):  # abstract; visualize() uses its value as the lower edge of the band when bounds=True
+ pass
+
+ def load_data(self, d):  # d is indexed: d[0] becomes self.x and d[1] becomes self.y
+ self.x = d[0]
+ self.y = d[1]
+
+ def log_marginal(self, kernel, X, weight):  # returns 0.5 * y^T K^{-1} y + weight * sum(log(diag(L))) with K = L L^T
+ func = kernel.get_kernel()
+ K = (
+ func(self.x, self.x, **X)  # X is unpacked as keyword arguments of the kernel function
+ + torch.eye(self.n, dtype=torch.float64) * self.s * self.s  # adds s^2 to the diagonal
+ )
+ L = torch.linalg.cholesky(K)
+ logdet = -0.5 * 2 * torch.sum(torch.log(torch.diag(L))) * weight  # 2 * sum(log(diag(L))) equals log det K
+ alpha = torch.cholesky_solve(self.y, L)  # solves K alpha = y using the Cholesky factor
+ logprob = -0.5 * torch.mm(torch.t(self.y), alpha) + logdet
+ logprob = -logprob  # sign flipped; optimize_params_general minimizes this value
+ return logprob
+
+ def optimize_params_general(
+ self,
+ params={},  # only read in this method, never modified
+ restarts=2,
+ optimizer="pymanopt",
+ maxiter=1000,
+ mingradnorm=1e-4,
+ regularizer_func=None,
+ verbose=False,
+ scale=1.0,  # multiplies the random initial points
+ weight=1.0,  # forwarded to self.log_marginal
+ save=False,  # "pytorch-minimize" only: pickle the restart results
+ save_name="model.np",  # file written when save is true
+ parallel=False,  # not referenced in the body
+ cores=None,  # not referenced in the body
+ ):
+ """
+ Minimize self.log_marginal over the variables listed in params, store the best values and refit.
+ :param params: dict of dicts, params[key][var_name] = (init_value, manifold, bound); key "likelihood" sets self.s
+ :param restarts: number of optimization runs in the "pymanopt" and "pytorch-minimize" branches
+ :param optimizer: one of "pymanopt", "scipy", "bisection", "pytorch-minimize", "discrete"
+ :param maxiter: iteration limit forwarded to the solver
+ :param mingradnorm: gradient-norm tolerance forwarded to the solver
+ :param regularizer_func: optional callable whose value is added to the objective
+ :param verbose: forwarded to the solver as verbosity / disp
+ :return: True; an unknown optimizer name raises AssertionError
+ """
+ manifolds = []
+ bounds = []
+ init_values = []
+
+ for key, dict_params in params.items():  # flatten the nested dict into three parallel lists
+ for var_name, value in dict_params.items():
+ init_value, manifold, bound = value
+ manifolds.append(manifold)
+ bounds.append(bound)
+ init_values.append(init_value)
+
+ if optimizer == "pymanopt":
+
+ manifold = Product(tuple(manifolds))
+
+ @pymanopt.function.pytorch(manifold)
+ def cost(*args):  # one positional argument per variable, in params iteration order
+ # print (args)
+ input_dict = {}
+ i = 0
+ for key, dict_params in params.items():
+ small_param = {}
+ for var_name, value in dict_params.items():
+ small_param[var_name] = args[i]
+ i = i + 1
+ input_dict[key] = small_param
+
+ if regularizer_func is not None:
+ f = self.log_marginal(
+ self.kernel_object, input_dict, weight
+ ) + regularizer_func(args)
+ else:
+ f = self.log_marginal(self.kernel_object, input_dict, weight)
+ return f
+
+ problem = pymanopt.Problem(manifold, cost=cost)
+ solver = SteepestDescent(
+ verbosity=verbose, max_iterations=maxiter, min_gradient_norm=mingradnorm
+ )
+
+ # get initial point
+ objective_values = []
+ objective_params = []
+
+ for rep in range(restarts):
+ x_init = []
+ for index, man in enumerate(manifolds):
+ if init_values[index] is None:
+ x_sub = man.random_point() * scale  # no initial value given: random manifold point, scaled
+ else:
+ x_sub = np.array([init_values[index]])
+ x_init.append(x_sub)
+ # try:
+ res = solver.run(problem, initial_point=x_init)
+
+ objective_params.append(res.point)
+ objective_values.append(res.cost) # log['final_values']['f(x)'])
+ # except Exception as e:
+ # print (e)
+ # print ("Optimization restart failed:", x_init)
+ # pick the smallest objective
+ best_index = np.argmin(objective_values)
+ x_opt = [torch.from_numpy(j) for j in objective_params[best_index]]
+
+ elif optimizer == "scipy":  # NOTE(review): unfinished branch; cost, egrad and x_opt are not defined on this path
+ cost_numpy = lambda x: cost(x).detach().numpy()  # detach is a method and has to be called
+ egrad_numpy = lambda x: egrad(x).detach().numpy()
+
+ elif optimizer == "bisection":
+
+ def cost(x):  # writes the same x into every listed variable
+ input_dict = self.kernel_object.params_dict  # same dict object, so the writes below change params_dict itself
+ counter = 0
+ for key, dict_params in params.items():
+ for var_name, value in dict_params.items():
+ input_dict[key][var_name] = x
+ counter += 1
+
+ if regularizer_func is not None:
+ f = self.log_marginal(
+ self.kernel_object, input_dict, weight
+ ) + regularizer_func(x)
+ else:
+ f = self.log_marginal(self.kernel_object, input_dict, weight)
+ return f
+
+ a, b = bounds[0]  # only the bound of the first variable is used
+ x_opt = [bisection(cost, a, b, 100)]
+
+ elif optimizer == "pytorch-minimize":
+ var_names = []
+ dims = [
+ 0,
+ ]
+ for key, dict_params in params.items():  # NOTE(review): appends to manifolds/bounds/init_values a second time
+ for var_name, value in dict_params.items():
+ init_value, manifold, bound = value
+
+ manifolds.append(manifold)
+ bounds.append(bound)
+ init_values.append(init_value)
+ var_names.append(var_name)
+ dims.append(manifold.dim)
+
+ dims = np.cumsum(dims).astype(int)  # dims[k]:dims[k + 1] is the slice of the flat vector for variable k
+
+ def cost(x):
+ input_dict = self.kernel_object.params_dict  # same dict object, so the writes below change params_dict itself
+ counter = 0
+ for key, dict_params in params.items():
+ for var_name, value in dict_params.items():
+ if key != "likelihood":
+ input_dict[key][var_name] = x[
+ dims[counter] : dims[counter + 1]
+ ]
+ else:
+ self.s = x[dims[counter] : dims[counter + 1]]  # "likelihood" entries are written to self.s
+ counter += 1
+
+ if regularizer_func is not None:
+ f = self.log_marginal(
+ self.kernel_object, input_dict, weight
+ ) + regularizer_func(x)
+ else:
+ f = self.log_marginal(self.kernel_object, input_dict, weight)
+ return f
+
+ objective_values = []
+ objective_params = []
+ x_opt = []
+
+ dim = dims[-1]  # total length of the flat parameter vector
+ self.prepared_log_marginal = False
+ for rep in range(restarts):
+ # try:
+ if init_values[0] is None:
+ x_init = torch.randn(size=(dim, 1)).double().view(-1) ** 2 * scale  # squared normal draws, so non-negative
+ else:
+ x_init = init_values[0](dim)  # the first init value is called with the total dimension
+
+ if bounds[0] is None:  # no bound on the first variable: unconstrained solver
+ res = minimize_torch(
+ cost,
+ x_init,
+ method="l-bfgs",
+ tol=1e-10,
+ disp=verbose + 1,
+ options={"max_iter": maxiter, "gtol": mingradnorm},
+ )
+ objective_params.append(res.x)
+ objective_values.append(res.fun)
+ else:
+ print("Constrained optimization with bounds", bounds[0])
+ res = minimize(
+ cost,
+ x_init.numpy(),
+ backend="torch",
+ method="L-BFGS-B",
+ bounds=bounds[0],
+ precision="float64",
+ tol=1e-8,
+ options={
+ "ftol": 1e-10,
+ "gtol": mingradnorm,
+ "eps": 1e-08,
+ "maxfun": 15000,
+ "maxiter": maxiter,
+ "maxls": 20,
+ "disp": verbose + 1,
+ },
+ )
+
+ objective_params.append(torch.from_numpy(res.x))
+ objective_values.append(torch.from_numpy(res.fun))
+ # except Exception as e:
+ # print(e)
+ # save models
+
+ if save:  # pickle the collected restart results to save_name
+ vals = {
+ "params": objective_params,
+ "evidence": objective_values,
+ "repeats": restarts,
+ "dim": dims,
+ "param_names": params,
+ }
+
+ with open(save_name, "wb") as f:
+ pickle.dump(vals, f)
+
+ best_index = np.argmin(objective_values)
+
+ counter = 0
+ for key, dict_params in params.items():
+ for var_name, value in dict_params.items():
+ x_opt.append(
+ objective_params[best_index][dims[counter] : dims[counter + 1]]
+ )
+ counter += 1
+
+ elif optimizer == "discrete":  # NOTE(review): calls cost(), which is not defined on this path
+ values = []
+ configurations = manifolds[0]
+ for config in manifolds[0]:
+ values.append(cost(config))
+
+ best_index = np.argmin(values)
+ x_opt = [configurations[best_index]]
+ else:
+ raise AssertionError("Optimizer not implemented.")
+
+ # put back into default dic
+ i = 0
+ for key, dict_params in params.items():
+ for var_name, value in dict_params.items():
+ if key == "likelihood":
+ self.s = x_opt[i]
+
+ else:
+ self.kernel_object.params_dict[key][var_name] = x_opt[i]
+ i = i + 1
+
+ # print ("--------- Finished. ------------")
+ # print (self.kernel_object.params_dict)
+
+ # disable back_prop
+ self.back_prop = False
+
+ # refit the model
+ self.fitted = False
+ print(self.description())
+ self.fit_gp(self.x, self.y)
+ return True
+
+ def load_params(self, objective_params, params, dims):  # slices a flat vector by dims and writes the pieces into kernel_object.params_dict
+ self.fig = False
+ self.back_prop = False
+ x_opt = []
+ counter = 0
+ for key, dict_params in params.items():
+ for var_name, value in dict_params.items():
+ x_opt.append(objective_params[dims[counter] : dims[counter + 1]])  # slice belonging to the counter-th variable
+ counter += 1
+
+ counter = 0
+ for key, dict_params in params.items():
+ for var_name, value in dict_params.items():
+ self.kernel_object.params_dict[key][var_name] = x_opt[counter]  # NOTE(review): no special case for the "likelihood" key here, unlike optimize_params_general
+ counter += 1
+
+ print(self.description())
+
+ def visualize_function(
+ self, xtest, f_trues, filename=None, colors=None, figsize=(15, 7)
+ ):  # plots one callable or a list of callables evaluated on xtest; 1-D as lines, 2-D as surfaces
+ d = xtest.size()[1]  # input dimension is read from the second axis of xtest
+ if d == 1:
+ if isinstance(f_trues, list):
+ for f_true in f_trues:
+ plt.plot(xtest, f_true(xtest))
+ else:
+ plt.plot(xtest, f_trues(xtest))
+ elif d == 2:
+ from scipy.interpolate import griddata
+
+ plt.figure(figsize=figsize)
+ plt.clf()
+ ax = plt.axes(projection="3d")
+ xx = xtest[:, 0].numpy()
+ yy = xtest[:, 1].numpy()
+ grid_x, grid_y = np.mgrid[  # regular 100 x 100 grid spanning the range of the test points
+ min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j
+ ]
+
+ if isinstance(f_trues, list):
+ for index, f_true in enumerate(f_trues):
+ grid_z = griddata(  # linear interpolation of the function values onto the grid
+ (xx, yy),
+ f_true(xtest)[:, 0].numpy(),
+ (grid_x, grid_y),
+ method="linear",
+ )
+ if colors is not None:
+ color = colors[index]  # NOTE(review): color is only assigned when colors is given; confirm the plot_surface call below cannot read it unbound
+ ax.plot_surface(grid_x, grid_y, grid_z, alpha=0.4, color=color)
+ else:
+ grid_z = griddata(
+ (xx, yy),
+ f_trues(xtest)[:, 0].numpy(),
+ (grid_x, grid_y),
+ method="linear",
+ )
+ ax.plot_surface(grid_x, grid_y, grid_z, alpha=0.4)
+
+ if filename is not None:  # the figure is only written to disk when a filename is given
+ plt.xticks(fontsize=20, rotation=0)
+ plt.yticks(fontsize=20, rotation=0)
+ plt.savefig(filename, dpi=300)
+
+ def visualize_function_contour(
+ self,
+ xtest,
+ f_true,
+ filename=None,
+ levels=10,  # NOTE(review): not referenced in the body; contourf below hard-codes levels=[0, 1]
+ figsize=(15, 7),
+ alpha=1.0,  # NOTE(review): not referenced in the body; contourf below hard-codes alpha=0.5
+ colorbar=True,
+ cmap="hot",
+ mean_point=None,
+ point_color="tab:red",
+ ax=None,
+ fig=None,  # NOTE(review): fig.colorbar below is called on this value when ax is passed in
+ ):  # filled contour plot of f_true on a 2-D xtest; returns (fig, ax)
+ d = xtest.size()[1]  # input dimension is read from the second axis of xtest
+ if d == 1:
+ pass  # nothing is drawn for one-dimensional input
+ elif d == 2:
+ from scipy.interpolate import griddata
+
+ xx = xtest[:, 0].numpy()
+ yy = xtest[:, 1].numpy()
+ grid_x, grid_y = np.mgrid[  # regular 100 x 100 grid spanning the range of the test points
+ min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j
+ ]
+ f = f_true(xtest)
+ grid_z_f = griddata(
+ (xx, yy), f[:, 0].detach().numpy(), (grid_x, grid_y), method="linear"
+ )
+ if ax is None:  # create a new figure only when the caller did not supply axes
+ fig, ax = plt.subplots(figsize=figsize)
+
+ cs = ax.contourf(
+ grid_x,
+ grid_y,
+ grid_z_f,
+ alpha=0.5,
+ cmap=cmap,
+ linewidths=1,
+ levels=[0, 1],
+ )
+ ax.contour(cs, colors="k", levels=[0.5], alpha=0.5)  # single black contour line at the value 0.5
+ if colorbar:
+ cbar = fig.colorbar(cs)
+ # if self.x is not None:
+ # ax.scatter(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), c='r', s=100, marker="o")
+ ax.grid(c="k", ls="-", alpha=0.1)
+ if mean_point is not None:
+ plt.plot(mean_point[0], mean_point[1], "o", ms=10, color=point_color)
+
+ if filename is not None:  # the figure is only written to disk when a filename is given
+ plt.xticks(fontsize=24, rotation=0)
+ plt.yticks(fontsize=24, rotation=0)
+ plt.savefig(filename, dpi=300)
+ return fig, ax
+
+ # plt.show()
+
+ def visualize(
+ self,
+ xtest,
+ bounds=False,  # False: band is mean +/- sqrtbeta * std; True: band comes from self.lcb / self.ucb
+ f_true=None,
+ points=True,
+ show=True,  # only consulted in the d == 1 branch; the d == 2 branch always calls plt.show()
+ size=2,  # number of samples requested from self.sample / self.sample_matheron
+ norm=1,
+ fig=True,
+ sqrtbeta=2,
+ constrained=None,  # not referenced in the body
+ d=None,  # plotting dimension; falls back to self.d
+ matheron_kernel=None,
+ color=None,
+ label="",  # appended to the "posterior mean" legend entry when color is given
+ visualize_point=None,
+ ):  # plots the posterior mean with a band, samples and data in 1-D, or surfaces in 2-D
+
+ if not bounds:
+ [mu, std] = self.mean_std(xtest)
+ lcb = mu - sqrtbeta * std
+ ucb = mu + sqrtbeta * std
+ else:
+ print("using bounds")
+ lcb = self.lcb(xtest)
+ ucb = self.ucb(xtest)
+ mu = self.mean(xtest)  # NOTE(review): std is not computed on this path, but the d == 2 branch reads it when self.beta is set
+
+ if d is None:
+ d = self.d
+
+ if d == 1:
+ if fig == True:
+ plt.figure(figsize=(15, 7))
+ plt.clf()
+ if self.x is not None:
+ plt.plot(self.x.detach().numpy(), self.y.detach().numpy(), "ro", ms=10)
+
+ if visualize_point is not None:
+ [x, y] = visualize_point
+ plt.plot(x, y, "go", ms=10)
+
+ if size > 0:
+
+ if matheron_kernel is not None:
+ z = (
+ self.sample_matheron(xtest, matheron_kernel, size=size)
+ .numpy()
+ .T
+ )
+ else:
+ z = self.sample(xtest, size=size).numpy().T
+
+ for z_arr, sample_label in zip(  # own name, so the label argument is not overwritten by the loop
+ z, ["sample"] + [None for _ in range(size - 1)]
+ ):
+ plt.plot(xtest.view(-1).numpy(), z_arr, "k--", lw=2, label=sample_label)
+
+ plt.fill_between(
+ xtest.view(-1).numpy(),
+ lcb.view(-1).numpy(),
+ ucb.view(-1).numpy(),
+ color="#dddddd",
+ )
+
+ if f_true is not None:
+ plt.plot(
+ xtest.numpy(), f_true(xtest).numpy(), "b-", lw=2, label="truth"
+ )
+
+ if color is None:
+ plt.plot(xtest.numpy(), mu.numpy(), "r-", lw=2, label="posterior mean")
+ else:
+ plt.plot(
+ xtest.numpy(),
+ mu.numpy(),
+ linestyle="-",
+ lw=2,
+ label="posterior mean" + label,
+ color=color,
+ )
+
+ plt.legend()
+ if show == True:
+ plt.show()
+
+ elif d == 2:
+ from scipy.interpolate import griddata
+
+ plt.figure(figsize=(15, 7))
+ plt.clf()
+ ax = plt.axes(projection="3d")
+ xx = xtest[:, 0].numpy()
+ yy = xtest[:, 1].numpy()
+ grid_x, grid_y = np.mgrid[  # regular 100 x 100 grid spanning the range of the test points
+ min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j
+ ]
+ grid_z_mu = griddata(
+ (xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method="linear"
+ )
+ ax.plot_surface(grid_x, grid_y, grid_z_mu, color="r", alpha=0.4, label="mu")
+
+ if f_true is not None:
+ grid_z = griddata(
+ (xx, yy),
+ f_true(xtest)[:, 0].numpy(),
+ (grid_x, grid_y),
+ method="linear",
+ )
+ ax.plot_surface(
+ grid_x, grid_y, grid_z, color="b", alpha=0.4, label="truth"
+ )
+
+ if points == True and self.fitted == True:
+ ax.scatter(
+ self.x[:, 0].detach().numpy(),
+ self.x[:, 1].detach().numpy(),
+ self.y[:, 0].detach().numpy(),
+ c="r",
+ s=100,
+ marker="o",
+ depthshade=False,
+ )
+
+ if hasattr(self, "beta"):
+ if self.beta is not None:
+ beta = self.beta(norm=norm)  # self.beta is called to obtain the band width multiplier
+ grid_z2 = griddata(
+ (xx, yy),
+ (mu.detach() + beta * std.detach())[:, 0].detach().numpy(),
+ (grid_x, grid_y),
+ method="linear",
+ )
+ ax.plot_surface(grid_x, grid_y, grid_z2, color="gray", alpha=0.2)
+ grid_z3 = griddata(
+ (xx, yy),
+ (mu.detach() - beta * std.detach())[:, 0].detach().numpy(),
+ (grid_x, grid_y),
+ method="linear",
+ )
+ ax.plot_surface(grid_x, grid_y, grid_z3, color="gray", alpha=0.2)
+
+ ax.plot_surface(grid_x, grid_y, grid_z_mu, color="r", alpha=0.4)
+ # plt.title('Posterior mean prediction plus 2 st.deviation')
+ plt.show()
+
+ else:
+ print("Visualization not implemented")
+
+ def visualize_subopt(
+ self,
+ xtest,
+ f_true=None,
+ points=True,  # not referenced in the body
+ show=True,
+ size=2,
+ norm=1,  # not referenced in the body
+ fig=True,
+ beta=2,  # multiplier of std used for the discarded-region test
+ ):  # 1-D plot of data, samples, mean, a +/- 2 std band and the discarded region; draws only when self.d == 1
+ [mu, std] = self.mean_std(xtest)
+
+ print("Visualizing in: ", self.d, "dimensions...")
+
+ if self.d == 1:
+ if fig == True:
+ plt.figure(figsize=(15, 7))
+ plt.clf()
+ if self.x is not None:
+ plt.plot(
+ self.x.detach().numpy(),
+ self.y.detach().numpy(),
+ "r+",
+ ms=10,
+ marker="o",  # NOTE(review): the "r+" format string above also names a marker
+ )
+ plt.plot(
+ xtest.numpy(),
+ self.sample(xtest, size=size).numpy(),
+ "k--",
+ lw=2,
+ label="sample",
+ )
+ plt.fill_between(
+ xtest.numpy().flat,
+ (mu - 2 * std).numpy().flat,  # the band always uses the factor 2, independent of beta
+ (mu + 2 * std).numpy().flat,
+ color="#dddddd",
+ )
+ if f_true is not None:
+ plt.plot(
+ xtest.numpy(), f_true(xtest).numpy(), "b-", lw=2, label="truth"
+ )
+ plt.plot(xtest.numpy(), mu.numpy(), "r-", lw=2, label="posterior mean")
+
+ min = torch.max(mu - beta * std)  # largest lower bound; NOTE(review): shadows the builtin min inside this method
+ mask = mu + beta * std < min  # points whose upper bound lies below that value
+ v = torch.min(mu - beta * std).numpy() - 1  # y-level one unit below the smallest lower bound
+ plt.plot(
+ xtest.numpy()[mask],
+ 0 * xtest.numpy()[mask] + v,  # constant height v for every masked point
+ "ko",
+ lw=6,
+ label="Discarted Region",
+ )
+
+ plt.title("Posterior mean prediction plus 2 st.deviation")
+ plt.legend()
+
+ if show == True:
+ plt.show()
+
+ def visualize_slice(self, xtest, slice, show=True, eps=None, size=1, beta=2):
+ append = torch.ones(size=(xtest.size()[0], 1), dtype=torch.float64) * slice
+ xtest2 = torch.cat((xtest, append), dim=1)
+
+ [mu, std] = self.mean_std(xtest2)
+
+ plt.figure(figsize=(15, 7))
+ plt.clf()
+ plt.plot(
+ xtest.numpy(),
+ self.sample(xtest, size=size).numpy(),
+ "k--",
+ lw=2,
+ label="sample",
+ )
+ print(std.size(), mu.size())
+ if self.x is not None:
+ plt.plot(
+ self.x[:, 0].detach().numpy(),
+ self.y.detach().numpy(),
+ "r+",
+ ms=10,
+ marker="o",
+ )
+ plt.fill_between(
+ xtest.numpy().flat,
+ (mu - 2 * std).numpy().flat,
+ (mu + 2 * std).numpy().flat,
+ color="#dddddd",
+ )
+ plt.fill_between(
+ xtest.numpy().flat,
+ (mu + 2 * std).numpy().flat,
+ (mu + 2 * std + 2 * self.s).numpy().flat,
+ color="#bbdefb",
+ )
+ plt.fill_between(
+ xtest.numpy().flat,
+ (mu - 2 * std - 2 * self.s).numpy().flat,
+ (mu - 2 * std).numpy().flat,
+ color="#bbdefb",
+ )
+
+ if eps is not None:
+ mask = beta * std < eps
+ v = torch.min(mu - beta * std - 2 * self.s).numpy()
+ plt.plot(
+ xtest.numpy()[mask],
+ 0 * xtest.numpy()[mask] + v,
+ "k",
+ lw=6,
+ label="$\\mathcal{D}_E$ - $\\epsilon$ accurate domain in a subspace",
+ )
+
+ plt.plot(xtest.numpy(), mu.numpy(), "r-", lw=2, label="posterior mean")
+ plt.title("Posterior mean prediction plus 2 st.deviation")
+ plt.legend()
+ if show == True:
+ plt.show()
+
+ def visualize_contour_with_gap(self, xtest, f_true=None, gap=None, show=False):
+ [mu, _] = self.mean_std(xtest)
+
+ if self.d == 2:
+ from scipy.interpolate import griddata
+
+ xx = xtest[:, 0].detach().numpy()
+ yy = xtest[:, 1].detach().numpy()
+ grid_x, grid_y = np.mgrid[
+ min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j
+ ]
+ grid_z_mu = griddata(
+ (xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method="linear"
+ )
+
+ fig, ax = plt.subplots(figsize=(15, 7))
+ cs = ax.contourf(grid_x, grid_y, grid_z_mu)
+ ax.contour(cs, colors="k")
+
+ ax.plot(
+ self.x[:, 0].detach().numpy(),
+ self.x[:, 1].detach().numpy(),
+ "ro",
+ ms=10,
+ )
+ cbar = fig.colorbar(cs)
+
+ ax.grid(c="k", ls="-", alpha=0.1)
+
+ if f_true is not None:
+ f = f_true(xtest)
+ grid_z_f = griddata(
+ (xx, yy),
+ f[:, 0].detach().numpy(),
+ (grid_x, grid_y),
+ method="linear",
+ )
+ fig, ax = plt.subplots(figsize=(15, 7))
+ cs = ax.contourf(grid_x, grid_y, grid_z_f)
+ ax.contour(cs, colors="k")
+ cbar = fig.colorbar(cs)
+ ax.grid(c="k", ls="-", alpha=0.1)
+ if show == True:
+ plt.show()
+
+ def visualize_contour(
+ self, xtest, f_true=None, show=True, points=True, ms=5, levels=20
+ ):
+ [mu, _] = self.mean_std(xtest)
+
+ if self.d == 2:
+ from scipy.interpolate import griddata
+
+ xx = xtest[:, 0].detach().numpy()
+ yy = xtest[:, 1].detach().numpy()
+
+ grid_x, grid_y = np.mgrid[
+ min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j
+ ]
+ grid_z_mu = griddata(
+ (xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method="linear"
+ )
+
+ fig, ax = plt.subplots(figsize=(15, 7))
+ cs = ax.contourf(grid_x, grid_y, grid_z_mu)
+ ax.contour(cs, colors="k")
+
+ if points == True:
+ ax.plot(
+ self.x[:, 0].detach().numpy(),
+ self.x[:, 1].detach().numpy(),
+ "wo",
+ ms=ms,
+ alpha=0.5,
+ )
+ cbar = fig.colorbar(cs)
+ ax.grid(c="k", ls="-", alpha=0.1)
+
+ if f_true is not None:
+ f = f_true(xtest)
+ grid_z_f = griddata(
+ (xx, yy),
+ f[:, 0].detach().numpy(),
+ (grid_x, grid_y),
+ method="linear",
+ )
+ fig, ax = plt.subplots(figsize=(15, 7))
+ cs = ax.contourf(grid_x, grid_y, grid_z_f, levels=levels)
+ ax.contour(cs, colors="k")
+ cbar = fig.colorbar(cs)
+ ax.grid(c="k", ls="-", alpha=0.1)
+ if show == True:
+ plt.show()
+ return ax
+
+ def visualize_quiver(self, xtest, size=2, norm=1):
+ [mu, std] = self.mean_std(xtest)
+ if self.d == 2:
+ from scipy.interpolate import griddata
+
+ plt.figure(figsize=(15, 7))
+ plt.clf()
+ ax = plt.axes(projection="3d")
+ xx = xtest[:, 0].detach().numpy()
+ yy = xtest[:, 1].detach().numpy()
+ grid_x, grid_y = np.mgrid[
+ min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j
+ ]
+ grid_z_mu = griddata(
+ (xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method="linear"
+ )
+ #
+
+ ax.scatter(
+ self.x[:, 0].detach().numpy(),
+ self.x[:, 1].detach().numpy(),
+ self.y[:, 0].detach().numpy(),
+ c="r",
+ s=100,
+ marker="o",
+ depthshade=False,
+ )
+
+ if self.beta is not None:
+ beta = self.beta(norm=norm)
+ grid_z2 = griddata(
+ (xx, yy),
+ (mu.detach() + beta * std.detach())[:, 0].detach().numpy(),
+ (grid_x, grid_y),
+ method="linear",
+ )
+ ax.plot_surface(grid_x, grid_y, grid_z2, color="gray", alpha=0.2)
+ grid_z3 = griddata(
+ (xx, yy),
+ (mu.detach() - beta * std.detach())[:, 0].detach().numpy(),
+ (grid_x, grid_y),
+ method="linear",
+ )
+ ax.plot_surface(grid_x, grid_y, grid_z3, color="gray", alpha=0.2)
+
+ ax.plot_surface(grid_x, grid_y, grid_z_mu, color="r", alpha=0.4)
+ plt.title("Posterior mean prediction plus 2 st.deviation")
+
+ derivatives = torch.zeros(xtest.size()[0], 2)
+ for index, point in enumerate(xtest):
+ derivatives[index, :] = self.mean_gradient_hessian(point.view(-1, 2))
+ print(derivatives[index, :])
+
+ print(derivatives.size())
+
+ grid_der_x_mu = griddata(
+ (xx, yy),
+ derivatives[:, 0].detach().numpy(),
+ (grid_x, grid_y),
+ method="linear",
+ )
+ grid_der_y_mu = griddata(
+ (xx, yy),
+ derivatives[:, 1].detach().numpy(),
+ (grid_x, grid_y),
+ method="linear",
+ )
+
+ fig, ax = plt.subplots(figsize=(15, 7))
+ cs = ax.contourf(grid_x, grid_y, grid_z_mu)
+
+ ax.contour(cs, colors="k")
+
+ # Plot grid.
+ ax.grid(c="k", ls="-", alpha=0.1)
+ ax.quiver(grid_x, grid_y, grid_der_x_mu, grid_der_y_mu)
+
+ plt.show()
+
+ else:
+ print("Visualization not implemented")
if __name__ == "__main__":
- from stpy.continuous_processes.kernelized_features import KernelizedFeatures
- from stpy.kernels import KernelFunction
- from stpy.embeddings.embedding import HermiteEmbedding
- import stpy
- import torch
- import matplotlib.pyplot as plt
- import numpy as np
-
- n = 1024
- N = 256
- gamma = 0.09
- s = 0.1
- # benchmark = stpy.test_functions.benchmarks.GaussianProcessSample(d =1, gamma = gamma, sigma = s, n = n)
- benchmark = stpy.test_functions.benchmarks.Simple1DFunction(d=1, sigma=s)
-
- x = benchmark.initial_guess(N, adv_inv=True)
- y = benchmark.eval(x)
- xtest = benchmark.interval(1024)
-
- # GP = GaussianProcess(gamma=gamma, s=s)
- # GP.fit_gp(x, y)
- # GP.visualize(xtest, show=False, size=5)
- # plt.show()
-
- m = 64
- kernel = KernelFunction(gamma=gamma)
- embedding = HermiteEmbedding(gamma=gamma, m=m)
- RFF = KernelizedFeatures(embedding=embedding, s=s, m=m)
- RFF.fit_gp(x, y)
- RFF.visualize(xtest, fig=False, show=False, size=5, matheron_kernel=kernel)
- plt.show()
+ from stpy.continuous_processes.kernelized_features import KernelizedFeatures
+ from stpy.kernels import KernelFunction
+ from stpy.embeddings.embedding import HermiteEmbedding
+ import stpy
+ import torch
+ import matplotlib.pyplot as plt
+ import numpy as np
+
+ n = 1024
+ N = 256
+ gamma = 0.09
+ s = 0.1
+ # benchmark = stpy.test_functions.benchmarks.GaussianProcessSample(d =1, gamma = gamma, sigma = s, n = n)
+ benchmark = stpy.test_functions.benchmarks.Simple1DFunction(d=1, sigma=s)
+
+ x = benchmark.initial_guess(N, adv_inv=True)
+ y = benchmark.eval(x)
+ xtest = benchmark.interval(1024)
+
+ # GP = GaussianProcess(gamma=gamma, s=s)
+ # GP.fit_gp(x, y)
+ # GP.visualize(xtest, show=False, size=5)
+ # plt.show()
+
+ m = 64
+ kernel = KernelFunction(gamma=gamma)
+ embedding = HermiteEmbedding(gamma=gamma, m=m)
+ RFF = KernelizedFeatures(embedding=embedding, s=s, m=m)
+ RFF.fit_gp(x, y)
+ RFF.visualize(xtest, fig=False, show=False, size=5, matheron_kernel=kernel)
+ plt.show()
diff --git a/stpy/feature_importance/feature_ranker.py b/stpy/feature_importance/feature_ranker.py
index 17ccbe9..42c131a 100644
--- a/stpy/feature_importance/feature_ranker.py
+++ b/stpy/feature_importance/feature_ranker.py
@@ -4,46 +4,44 @@
from stpy.estimator import Estimator
import copy
-class FeatureRanker():
-
- def __init__(self,
- model: Estimator,
- mode: str = 'explained variance'
- ):
- self.model = model
- self.mode = mode
-
- if not hasattr(self.model, "kernel_object"):
- print ("Invalid estimator structure to run feature importance analysis")
-
- def importance(self):
-
- if self.mode == 'explained variance':
- return self.one_off_importance()
- elif self.mode == 'cross_validation':
- raise NotImplementedError("This is not implemented.")
-
- def one_off_importance(self):
- n,d = self.model.x.size()
- x = self.model.x
- y = self.model.y
- # iterate over features and
- importance = torch.zeros(size=(d,1)).double().view(-1)
- res_total = torch.sum(self.model.residuals(x, y) ** 2)
-
- for i in range(d):
- # define new data
- xnew = x.clone()
- xnew[:,i] = 0.
-
- # define new model
- GP = copy.deepcopy(self.model)
- GP.fit_gp(xnew,y)
-
- # evaluate residuals
- res = torch.sum(GP.residuals(xnew,y)**2)
-
- # store
- importance[i] = res_total/res
- print(i + 1, "/", d,':', res_total/res)
- return importance
\ No newline at end of file
+
+class FeatureRanker:
+
+ def __init__(self, model: Estimator, mode: str = "explained variance"):
+ self.model = model
+ self.mode = mode
+
+ if not hasattr(self.model, "kernel_object"):
+ print("Invalid estimator structure to run feature importance analysis")
+
+ def importance(self):
+
+ if self.mode == "explained variance":
+ return self.one_off_importance()
+ elif self.mode == "cross_validation":
+ raise NotImplementedError("This is not implemented.")
+
+ def one_off_importance(self):
+ n, d = self.model.x.size()
+ x = self.model.x
+ y = self.model.y
+ # iterate over features and
+ importance = torch.zeros(size=(d, 1)).double().view(-1)
+ res_total = torch.sum(self.model.residuals(x, y) ** 2)
+
+ for i in range(d):
+ # define new data
+ xnew = x.clone()
+ xnew[:, i] = 0.0
+
+ # define new model
+ GP = copy.deepcopy(self.model)
+ GP.fit_gp(xnew, y)
+
+ # evaluate residuals
+ res = torch.sum(GP.residuals(xnew, y) ** 2)
+
+ # store
+ importance[i] = res_total / res
+ print(i + 1, "/", d, ":", res_total / res)
+ return importance
diff --git a/stpy/generative_models/conditional_generative_model.py b/stpy/generative_models/conditional_generative_model.py
index 63b7e75..3d3f3a5 100644
--- a/stpy/generative_models/conditional_generative_model.py
+++ b/stpy/generative_models/conditional_generative_model.py
@@ -1,5 +1,7 @@
-class GenerativeModel():
+class GenerativeModel:
pass
+
+
class ConditionalGenerativeModel(GenerativeModel):
x = np.random.randn(10)
@@ -9,6 +11,5 @@ class ConditionalGenerativeModel(GenerativeModel):
# find the largest element
np.max(x)
-
- pass
\ No newline at end of file
+ pass
diff --git a/stpy/generative_models/cvae.py b/stpy/generative_models/cvae.py
index d9f38a5..56918cf 100644
--- a/stpy/generative_models/cvae.py
+++ b/stpy/generative_models/cvae.py
@@ -7,7 +7,7 @@
# cuda setup
device = torch.device("cpu")
-kwargs = {'num_workers': 1, 'pin_memory': True}
+kwargs = {"num_workers": 1, "pin_memory": True}
# hyper params
batch_size = 64
@@ -16,7 +16,6 @@
epochs = 10
-
def one_hot(labels, class_size):
targets = torch.zeros(labels.size(0), class_size)
for i, label in enumerate(labels):
@@ -25,13 +24,13 @@ def one_hot(labels, class_size):
class CVAE(nn.Module):
- def __init__(self, feature_size, latent_size, ouput_size, midsize = 400):
+ def __init__(self, feature_size, latent_size, ouput_size, midsize=400):
super(CVAE, self).__init__()
self.feature_size = feature_size
self.class_size = ouput_size
# encode
- self.fc1 = nn.Linear(feature_size + ouput_size, midsize)
+ self.fc1 = nn.Linear(feature_size + ouput_size, midsize)
self.fc21 = nn.Linear(midsize, latent_size)
self.fc22 = nn.Linear(midsize, latent_size)
@@ -42,28 +41,28 @@ def __init__(self, feature_size, latent_size, ouput_size, midsize = 400):
self.elu = nn.ELU()
self.sigmoid = nn.Sigmoid()
- def encode(self, x, y): # Q(z|x, c)
- '''
+ def encode(self, x, y): # Q(z|x, c)
+ """
x: (bs, feature_size)
y: (bs, class_size)
- '''
- inputs = torch.cat([x, y], 1) # (bs, feature_size+class_size)
+ """
+ inputs = torch.cat([x, y], 1) # (bs, feature_size+class_size)
h1 = self.elu(self.fc1(inputs))
z_mu = self.fc21(h1)
z_var = self.fc22(h1)
return z_mu, z_var
def reparameterize(self, mu, logvar):
- std = torch.exp(0.5*logvar)
+ std = torch.exp(0.5 * logvar)
eps = torch.randn_like(std)
- return mu + eps*std
+ return mu + eps * std
- def decode(self, z, y): # P(x|z, c)
- '''
+ def decode(self, z, y): # P(x|z, c)
+ """
z: (bs, latent_size)
c: (bs, class_size)
- '''
- inputs = torch.cat([z, y], 1) # (bs, latent_size+class_size)
+ """
+ inputs = torch.cat([z, y], 1) # (bs, latent_size+class_size)
h3 = self.elu(self.fc3(inputs))
return self.sigmoid(self.fc4(h3))
@@ -72,13 +71,15 @@ def forward(self, x, y):
z = self.reparameterize(mu, logvar)
return self.decode(z, y), mu, logvar
+
# create a CVAE model
model = CVAE(1, 20, 1).to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
+
# Reconstruction + KL divergence losses summed over all elements and batch
def loss_function(recon_x, x, mu, logvar):
- BCE = F.binary_cross_entropy(recon_x, x, reduction='sum')
+ BCE = F.binary_cross_entropy(recon_x, x, reduction="sum")
KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
return BCE + KLD
@@ -96,13 +97,21 @@ def train(epoch):
train_loss += loss.detach().cpu().numpy()
optimizer.step()
if batch_idx % 20 == 0:
- print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
- epoch, batch_idx * len(data), len(train_loader.dataset),
- 100. * batch_idx / len(train_loader),
- loss.item() / len(data)))
-
- print('====> Epoch: {} Average loss: {:.4f}'.format(
- epoch, train_loss / len(train_loader.dataset)))
+ print(
+ "Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
+ epoch,
+ batch_idx * len(data),
+ len(train_loader.dataset),
+ 100.0 * batch_idx / len(train_loader),
+ loss.item() / len(data),
+ )
+ )
+
+ print(
+ "====> Epoch: {} Average loss: {:.4f}".format(
+ epoch, train_loss / len(train_loader.dataset)
+ )
+ )
def test(epoch):
@@ -113,26 +122,27 @@ def test(epoch):
data, labels = data.to(device), labels.to(device)
labels = one_hot(labels, 10)
recon_batch, mu, logvar = model(data, labels)
- test_loss += loss_function(recon_batch, data, mu, logvar).detach().cpu().numpy()
+ test_loss += (
+ loss_function(recon_batch, data, mu, logvar).detach().cpu().numpy()
+ )
if i == 0:
n = min(data.size(0), 5)
- comparison = torch.cat([data[:n],
- recon_batch.view(-1, 1, 28, 28)[:n]])
- save_image(comparison.cpu(),
- 'reconstruction_' + str(epoch) + '.png', nrow=n)
+ comparison = torch.cat([data[:n], recon_batch.view(-1, 1, 28, 28)[:n]])
+ save_image(
+ comparison.cpu(), "reconstruction_" + str(epoch) + ".png", nrow=n
+ )
test_loss /= len(test_loader.dataset)
- print('====> Test set loss: {:.4f}'.format(test_loss))
+ print("====> Test set loss: {:.4f}".format(test_loss))
for epoch in range(1, epochs + 1):
- train(epoch)
+ train(epoch)
- test(epoch)
+ test(epoch)
- with torch.no_grad():
- c = torch.eye(10, 10).cuda()
- sample = torch.randn(10, 20).to(device)
- sample = model.decode(sample, c).cpu()
- save_image(sample.view(10, 1, 28, 28),
- 'sample_' + str(epoch) + '.png')
\ No newline at end of file
+ with torch.no_grad():
+ c = torch.eye(10, 10).cuda()
+ sample = torch.randn(10, 20).to(device)
+ sample = model.decode(sample, c).cpu()
+ save_image(sample.view(10, 1, 28, 28), "sample_" + str(epoch) + ".png")
diff --git a/stpy/generative_models/generative_sampler.py b/stpy/generative_models/generative_sampler.py
index 3ed89b1..919c5f3 100644
--- a/stpy/generative_models/generative_sampler.py
+++ b/stpy/generative_models/generative_sampler.py
@@ -1,6 +1,7 @@
import torch
-class GenerativeSampler():
+
+class GenerativeSampler:
def __init__(self):
- pass
\ No newline at end of file
+ pass
diff --git a/stpy/helpers/ColorDB.py b/stpy/helpers/ColorDB.py
index d964fce..fd16997 100644
--- a/stpy/helpers/ColorDB.py
+++ b/stpy/helpers/ColorDB.py
@@ -26,140 +26,139 @@
class BadColor(Exception):
- pass
+ pass
DEFAULT_DB = None
-SPACE = ' '
-COMMASPACE = ', '
+SPACE = " "
+COMMASPACE = ", "
# generic class
class ColorDB:
- def __init__(self, fp):
- lineno = 2
- self.__name = fp.name
- # Maintain several dictionaries for indexing into the color database.
- # Note that while Tk supports RGB intensities of 4, 8, 12, or 16 bits,
- # for now we only support 8 bit intensities. At least on OpenWindows,
- # all intensities in the /usr/openwin/lib/rgb.txt file are 8-bit
- #
- # key is (red, green, blue) tuple, value is (name, [aliases])
- self.__byrgb = {}
- # key is name, value is (red, green, blue)
- self.__byname = {}
- # all unique names (non-aliases). built-on demand
- self.__allnames = None
- for line in fp:
- # get this compiled regular expression from derived class
- mo = self._re.match(line)
- if not mo:
- print('Error in', fp.name, ' line', lineno, file=sys.stderr)
- lineno += 1
- continue
- # extract the red, green, blue, and name
- red, green, blue = self._extractrgb(mo)
- name = self._extractname(mo)
- keyname = name.lower()
- # BAW: for now the `name' is just the first named color with the
- # rgb values we find. Later, we might want to make the two word
- # version the `name', or the CapitalizedVersion, etc.
- key = (red, green, blue)
- foundname, aliases = self.__byrgb.get(key, (name, []))
- if foundname != name and foundname not in aliases:
- aliases.append(name)
- self.__byrgb[key] = (foundname, aliases)
- # add to byname lookup
- self.__byname[keyname] = key
- lineno = lineno + 1
-
- # override in derived classes
- def _extractrgb(self, mo):
- return [int(x) for x in mo.group('red', 'green', 'blue')]
-
- def _extractname(self, mo):
- return mo.group('name')
-
- def filename(self):
- return self.__name
-
- def find_byrgb(self, rgbtuple):
- """Return name for rgbtuple"""
- try:
- return self.__byrgb[rgbtuple]
- except KeyError:
- raise BadColor(rgbtuple) from None
-
- def find_byname(self, name):
- """Return (red, green, blue) for name"""
- name = name.lower()
- try:
- return self.__byname[name]
- except KeyError:
- raise BadColor(name) from None
-
- def nearest(self, red, green, blue):
- """Return the name of color nearest (red, green, blue)"""
- # BAW: should we use Voronoi diagrams, Delaunay triangulation, or
- # octree for speeding up the locating of nearest point? Exhaustive
- # search is inefficient, but seems fast enough.
- nearest = -1
- nearest_name = ''
- for name, aliases in self.__byrgb.values():
- r, g, b = self.__byname[name.lower()]
- rdelta = red - r
- gdelta = green - g
- bdelta = blue - b
- distance = rdelta * rdelta + gdelta * gdelta + bdelta * bdelta
- if nearest == -1 or distance < nearest:
- nearest = distance
- nearest_name = name
- return nearest_name
-
- def unique_names(self):
- # sorted
- if not self.__allnames:
- self.__allnames = []
- for name, aliases in self.__byrgb.values():
- self.__allnames.append(name)
- self.__allnames.sort(key=str.lower)
- return self.__allnames
-
- def aliases_of(self, red, green, blue):
- try:
- name, aliases = self.__byrgb[(red, green, blue)]
- except KeyError:
- raise BadColor((red, green, blue)) from None
- return [name] + aliases
+ def __init__(self, fp):
+ lineno = 2
+ self.__name = fp.name
+ # Maintain several dictionaries for indexing into the color database.
+ # Note that while Tk supports RGB intensities of 4, 8, 12, or 16 bits,
+ # for now we only support 8 bit intensities. At least on OpenWindows,
+ # all intensities in the /usr/openwin/lib/rgb.txt file are 8-bit
+ #
+ # key is (red, green, blue) tuple, value is (name, [aliases])
+ self.__byrgb = {}
+ # key is name, value is (red, green, blue)
+ self.__byname = {}
+ # all unique names (non-aliases). built-on demand
+ self.__allnames = None
+ for line in fp:
+ # get this compiled regular expression from derived class
+ mo = self._re.match(line)
+ if not mo:
+ print("Error in", fp.name, " line", lineno, file=sys.stderr)
+ lineno += 1
+ continue
+ # extract the red, green, blue, and name
+ red, green, blue = self._extractrgb(mo)
+ name = self._extractname(mo)
+ keyname = name.lower()
+ # BAW: for now the `name' is just the first named color with the
+ # rgb values we find. Later, we might want to make the two word
+ # version the `name', or the CapitalizedVersion, etc.
+ key = (red, green, blue)
+ foundname, aliases = self.__byrgb.get(key, (name, []))
+ if foundname != name and foundname not in aliases:
+ aliases.append(name)
+ self.__byrgb[key] = (foundname, aliases)
+ # add to byname lookup
+ self.__byname[keyname] = key
+ lineno = lineno + 1
+
+ # override in derived classes
+ def _extractrgb(self, mo):
+ return [int(x) for x in mo.group("red", "green", "blue")]
+
+ def _extractname(self, mo):
+ return mo.group("name")
+
+ def filename(self):
+ return self.__name
+
+ def find_byrgb(self, rgbtuple):
+ """Return name for rgbtuple"""
+ try:
+ return self.__byrgb[rgbtuple]
+ except KeyError:
+ raise BadColor(rgbtuple) from None
+
+ def find_byname(self, name):
+ """Return (red, green, blue) for name"""
+ name = name.lower()
+ try:
+ return self.__byname[name]
+ except KeyError:
+ raise BadColor(name) from None
+
+ def nearest(self, red, green, blue):
+ """Return the name of color nearest (red, green, blue)"""
+ # BAW: should we use Voronoi diagrams, Delaunay triangulation, or
+ # octree for speeding up the locating of nearest point? Exhaustive
+ # search is inefficient, but seems fast enough.
+ nearest = -1
+ nearest_name = ""
+ for name, aliases in self.__byrgb.values():
+ r, g, b = self.__byname[name.lower()]
+ rdelta = red - r
+ gdelta = green - g
+ bdelta = blue - b
+ distance = rdelta * rdelta + gdelta * gdelta + bdelta * bdelta
+ if nearest == -1 or distance < nearest:
+ nearest = distance
+ nearest_name = name
+ return nearest_name
+
+ def unique_names(self):
+ # sorted
+ if not self.__allnames:
+ self.__allnames = []
+ for name, aliases in self.__byrgb.values():
+ self.__allnames.append(name)
+ self.__allnames.sort(key=str.lower)
+ return self.__allnames
+
+ def aliases_of(self, red, green, blue):
+ try:
+ name, aliases = self.__byrgb[(red, green, blue)]
+ except KeyError:
+ raise BadColor((red, green, blue)) from None
+ return [name] + aliases
class RGBColorDB(ColorDB):
- _re = re.compile(
- r'\s*(?P\d+)\s+(?P\d+)\s+(?P\d+)\s+(?P.*)')
+ _re = re.compile(r"\s*(?P\d+)\s+(?P\d+)\s+(?P\d+)\s+(?P.*)")
class HTML40DB(ColorDB):
- _re = re.compile(r'(?P\S+)\s+(?P#[0-9a-fA-F]{6})')
+ _re = re.compile(r"(?P\S+)\s+(?P#[0-9a-fA-F]{6})")
- def _extractrgb(self, mo):
- return rrggbb_to_triplet(mo.group('hexrgb'))
+ def _extractrgb(self, mo):
+ return rrggbb_to_triplet(mo.group("hexrgb"))
class LightlinkDB(HTML40DB):
- _re = re.compile(r'(?P(.+))\s+(?P#[0-9a-fA-F]{6})')
+ _re = re.compile(r"(?P(.+))\s+(?P#[0-9a-fA-F]{6})")
- def _extractname(self, mo):
- return mo.group('name').strip()
+ def _extractname(self, mo):
+ return mo.group("name").strip()
class WebsafeDB(ColorDB):
- _re = re.compile('(?P#[0-9a-fA-F]{6})')
+ _re = re.compile("(?P#[0-9a-fA-F]{6})")
- def _extractrgb(self, mo):
- return rrggbb_to_triplet(mo.group('hexrgb'))
+ def _extractrgb(self, mo):
+ return rrggbb_to_triplet(mo.group("hexrgb"))
- def _extractname(self, mo):
- return mo.group('hexrgb').upper()
+ def _extractname(self, mo):
+ return mo.group("hexrgb").upper()
# format is a tuple (RE, SCANLINES, CLASS) where RE is a compiled regular
@@ -167,112 +166,111 @@ def _extractname(self, mo):
# the class to instantiate if a match is found
FILETYPES = [
- (re.compile('Xorg'), RGBColorDB),
- (re.compile('XConsortium'), RGBColorDB),
- (re.compile('HTML'), HTML40DB),
- (re.compile('lightlink'), LightlinkDB),
- (re.compile('Websafe'), WebsafeDB),
+ (re.compile("Xorg"), RGBColorDB),
+ (re.compile("XConsortium"), RGBColorDB),
+ (re.compile("HTML"), HTML40DB),
+ (re.compile("lightlink"), LightlinkDB),
+ (re.compile("Websafe"), WebsafeDB),
]
def get_colordb(file, filetype=None):
- colordb = None
- fp = open(file)
- try:
- line = fp.readline()
- if not line:
- return None
- # try to determine the type of RGB file it is
- if filetype is None:
- filetypes = FILETYPES
- else:
- filetypes = [filetype]
- for typere, class_ in filetypes:
- mo = typere.search(line)
- if mo:
- break
- else:
- # no matching type
- return None
- # we know the type and the class to grok the type, so suck it in
- colordb = class_(fp)
- finally:
- fp.close()
- # save a global copy
- global DEFAULT_DB
- DEFAULT_DB = colordb
- return colordb
+ colordb = None
+ fp = open(file)
+ try:
+ line = fp.readline()
+ if not line:
+ return None
+ # try to determine the type of RGB file it is
+ if filetype is None:
+ filetypes = FILETYPES
+ else:
+ filetypes = [filetype]
+ for typere, class_ in filetypes:
+ mo = typere.search(line)
+ if mo:
+ break
+ else:
+ # no matching type
+ return None
+ # we know the type and the class to grok the type, so suck it in
+ colordb = class_(fp)
+ finally:
+ fp.close()
+ # save a global copy
+ global DEFAULT_DB
+ DEFAULT_DB = colordb
+ return colordb
_namedict = {}
def rrggbb_to_triplet(color):
- """Converts a #rrggbb color to the tuple (red, green, blue)."""
- rgbtuple = _namedict.get(color)
- if rgbtuple is None:
- if color[0] != '#':
- raise BadColor(color)
- red = color[1:3]
- green = color[3:5]
- blue = color[5:7]
- rgbtuple = int(red, 16), int(green, 16), int(blue, 16)
- _namedict[color] = rgbtuple
- return rgbtuple
+ """Converts a #rrggbb color to the tuple (red, green, blue)."""
+ rgbtuple = _namedict.get(color)
+ if rgbtuple is None:
+ if color[0] != "#":
+ raise BadColor(color)
+ red = color[1:3]
+ green = color[3:5]
+ blue = color[5:7]
+ rgbtuple = int(red, 16), int(green, 16), int(blue, 16)
+ _namedict[color] = rgbtuple
+ return rgbtuple
_tripdict = {}
def triplet_to_rrggbb(rgbtuple):
- """Converts a (red, green, blue) tuple to #rrggbb."""
- global _tripdict
- hexname = _tripdict.get(rgbtuple)
- if hexname is None:
- hexname = '#%02x%02x%02x' % rgbtuple
- _tripdict[rgbtuple] = hexname
- return hexname
+ """Converts a (red, green, blue) tuple to #rrggbb."""
+ global _tripdict
+ hexname = _tripdict.get(rgbtuple)
+ if hexname is None:
+ hexname = "#%02x%02x%02x" % rgbtuple
+ _tripdict[rgbtuple] = hexname
+ return hexname
def triplet_to_fractional_rgb(rgbtuple):
- return [x / 256 for x in rgbtuple]
+ return [x / 256 for x in rgbtuple]
def triplet_to_brightness(rgbtuple):
- # return the brightness (grey level) along the scale 0.0==black to
- # 1.0==white
- r = 0.299
- g = 0.587
- b = 0.114
- return r * rgbtuple[0] + g * rgbtuple[1] + b * rgbtuple[2]
-
-
-if __name__ == '__main__':
- colordb = get_colordb('colors.txt')
- if not colordb:
- print('No parseable color database found')
- sys.exit(1)
- # on my system, this color matches exactly
- target = 'navy'
- red, green, blue = rgbtuple = colordb.find_byname(target)
- print(target, ':', red, green, blue, triplet_to_rrggbb(rgbtuple))
- print ("-----")
- print (rgbtuple)
- name, aliases = colordb.find_byrgb(rgbtuple)
- print('name:', name, 'aliases:', COMMASPACE.join(aliases))
- r, g, b = (1, 1, 128) # nearest to navy
- r, g, b = (145, 238, 144) # nearest to lightgreen
- r, g, b = (255, 251, 250) # snow
- print('finding nearest to', target, '...')
- import time
-
- t0 = time.time()
- nearest = colordb.nearest(r, g, b)
- t1 = time.time()
- print('found nearest color', nearest, 'in', t1 - t0, 'seconds')
- # dump the database
- for n in colordb.unique_names():
- r, g, b = colordb.find_byname(n)
- aliases = colordb.aliases_of(r, g, b)
- print('%20s: (%3d/%3d/%3d) == %s' % (n, r, g, b,
- SPACE.join(aliases[1:])))
\ No newline at end of file
+ # return the brightness (grey level) along the scale 0.0==black to
+ # 1.0==white
+ r = 0.299
+ g = 0.587
+ b = 0.114
+ return r * rgbtuple[0] + g * rgbtuple[1] + b * rgbtuple[2]
+
+
+if __name__ == "__main__":
+ colordb = get_colordb("colors.txt")
+ if not colordb:
+ print("No parseable color database found")
+ sys.exit(1)
+ # on my system, this color matches exactly
+ target = "navy"
+ red, green, blue = rgbtuple = colordb.find_byname(target)
+ print(target, ":", red, green, blue, triplet_to_rrggbb(rgbtuple))
+ print("-----")
+ print(rgbtuple)
+ name, aliases = colordb.find_byrgb(rgbtuple)
+ print("name:", name, "aliases:", COMMASPACE.join(aliases))
+ r, g, b = (1, 1, 128) # nearest to navy
+ r, g, b = (145, 238, 144) # nearest to lightgreen
+ r, g, b = (255, 251, 250) # snow
+ print("finding nearest to", target, "...")
+ import time
+
+ t0 = time.time()
+ nearest = colordb.nearest(r, g, b)
+ t1 = time.time()
+ print("found nearest color", nearest, "in", t1 - t0, "seconds")
+ # dump the database
+ for n in colordb.unique_names():
+ r, g, b = colordb.find_byname(n)
+ aliases = colordb.aliases_of(r, g, b)
+ print("%20s: (%3d/%3d/%3d) == %s" % (n, r, g, b, SPACE.join(aliases[1:])))
diff --git a/stpy/helpers/abitrary_sampling.py b/stpy/helpers/abitrary_sampling.py
index 428c03c..e14887d 100644
--- a/stpy/helpers/abitrary_sampling.py
+++ b/stpy/helpers/abitrary_sampling.py
@@ -6,207 +6,217 @@
def sample_uniform_sphere(n, d, radius=1):
- X = np.random.randn(n, d)
- X_n = np.random.randn(n, d)
- for i in range(n):
- X_n[i, :] = (X[i, :] / np.linalg.norm(X[i, :])) * radius
- return X_n
+ X = np.random.randn(n, d)
+ X_n = np.random.randn(n, d)
+ for i in range(n):
+ X_n[i, :] = (X[i, :] / np.linalg.norm(X[i, :])) * radius
+ return X_n
def rejection_sampling(pdf, size=(1, 1)):
- """
- Implements rejection sampling
-
- :param pdf:
- :param size:
- :return:
- """
- n = size[0]
- d = size[1]
- output = np.zeros(shape=size)
- i = 0
- while i < n:
- Z = np.random.normal(size=(1, d))
- u = np.random.uniform()
- if pdf(Z) < u:
- output[i, :] = Z
- i = i + 1
-
- return output
+ """
+ Implements rejection sampling
+
+ :param pdf:
+ :param size:
+ :return:
+ """
+ n = size[0]
+ d = size[1]
+ output = np.zeros(shape=size)
+ i = 0
+ while i < n:
+ Z = np.random.normal(size=(1, d))
+ u = np.random.uniform()
+ if pdf(Z) < u:
+ output[i, :] = Z
+ i = i + 1
+
+ return output
def next_prime():
- def is_prime(num):
- "Checks if num is a prime value"
- for i in range(2, int(num ** 0.5) + 1):
- if (num % i) == 0: return False
- return True
+ def is_prime(num):
+ "Checks if num is a prime value"
+ for i in range(2, int(num**0.5) + 1):
+ if (num % i) == 0:
+ return False
+ return True
- prime = 3
- while (1):
- if is_prime(prime):
- yield prime
- prime += 2
+ prime = 3
+ while 1:
+ if is_prime(prime):
+ yield prime
+ prime += 2
def vdc(n, base=2):
- vdc, denom = 0, 1
- while n:
- denom *= base
- n, remainder = divmod(n, base)
- vdc += remainder / float(denom)
- return vdc
+ vdc, denom = 0, 1
+ while n:
+ denom *= base
+ n, remainder = divmod(n, base)
+ vdc += remainder / float(denom)
+ return vdc
def halton_sequence(size, dim):
- seq = []
- primeGen = next_prime()
- next(primeGen)
- for d in range(dim):
- base = next(primeGen)
- seq.append([vdc(i, base) for i in range(size)])
- return seq
+ seq = []
+ primeGen = next_prime()
+ next(primeGen)
+ for d in range(dim):
+ base = next(primeGen)
+ seq.append([vdc(i, base) for i in range(size)])
+ return seq
def sample_qmc_halton_normal(size=(1, 1)):
- Z = np.array(halton_sequence(size[0], size[1])).T
- Z[0, :] += 10e-5
- from scipy.stats import norm
- Z = norm.ppf(Z)
- return Z
+ Z = np.array(halton_sequence(size[0], size[1])).T
+ Z[0, :] += 10e-5
+ from scipy.stats import norm
+
+ Z = norm.ppf(Z)
+ return Z
def sample_qmc_halton(sampler, size=(1, 1)):
- Z = np.array(halton_sequence(size[0], size[1]), dtype=np.float64).T
- Z[0, :] += 10e-5
- Z = sampler(Z)
- return Z
+ Z = np.array(halton_sequence(size[0], size[1]), dtype=np.float64).T
+ Z[0, :] += 10e-5
+ Z = sampler(Z)
+ return Z
def sample_bounded(bounds):
- d = len(bounds)
- x = np.zeros(shape=(d))
- for i in range(d):
- x[i] = np.uniform(bounds[i][0], bounds[i][1])
- return x
-
-
-def randomly_split_set_without_duplicates_balanced(x: torch.Tensor,
- y: torch.Tensor,
- max_bins: int = 2,
- alpha: float = 0.2,
- size: Union[int, float, None] = None):
- # sort tensor
- N = x.size()[0]
-
- out, indices = torch.unique(x, dim=0, return_inverse=True)
- n, d = out.size()
- if size is None:
- ntest = int(alpha * n)
- else:
- ntest = size
- y_out = y[np.unique(indices)]
-
- # bin the data
- samples_per_bin, bins, = np.histogram(y_out, bins=max_bins) # Doane's method worked best for me
- classes = np.digitize(y_out, bins)
- classes[classes == max_bins+1] = max_bins
-
- # randomly split
- s = StratifiedShuffleSplit(n_splits=1, test_size=ntest)
-
- for _, n_test_indices in s.split(out,classes):
- mask_test = torch.zeros(N).bool()
- for index in n_test_indices:
- mask_test = torch.logical_or(mask_test, indices == index)
-
- return mask_test, ~mask_test
-
-
-def randomly_split_set_without_duplicates(x: torch.Tensor,
- alpha: float = 0.2,
- size: Union[int, float, None] = None):
- """
- Randomly splits the dataset and returns the mask of the
- :param x:
- :param alpha:
- :return:
- """
-
- # sort tensor
- N = x.size()[0]
-
- out, indices = torch.unique(x, dim=0, return_inverse=True)
-
- n, d = out.size()
- if size is None:
- ntest = int(alpha * n)
- else:
- ntest = size
-
- # randomly split
- n_test_indices = np.random.choice(np.arange(0, n, 1), size=ntest, replace=False)
- mask_test = torch.zeros(N).bool()
-
- for index in n_test_indices:
- mask_test = torch.logical_or(mask_test, indices == index)
-
- return mask_test, ~mask_test
-
-
-def randomly_split_set_without_duplicates_general(x: torch.Tensor,
- sizes: List = [None]):
- """
- Randomly splits the dataset and returns the mask of the
- :param x:
- :param alpha:
- :return:
- """
-
- # sort tensor
- N = x.size()[0]
-
- out, indices = torch.unique(x, dim=0, return_inverse=True)
- # is number of unique elements
- n, d = out.size()
-
- # randomly permute indices
- inde = torch.from_numpy(np.random.permutation(np.arange(0, n, 1)))
- cumsum_indices = torch.cumsum(torch.Tensor(sizes),0).int()
- cumsum_indices = torch.cat((torch.Tensor([0]),cumsum_indices)).int()
-
- masks = [torch.zeros(N).bool() for _ in sizes]
- for j in range(len(sizes)):
- n_test_indices = inde[cumsum_indices[j]:min(n,cumsum_indices[j+1])]
- for index in n_test_indices:
- masks[j] = torch.logical_or(masks[j], indices == index)
-
- return masks
+ d = len(bounds)
+ x = np.zeros(shape=(d))
+ for i in range(d):
+ x[i] = np.uniform(bounds[i][0], bounds[i][1])
+ return x
+
+
+def randomly_split_set_without_duplicates_balanced(
+ x: torch.Tensor,
+ y: torch.Tensor,
+ max_bins: int = 2,
+ alpha: float = 0.2,
+ size: Union[int, float, None] = None,
+):
+ # sort tensor
+ N = x.size()[0]
+
+ out, indices = torch.unique(x, dim=0, return_inverse=True)
+ n, d = out.size()
+ if size is None:
+ ntest = int(alpha * n)
+ else:
+ ntest = size
+ y_out = y[np.unique(indices)]
+
+ # bin the data
+ (
+ samples_per_bin,
+ bins,
+ ) = np.histogram(
+ y_out, bins=max_bins
+ ) # Doane's method worked best for me
+ classes = np.digitize(y_out, bins)
+ classes[classes == max_bins + 1] = max_bins
+
+ # randomly split
+ s = StratifiedShuffleSplit(n_splits=1, test_size=ntest)
+
+ for _, n_test_indices in s.split(out, classes):
+ mask_test = torch.zeros(N).bool()
+ for index in n_test_indices:
+ mask_test = torch.logical_or(mask_test, indices == index)
+
+ return mask_test, ~mask_test
+
+
+def randomly_split_set_without_duplicates(
+ x: torch.Tensor, alpha: float = 0.2, size: Union[int, float, None] = None
+):
+ """
+ Randomly splits the dataset and returns the mask of the
+ :param x:
+ :param alpha:
+ :return:
+ """
+
+ # sort tensor
+ N = x.size()[0]
+
+ out, indices = torch.unique(x, dim=0, return_inverse=True)
+
+ n, d = out.size()
+ if size is None:
+ ntest = int(alpha * n)
+ else:
+ ntest = size
+
+ # randomly split
+ n_test_indices = np.random.choice(np.arange(0, n, 1), size=ntest, replace=False)
+ mask_test = torch.zeros(N).bool()
+
+ for index in n_test_indices:
+ mask_test = torch.logical_or(mask_test, indices == index)
+
+ return mask_test, ~mask_test
+
+
+def randomly_split_set_without_duplicates_general(
+ x: torch.Tensor, sizes: List = [None]
+):
+ """
+ Randomly splits the dataset and returns the mask of the
+ :param x:
+ :param alpha:
+ :return:
+ """
+
+ # sort tensor
+ N = x.size()[0]
+
+ out, indices = torch.unique(x, dim=0, return_inverse=True)
+ # is number of unique elements
+ n, d = out.size()
+
+ # randomly permute indices
+ inde = torch.from_numpy(np.random.permutation(np.arange(0, n, 1)))
+ cumsum_indices = torch.cumsum(torch.tensor(sizes), 0).int()
+ cumsum_indices = torch.cat((torch.tensor([0]), cumsum_indices)).int()
+
+ masks = [torch.zeros(N).bool() for _ in sizes]
+ for j in range(len(sizes)):
+ n_test_indices = inde[cumsum_indices[j] : min(n, cumsum_indices[j + 1])]
+ for index in n_test_indices:
+ masks[j] = torch.logical_or(masks[j], indices == index)
+
+ return masks
#
if __name__ == "__main__":
- # x = torch.Tensor([[2, 1, 1], [2, 1, 1], [2, 2, 2],
- # [3, 2, 2], [2, 1, 1], [4, 2, 1],
- # [4, 2, 4], [4,4,4], [1,2,2]]).double()
- #
- x = torch.randint(0, 10, size = (2000,3))
- y = torch.randn(size = (x.size()[0],1))*10
-
- # masks = randomly_split_set_without_duplicates_general(x, sizes=[1,2,3])
- #
- # for mask in masks:
- # print (mask)
-
- masks = randomly_split_set_without_duplicates_balanced(x,y, size = 100, max_bins = 10)
- masks2 = randomly_split_set_without_duplicates(x, size = 100)
- import matplotlib.pyplot as plt
- labels = ['test', 'train']
- for index,(mask,mask2) in enumerate(zip(masks,masks2)):
- plt.hist(y[mask].T, alpha = 0.2, density= True, label = labels[index])
- plt.hist(y[mask2].T, alpha=0.2, density=True, label=labels[index]+"_random")
- plt.legend()
- plt.show()
-
+ # x = torch.tensor([[2, 1, 1], [2, 1, 1], [2, 2, 2],
+ # [3, 2, 2], [2, 1, 1], [4, 2, 1],
+ # [4, 2, 4], [4,4,4], [1,2,2]]).double()
+ #
+ x = torch.randint(0, 10, size=(2000, 3))
+ y = torch.randn(size=(x.size()[0], 1)) * 10
+
+ # masks = randomly_split_set_without_duplicates_general(x, sizes=[1,2,3])
+ #
+ # for mask in masks:
+ # print (mask)
+
+ masks = randomly_split_set_without_duplicates_balanced(x, y, size=100, max_bins=10)
+ masks2 = randomly_split_set_without_duplicates(x, size=100)
+ import matplotlib.pyplot as plt
+
+ labels = ["test", "train"]
+ for index, (mask, mask2) in enumerate(zip(masks, masks2)):
+ plt.hist(y[mask].T, alpha=0.2, density=True, label=labels[index])
+ plt.hist(y[mask2].T, alpha=0.2, density=True, label=labels[index] + "_random")
+ plt.legend()
+ plt.show()
diff --git a/stpy/helpers/coreset_helper.py b/stpy/helpers/coreset_helper.py
index 84aaccf..85eae1f 100644
--- a/stpy/helpers/coreset_helper.py
+++ b/stpy/helpers/coreset_helper.py
@@ -3,26 +3,28 @@
def epsilon_net(borel_set, k):
- pass
+ pass
def coreset(borel_set, k):
- pass
+ pass
def coreset_leverage_score_greedy(borel_set, kernel, n, tol=10e-4):
- xtest = borel_set.return_discretization(n)
- k = kernel.kernel
- N = xtest.size()[0]
- score = 1
- K = k(xtest, xtest)
- x = xtest[torch.randint(0, N, (1,)), :].view(1, -1)
- c = 1
- while score > tol:
- I = torch.eye(c).double()
- scores = np.diag(K - k(xtest, x).T @ torch.pinverse(k(x, x) + tol * I) @ k(x, xtest).T)
- index = np.argmax(scores)
- x = torch.cat((x, xtest[index, :].view(1, -1)))
- score = scores[index]
- c = c + 1
- return x
+ xtest = borel_set.return_discretization(n)
+ k = kernel.kernel
+ N = xtest.size()[0]
+ score = 1
+ K = k(xtest, xtest)
+ x = xtest[torch.randint(0, N, (1,)), :].view(1, -1)
+ c = 1
+ while score > tol:
+ I = torch.eye(c).double()
+ scores = np.diag(
+ K - k(xtest, x).T @ torch.pinverse(k(x, x) + tol * I) @ k(x, xtest).T
+ )
+ index = np.argmax(scores)
+ x = torch.cat((x, xtest[index, :].view(1, -1)))
+ score = scores[index]
+ c = c + 1
+ return x
diff --git a/stpy/helpers/ellipsoid_algorithms.py b/stpy/helpers/ellipsoid_algorithms.py
index 112d077..2514883 100644
--- a/stpy/helpers/ellipsoid_algorithms.py
+++ b/stpy/helpers/ellipsoid_algorithms.py
@@ -7,424 +7,487 @@
def maximum_volume_ellipsoid_l1_polytope_ellipse(ellipse, l1_polytope, verbose=False):
- """
- ellipse is
- xA_ix + 2b_i x + c_i \leq 0
-
- \sum q_i | x^\top a_i - b_i |
-
- :param ellipse:
- :param polytope:
- :param verbose:
- :return:
- """
-
- p = ellipse[0].shape[0]
-
- B = cp.Variable((p, p), PSD=True)
- d = cp.Variable((p, 1))
- lam = cp.Variable((1, 1))
- obj_max = cp.Maximize(cp.log_det(B))
-
- constraints = []
- A, b, c = ellipse
-
- eye = np.eye(p)
- zeros = np.zeros(shape=(1, p))
- invA = np.linalg.inv(A)
-
- constraints.append(
- cp.bmat([
- [-lam - c + b.T @ invA @ b, zeros, d.T + b.T @ invA.T],
- [zeros.T, lam * eye, B],
- [d + invA @ b, B, invA]]) >> 0)
-
- q, X, y, eps = l1_polytope
- m = X.shape[0]
- t = cp.Variable((m, 1))
- constraints.append(q.T @ t <= eps)
- constraints.append(t >= 0.)
- for i in range(m):
- ai = X[i, :]
- bi = y[i]
- constraints.append(cp.norm2(B @ ai) + ai.T @ d - bi <= t[i])
- constraints.append(cp.norm2(B @ ai) - ai.T @ d + bi <= t[i])
-
- prob = cp.Problem(obj_max, constraints)
- prob.solve(solver=cp.MOSEK, verbose=verbose)
-
- print(prob.status)
- if B.value is not None:
- return np.linalg.inv(B.value).T @ np.linalg.inv(B.value), d.value
- else:
- return None, None
-
-
-def maximum_volume_ellipsoid_relu_polytope_ellipse(ellipse, relu_polytope, verbose=False):
- """
- ellipse is
- xA_ix + 2b_i x + c_i \leq 0
-
-
- (eta_i + x^x_i) \leq eps_i
-
- :param ellipse:
- :param polytope:
- :param verbose:
- :return:
- """
-
- p = ellipse[0].shape[0]
-
- B = cp.Variable((p, p), PSD=True)
- d = cp.Variable((p, 1))
- lam = cp.Variable((1, 1))
- obj_max = cp.Maximize(cp.log_det(B))
-
- constraints = []
- A, b, c = ellipse
-
- eye = np.eye(p)
- zeros = np.zeros(shape=(1, p))
- invA = np.linalg.inv(A)
-
- constraints.append(
- cp.bmat([
- [-lam - c + b.T @ invA @ b, zeros, d.T + b.T @ invA.T],
- [zeros.T, lam * eye, B],
- [d + invA @ b, B, invA]]) >> 0)
-
- q, X, y, eps = relu_polytope
- m = X.shape[0]
- t = cp.Variable((m, 1))
- constraints.append(q.T @ t <= eps)
- constraints.append(t >= 0.)
- for i in range(m):
- ai = X[i, :]
- bi = y[i]
- constraints.append(cp.pos(cp.norm2(B @ ai) + ai.T @ d - bi) <= t[i])
-
- prob = cp.Problem(obj_max, constraints)
- prob.solve(solver=cp.MOSEK, verbose=verbose)
-
- print(prob.status)
- if B.value is not None:
- return np.linalg.inv(B.value).T @ np.linalg.inv(B.value), d.value
- else:
- return None, None
-
-
-def maximum_volume_ellipsoid_intersection_ellipsoids(ellipses, planes=None, verbose=False):
- """
- Each ellipse is
- xA_ix + 2b_i x + c_i \leq 0
-
- :param elipses: list of [A,b,c]
-
- :return:elipse ||x-v||_B^2 < 1
- """
-
- p = ellipses[0][0].shape[0]
- m = len(ellipses)
-
- B = cp.Variable((p, p), PSD=True)
- d = cp.Variable((p, 1))
- lam = cp.Variable((m, 1))
-
- obj_max = cp.Maximize(cp.log_det(B))
-
- constraints = []
- for index, ellipse in enumerate(ellipses):
- A, b, c = ellipse
-
- eye = np.eye(p)
- zeros = np.zeros(shape=(1, p))
- invA = np.linalg.inv(A)
-
- constraints.append(
- cp.bmat([
- [-lam[index, 0] - c + b.T @ invA @ b, zeros, d.T + b.T @ invA.T],
- [zeros.T, lam[index, 0] * eye, B],
- [d + invA @ b, B, invA]]) >> 0)
-
- if planes is not None:
- for index, plane in enumerate(planes):
- a, b = plane
- constraints.append(cp.norm2(B @ a) + a.T @ d <= b)
-
- prob = cp.Problem(obj_max, constraints)
- prob.solve(solver=cp.MOSEK, verbose=verbose)
-
- print(prob.status)
- if B.value is not None:
- return np.linalg.inv(B.value).T @ np.linalg.inv(B.value), d.value
- else:
- return None, None
+ r"""
+ ellipse is
+ xA_ix + 2b_i x + c_i \leq 0
+
+ \sum q_i | x^\top a_i - b_i |
+
+ :param ellipse:
+ :param polytope:
+ :param verbose:
+ :return:
+ """
+
+ p = ellipse[0].shape[0]
+
+ B = cp.Variable((p, p), PSD=True)
+ d = cp.Variable((p, 1))
+ lam = cp.Variable((1, 1))
+ obj_max = cp.Maximize(cp.log_det(B))
+
+ constraints = []
+ A, b, c = ellipse
+
+ eye = np.eye(p)
+ zeros = np.zeros(shape=(1, p))
+ invA = np.linalg.inv(A)
+
+ constraints.append(
+ cp.bmat(
+ [
+ [-lam - c + b.T @ invA @ b, zeros, d.T + b.T @ invA.T],
+ [zeros.T, lam * eye, B],
+ [d + invA @ b, B, invA],
+ ]
+ )
+ >> 0
+ )
+
+ q, X, y, eps = l1_polytope
+ m = X.shape[0]
+ t = cp.Variable((m, 1))
+ constraints.append(q.T @ t <= eps)
+ constraints.append(t >= 0.0)
+ for i in range(m):
+ ai = X[i, :]
+ bi = y[i]
+ constraints.append(cp.norm2(B @ ai) + ai.T @ d - bi <= t[i])
+ constraints.append(cp.norm2(B @ ai) - ai.T @ d + bi <= t[i])
+
+ prob = cp.Problem(obj_max, constraints)
+ prob.solve(solver=cp.MOSEK, verbose=verbose)
+
+ print(prob.status)
+ if B.value is not None:
+ return np.linalg.inv(B.value).T @ np.linalg.inv(B.value), d.value
+ else:
+ return None, None
+
+
+def maximum_volume_ellipsoid_relu_polytope_ellipse(
+ ellipse, relu_polytope, verbose=False
+):
+ """
+ ellipse is
+ xA_ix + 2b_i x + c_i \leq 0
+
+
+ (eta_i + x^x_i) \leq eps_i
+
+ :param ellipse:
+ :param polytope:
+ :param verbose:
+ :return:
+ """
+
+ p = ellipse[0].shape[0]
+
+ B = cp.Variable((p, p), PSD=True)
+ d = cp.Variable((p, 1))
+ lam = cp.Variable((1, 1))
+ obj_max = cp.Maximize(cp.log_det(B))
+
+ constraints = []
+ A, b, c = ellipse
+
+ eye = np.eye(p)
+ zeros = np.zeros(shape=(1, p))
+ invA = np.linalg.inv(A)
+
+ constraints.append(
+ cp.bmat(
+ [
+ [-lam - c + b.T @ invA @ b, zeros, d.T + b.T @ invA.T],
+ [zeros.T, lam * eye, B],
+ [d + invA @ b, B, invA],
+ ]
+ )
+ >> 0
+ )
+
+ q, X, y, eps = relu_polytope
+ m = X.shape[0]
+ t = cp.Variable((m, 1))
+ constraints.append(q.T @ t <= eps)
+ constraints.append(t >= 0.0)
+ for i in range(m):
+ ai = X[i, :]
+ bi = y[i]
+ constraints.append(cp.pos(cp.norm2(B @ ai) + ai.T @ d - bi) <= t[i])
+
+ prob = cp.Problem(obj_max, constraints)
+ prob.solve(solver=cp.MOSEK, verbose=verbose)
+
+ print(prob.status)
+ if B.value is not None:
+ return np.linalg.inv(B.value).T @ np.linalg.inv(B.value), d.value
+ else:
+ return None, None
+
+
+def maximum_volume_ellipsoid_intersection_ellipsoids(
+ ellipses, planes=None, verbose=False
+):
+ r"""
+ Each ellipse is
+ xA_ix + 2b_i x + c_i \leq 0
+
+ :param elipses: list of [A,b,c]
+
+ :return:elipse ||x-v||_B^2 < 1
+ """
+
+ p = ellipses[0][0].shape[0]
+ m = len(ellipses)
+
+ B = cp.Variable((p, p), PSD=True)
+ d = cp.Variable((p, 1))
+ lam = cp.Variable((m, 1))
+
+ obj_max = cp.Maximize(cp.log_det(B))
+
+ constraints = []
+ for index, ellipse in enumerate(ellipses):
+ A, b, c = ellipse
+
+ eye = np.eye(p)
+ zeros = np.zeros(shape=(1, p))
+ invA = np.linalg.inv(A)
+
+ constraints.append(
+ cp.bmat(
+ [
+ [-lam[index, 0] - c + b.T @ invA @ b, zeros, d.T + b.T @ invA.T],
+ [zeros.T, lam[index, 0] * eye, B],
+ [d + invA @ b, B, invA],
+ ]
+ )
+ >> 0
+ )
+
+ if planes is not None:
+ for index, plane in enumerate(planes):
+ a, b = plane
+ constraints.append(cp.norm2(B @ a) + a.T @ d <= b)
+
+ prob = cp.Problem(obj_max, constraints)
+ prob.solve(solver=cp.MOSEK, verbose=verbose)
+
+ print(prob.status)
+ if B.value is not None:
+ return np.linalg.inv(B.value).T @ np.linalg.inv(B.value), d.value
+ else:
+ return None, None
# return B.value, -d.value
+
def ellipsoid_cut(c, B, a, beta):
- """
- :param c: elipsoid center
- :param B: elipsoid covariance
- :param a: a
- :param beta:
-
- (x-c)^\top B^{-1} (x-c) \leq 1
- a^x \leq \beta
-
- :return:
- """
- N = a.T @ B @ a
- print(N)
- alpha = (a.T @ c - beta) / np.sqrt(N)
- if alpha > 0:
- d = c.shape[0]
- tau = (1 + d * alpha) / (d + 1)
- delta = ((d ** 2) / (d ** 2 - 1)) * (1 - alpha ** 2)
- sigma = (2. * (1 + d * alpha)) / ((d + 1) * (1 + alpha))
-
- s = B @ a
- c = c + tau * (s / np.sqrt(N))
- B = delta * (B - sigma * (s @ s.T) / (N))
- return (c, B)
+ r"""
+ :param c: elipsoid center
+ :param B: elipsoid covariance
+ :param a: a
+ :param beta:
+
+ (x-c)^\top B^{-1} (x-c) \leq 1
+ a^x \leq \beta
+
+ :return:
+ """
+ N = a.T @ B @ a
+ print(N)
+ alpha = (a.T @ c - beta) / np.sqrt(N)
+ if alpha > 0:
+ d = c.shape[0]
+ tau = (1 + d * alpha) / (d + 1)
+ delta = ((d**2) / (d**2 - 1)) * (1 - alpha**2)
+ sigma = (2.0 * (1 + d * alpha)) / ((d + 1) * (1 + alpha))
+
+ s = B @ a
+ c = c + tau * (s / np.sqrt(N))
+ B = delta * (B - sigma * (s @ s.T) / (N))
+ return (c, B)
def maximize_on_elliptical_slice(x, Sigma, mu, c, l, Lambda, u):
- """
- solves the problem
- min x^\top \theta
- s.t. (\theta - \mu)Sigma(\theta - \mu) \leq c
- l \leq Lambda \theta \leq u
- """
-
- m = x.shape[0]
- zero = np.zeros(m)
- theta = cp.Variable(m)
- obj_max = cp.Maximize(x @ theta)
- Sigma_sqrt = np.linalg.cholesky(Sigma)
- constraints = [cp.SOC(zero.T @ theta + c, Sigma_sqrt @ (theta - mu))]
- constraints.append(Lambda @ theta >= l)
- constraints.append(Lambda @ theta <= u)
- prob = cp.Problem(obj_max, constraints)
- prob.solve(solver=cp.MOSEK, verbose=False
- , mosek_params={mosek.iparam.intpnt_solve_form: mosek.solveform.dual})
- val = prob.value
- theta = theta.value
- return val, theta
+ r"""
+ solves the problem
+ min x^\top \theta
+ s.t. (\theta - \mu)Sigma(\theta - \mu) \leq c
+ l \leq Lambda \theta \leq u
+ """
+
+ m = x.shape[0]
+ zero = np.zeros(m)
+ theta = cp.Variable(m)
+ obj_max = cp.Maximize(x @ theta)
+ Sigma_sqrt = np.linalg.cholesky(Sigma)
+ constraints = [cp.SOC(zero.T @ theta + c, Sigma_sqrt @ (theta - mu))]
+ constraints.append(Lambda @ theta >= l)
+ constraints.append(Lambda @ theta <= u)
+ prob = cp.Problem(obj_max, constraints)
+ prob.solve(
+ solver=cp.MOSEK,
+ verbose=False,
+ mosek_params={mosek.iparam.intpnt_solve_form: mosek.solveform.dual},
+ )
+ val = prob.value
+ theta = theta.value
+ return val, theta
def maximize_matrix_quadratic_on_ellipse(X, Sigma, mu, c, threads=4):
- """
- solves the problem
- max \theta ^top Z \theta
- s.t. (\theta - \mu)Sigma(\theta - \mu) \leq c
- """
- a = -X @ mu.reshape(-1)
- val, theta = QCQP_problem(-X, a, c, Sigma=Sigma, threads=threads)
- val = -val + mu @ X @ mu
- return val, theta
+ r"""
+ solves the problem
+ max \theta ^top Z \theta
+ s.t. (\theta - \mu)Sigma(\theta - \mu) \leq c
+ """
+ a = -X @ mu.reshape(-1)
+ val, theta = QCQP_problem(-X, a, c, Sigma=Sigma, threads=threads)
+ val = -val + mu @ X @ mu
+ return val, theta
def minimize_matrix_quadratic_on_ellipse(Z, Sigma, mu, c, threads=4):
- """
- solves the problem
- min \theta ^top Z \theta
- s.t. (\theta - \mu)Sigma(\theta - \mu) \leq c
- """
-
- m = Z.shape[0]
- zero = np.zeros(m)
- Sigma_sqrt = np.linalg.cholesky(Sigma)
- theta = cp.Variable(m)
- obj = cp.Minimize(cp.quad_form(theta, Z))
- constraints = [cp.SOC(zero.T @ theta + c, Sigma_sqrt @ (theta - mu))]
- prob = cp.Problem(obj, constraints)
- prob.solve(solver=cp.MOSEK, verbose=False,
- mosek_params={mosek.iparam.intpnt_solve_form: mosek.solveform.dual,
- mosek.iparam.num_threads: threads})
- val = prob.value
- theta = theta.value
- return val, theta
+ r"""
+ solves the problem
+ min \theta ^top Z \theta
+ s.t. (\theta - \mu)Sigma(\theta - \mu) \leq c
+ """
+
+ m = Z.shape[0]
+ zero = np.zeros(m)
+ Sigma_sqrt = np.linalg.cholesky(Sigma)
+ theta = cp.Variable(m)
+ obj = cp.Minimize(cp.quad_form(theta, Z))
+ constraints = [cp.SOC(zero.T @ theta + c, Sigma_sqrt @ (theta - mu))]
+ prob = cp.Problem(obj, constraints)
+ prob.solve(
+ solver=cp.MOSEK,
+ verbose=False,
+ mosek_params={
+ mosek.iparam.intpnt_solve_form: mosek.solveform.dual,
+ mosek.iparam.num_threads: threads,
+ },
+ )
+ val = prob.value
+ theta = theta.value
+ return val, theta
def maximize_quadratic_on_ellipse(x, Sigma, mu, c, threads=4):
- """
- solves the problem
- max (x^\top \theta)^2
- s.t. (\theta - \mu)Sigma(\theta - \mu) \leq c
- """
- X = x.reshape(-1, 1) @ x.reshape(1, -1)
- a = -X @ mu.reshape(-1)
- val, theta = QCQP_problem(-X, a, c, Sigma=Sigma, threads=threads)
- val = -val + mu @ X @ mu
- return val, theta
+ r"""
+ solves the problem
+ max (x^\top \theta)^2
+ s.t. (\theta - \mu)Sigma(\theta - \mu) \leq c
+ """
+ X = x.reshape(-1, 1) @ x.reshape(1, -1)
+ a = -X @ mu.reshape(-1)
+ val, theta = QCQP_problem(-X, a, c, Sigma=Sigma, threads=threads)
+ val = -val + mu @ X @ mu
+ return val, theta
def minimize_quadratic_on_ellipse(x, Sigma, mu, c, threads=4):
- """
- solves the problem
- min (x^\top \theta)^2
- s.t. (\theta - \mu)Sigma(\theta - \mu) \leq c
- """
-
- m = x.shape[0]
- zero = np.zeros(m)
- Sigma_sqrt = np.linalg.cholesky(Sigma)
- theta = cp.Variable(m)
- obj = cp.Minimize((x @ theta) ** 2)
- constraints = [cp.SOC(zero.T @ theta + c, Sigma_sqrt @ (theta - mu))]
- prob = cp.Problem(obj, constraints)
- prob.solve(solver=cp.MOSEK, verbose=False,
- mosek_params={mosek.iparam.intpnt_solve_form: mosek.solveform.dual,
- mosek.iparam.num_threads: threads})
- val = prob.value
- theta = theta.value
- return val, theta
+ r"""
+ solves the problem
+ min (x^\top \theta)^2
+ s.t. (\theta - \mu)Sigma(\theta - \mu) \leq c
+ """
+
+ m = x.shape[0]
+ zero = np.zeros(m)
+ Sigma_sqrt = np.linalg.cholesky(Sigma)
+ theta = cp.Variable(m)
+ obj = cp.Minimize((x @ theta) ** 2)
+ constraints = [cp.SOC(zero.T @ theta + c, Sigma_sqrt @ (theta - mu))]
+ prob = cp.Problem(obj, constraints)
+ prob.solve(
+ solver=cp.MOSEK,
+ verbose=False,
+ mosek_params={
+ mosek.iparam.intpnt_solve_form: mosek.solveform.dual,
+ mosek.iparam.num_threads: threads,
+ },
+ )
+ val = prob.value
+ theta = theta.value
+ return val, theta
def KY_initialization(X):
- (n, d) = X.shape
- y = np.zeros(shape=(d, d,))
- zs = []
- c = np.random.randn(d)
- for j in range(d):
- id_max = np.argmax(X @ c)
- id_min = np.argmin(X @ c)
-
- z_max = X[np.argmax(X @ c), :]
- z_min = X[np.argmin(X @ c), :]
-
- zs = zs + [id_max, id_min]
- y[j, :] = z_max - z_min
-
- c = np.random.randn(d)
- for i in range(j):
- c = c - ((np.dot(c, y[i, :])) / (np.dot(y[i, :], y[i, :]))) * y[i, :]
-
- mu = np.zeros(shape=(n))
- mu[zs] = 1.
- mu = mu / np.sum(mu)
- return mu
+ (n, d) = X.shape
+ y = np.zeros(
+ shape=(
+ d,
+ d,
+ )
+ )
+ zs = []
+ c = np.random.randn(d)
+ for j in range(d):
+ id_max = np.argmax(X @ c)
+ id_min = np.argmin(X @ c)
+
+ z_max = X[np.argmax(X @ c), :]
+ z_min = X[np.argmin(X @ c), :]
+
+ zs = zs + [id_max, id_min]
+ y[j, :] = z_max - z_min
+
+ c = np.random.randn(d)
+ for i in range(j):
+ c = c - ((np.dot(c, y[i, :])) / (np.dot(y[i, :], y[i, :]))) * y[i, :]
+
+ mu = np.zeros(shape=(n))
+ mu[zs] = 1.0
+ mu = mu / np.sum(mu)
+ return mu
def KY_initialization_modified(X):
- (n, d) = X.shape
- y = np.zeros(shape=(d, d,))
- zs = []
- c = np.random.randn(d)
- for j in range(d):
- id_max = np.argmax(X @ c)
- id_min = np.argmin(X @ c)
-
- z_max = X[np.argmax(X @ c), :]
- z_min = X[np.argmin(X @ c), :]
-
- zs = zs + [id_max]
- y[j, :] = z_max - z_min
-
- c = np.random.randn(d)
- for i in range(j):
- c = c - ((np.dot(c, y[i, :])) / (np.dot(y[i, :], y[i, :]))) * y[i, :]
-
- mu = np.zeros(shape=(n))
- mu[zs] = 1.
- mu = mu / np.sum(mu)
- return mu
-
-
-def plot_ellipse(offset, cov, scale=1, theta_num=1000, axis=None, plot_kwargs=None, fill=False, fill_kwargs=None,
- color='r'):
- '''
- offset = 2d array which gives center of ellipse
- cov = covariance of ellipse
- scale = scale ellipse by constant factor
- theta_num = used for a linspace below, not sure exactly (?)
-
- '''
- # Get Ellipse Properties from cov matrix
-
- eig_vec, eig_val, u = np.linalg.svd(cov)
- # Make sure 0th eigenvector has positive x-coordinate
- if eig_vec[0][0] < 0:
- eig_vec[0] *= -1
- semimaj = np.sqrt(eig_val[0])
- semimin = np.sqrt(eig_val[1])
- semimaj *= scale
- semimin *= scale
- phi = np.arccos(np.dot(eig_vec[0], np.array([1, 0])))
- if eig_vec[0][1] < 0 and phi > 0:
- phi *= -1
-
- # Generate data for ellipse structure
- theta = np.linspace(0, 2 * np.pi, theta_num)
- r = 1 / np.sqrt((np.cos(theta)) ** 2 + (np.sin(theta)) ** 2)
- x = r * np.cos(theta)
- y = r * np.sin(theta)
- data = np.array([x, y])
- S = np.array([[semimaj, 0], [0, semimin]])
- R = np.array([[np.cos(phi), -np.sin(phi)], [np.sin(phi), np.cos(phi)]])
- T = np.dot(R, S)
- data = np.dot(T, data)
- data[0] += offset[0]
- data[1] += offset[1]
-
- # Plot!
- return_fig = False
- if axis is None:
- axis = plt.gca()
-
- if plot_kwargs is None:
- p, = axis.plot(data[0], data[1], color=color, linestyle='-')
- else:
- p, = axis.plot(data[0], data[1], **plot_kwargs)
-
- if fill == True:
- if fill_kwargs is None:
- fill_kwargs = dict()
- axis.fill(data[0], data[1], alpha=0.2, color=color)
+ (n, d) = X.shape
+ y = np.zeros(
+ shape=(
+ d,
+ d,
+ )
+ )
+ zs = []
+ c = np.random.randn(d)
+ for j in range(d):
+ id_max = np.argmax(X @ c)
+ id_min = np.argmin(X @ c)
+
+ z_max = X[np.argmax(X @ c), :]
+ z_min = X[np.argmin(X @ c), :]
+
+ zs = zs + [id_max]
+ y[j, :] = z_max - z_min
+
+ c = np.random.randn(d)
+ for i in range(j):
+ c = c - ((np.dot(c, y[i, :])) / (np.dot(y[i, :], y[i, :]))) * y[i, :]
+
+ mu = np.zeros(shape=(n))
+ mu[zs] = 1.0
+ mu = mu / np.sum(mu)
+ return mu
+
+
+def plot_ellipse(
+ offset,
+ cov,
+ scale=1,
+ theta_num=1000,
+ axis=None,
+ plot_kwargs=None,
+ fill=False,
+ fill_kwargs=None,
+ color="r",
+):
+ """
+ offset = 2d array which gives center of ellipse
+ cov = covariance of ellipse
+ scale = scale ellipse by constant factor
+ theta_num = used for a linspace below, not sure exactly (?)
+
+ """
+ # Get Ellipse Properties from cov matrix
+
+ eig_vec, eig_val, u = np.linalg.svd(cov)
+ # Make sure 0th eigenvector has positive x-coordinate
+ if eig_vec[0][0] < 0:
+ eig_vec[0] *= -1
+ semimaj = np.sqrt(eig_val[0])
+ semimin = np.sqrt(eig_val[1])
+ semimaj *= scale
+ semimin *= scale
+ phi = np.arccos(np.dot(eig_vec[0], np.array([1, 0])))
+ if eig_vec[0][1] < 0 and phi > 0:
+ phi *= -1
+
+ # Generate data for ellipse structure
+ theta = np.linspace(0, 2 * np.pi, theta_num)
+ r = 1 / np.sqrt((np.cos(theta)) ** 2 + (np.sin(theta)) ** 2)
+ x = r * np.cos(theta)
+ y = r * np.sin(theta)
+ data = np.array([x, y])
+ S = np.array([[semimaj, 0], [0, semimin]])
+ R = np.array([[np.cos(phi), -np.sin(phi)], [np.sin(phi), np.cos(phi)]])
+ T = np.dot(R, S)
+ data = np.dot(T, data)
+ data[0] += offset[0]
+ data[1] += offset[1]
+
+ # Plot!
+ return_fig = False
+ if axis is None:
+ axis = plt.gca()
+
+ if plot_kwargs is None:
+ (p,) = axis.plot(data[0], data[1], color=color, linestyle="-")
+ else:
+ (p,) = axis.plot(data[0], data[1], **plot_kwargs)
+
+ if fill == True:
+ if fill_kwargs is None:
+ fill_kwargs = dict()
+ axis.fill(data[0], data[1], alpha=0.2, color=color)
if __name__ == "__main__":
- d = 2
+ d = 2
- s1 = 1
- s2 = 1
+ s1 = 1
+ s2 = 1
- A1 = np.random.randn(d, d)
- A1 = A1.T @ A1
+ A1 = np.random.randn(d, d)
+ A1 = A1.T @ A1
- A2 = np.random.randn(d, d)
- A2 = A2.T @ A2
+ A2 = np.random.randn(d, d)
+ A2 = A2.T @ A2
- center1 = np.zeros((d, 1))
- center2 = np.ones((d, 1))
+ center1 = np.zeros((d, 1))
+ center2 = np.ones((d, 1))
- b1 = - A1 @ center1
- b2 = - A2 @ center2
+ b1 = -A1 @ center1
+ b2 = -A2 @ center2
- c1 = -s1 + center1.T @ A1 @ center1
- c2 = -s2 + center2.T @ A2 @ center2
+ c1 = -s1 + center1.T @ A1 @ center1
+ c2 = -s2 + center2.T @ A2 @ center2
- # ellipsoids = [[A1,b1,c1],[A2,b2,c2]]
- ellipsoids = [[A2, b2, c2]]
- planes = [[center2, np.array([[0.]])]]
+ # ellipsoids = [[A1,b1,c1],[A2,b2,c2]]
+ ellipsoids = [[A2, b2, c2]]
+ planes = [[center2, np.array([[0.0]])]]
- A, b = maximum_volume_ellipsoid_intersection_ellipsoids(ellipsoids, planes=planes)
- # c = 1
+ A, b = maximum_volume_ellipsoid_intersection_ellipsoids(ellipsoids, planes=planes)
+ # c = 1
- axis = plt.gca()
+ axis = plt.gca()
- ## the cov is
- # (x-center)cov^{-1}(x-center)
- # plot_ellipse(np.array([0.,0.]), cov=np.array([[2,0.],[0.0,2.]]), scale = 1., axis=axis, fill=True, color = 'purple')
+ ## the cov is
+ # (x-center)cov^{-1}(x-center)
+ # plot_ellipse(np.array([0.,0.]), cov=np.array([[2,0.],[0.0,2.]]), scale = 1., axis=axis, fill=True, color = 'purple')
- plot_ellipse(center1.reshape(-1), cov=np.linalg.inv(A1), scale=1., axis=axis, fill=True)
- plot_ellipse(center2.reshape(-1), cov=np.linalg.inv(A2), scale=1., axis=axis, fill=True, color='b')
+ plot_ellipse(
+ center1.reshape(-1), cov=np.linalg.inv(A1), scale=1.0, axis=axis, fill=True
+ )
+ plot_ellipse(
+ center2.reshape(-1),
+ cov=np.linalg.inv(A2),
+ scale=1.0,
+ axis=axis,
+ fill=True,
+ color="b",
+ )
- plot_ellipse(b.reshape(-1), cov=np.linalg.inv(A), scale=1., axis=axis, fill=True, color='g')
+ plot_ellipse(
+ b.reshape(-1), cov=np.linalg.inv(A), scale=1.0, axis=axis, fill=True, color="g"
+ )
- plt.xlim([-4, 4])
- plt.ylim([-4, 4])
- plt.show()
+ plt.xlim([-4, 4])
+ plt.ylim([-4, 4])
+ plt.show()
diff --git a/stpy/helpers/haarfisz_transform.py b/stpy/helpers/haarfisz_transform.py
index 0c975d6..3c95a8e 100644
--- a/stpy/helpers/haarfisz_transform.py
+++ b/stpy/helpers/haarfisz_transform.py
@@ -3,98 +3,103 @@
"""
+
import numpy as np
def haar_fisz_transform(data):
- a = 2.
- n = data.shape[0]
- nhalf = n // 2
+ a = 2.0
+ n = data.shape[0]
+ nhalf = n // 2
- J = np.log2(n)
- res = data.copy()
- sm = np.zeros(shape=nhalf, dtype=float)
- det = sm.copy()
+ J = np.log2(n)
+ res = data.copy()
+ sm = np.zeros(shape=nhalf, dtype=float)
+ det = sm.copy()
- for i in np.arange(0, J, 1):
- indices = np.arange(0, nhalf, 1)
+ for i in np.arange(0, J, 1):
+ indices = np.arange(0, nhalf, 1)
- sm[0:nhalf] = (res[2 * indices] + res[2 * indices + 1]) / a
- det[0:nhalf] = (res[2 * indices] - res[2 * indices + 1]) / a
+ sm[0:nhalf] = (res[2 * indices] + res[2 * indices + 1]) / a
+ det[0:nhalf] = (res[2 * indices] - res[2 * indices + 1]) / a
- det[sm > 0] = det[sm > 0] / np.sqrt(sm[sm > 0])
+ det[sm > 0] = det[sm > 0] / np.sqrt(sm[sm > 0])
- res[0:nhalf] = sm[0:nhalf]
- res[nhalf:n] = det[0:nhalf]
+ res[0:nhalf] = sm[0:nhalf]
+ res[nhalf:n] = det[0:nhalf]
- n = n // 2
- nhalf = nhalf // 2
- sm = np.zeros(shape=nhalf)
- det = sm.copy()
+ n = n // 2
+ nhalf = nhalf // 2
+ sm = np.zeros(shape=nhalf)
+ det = sm.copy()
- nhalf = 1
- n = 2
- sm = np.zeros(shape=nhalf)
- det = sm.copy()
- for i in np.arange(0, J, 1):
- indices = np.arange(0, nhalf, 1)
- sm[indices] = res[indices]
- det[indices] = res[nhalf:n]
- res[2 * indices] = a / 2. * (sm[indices] + det[indices])
- res[2 * indices + 1] = a / 2. * (sm[indices] - det[indices])
+ nhalf = 1
+ n = 2
+ sm = np.zeros(shape=nhalf)
+ det = sm.copy()
+ for i in np.arange(0, J, 1):
+ indices = np.arange(0, nhalf, 1)
+ sm[indices] = res[indices]
+ det[indices] = res[nhalf:n]
+ res[2 * indices] = a / 2.0 * (sm[indices] + det[indices])
+ res[2 * indices + 1] = a / 2.0 * (sm[indices] - det[indices])
- n = 2 * n
- nhalf = 2 * nhalf
+ n = 2 * n
+ nhalf = 2 * nhalf
- sm = np.zeros(shape=nhalf)
- det = sm.copy()
- return res
+ sm = np.zeros(shape=nhalf)
+ det = sm.copy()
+ return res
def inverse_haar_fisz_transform(data):
- a = 2.
- n = data.shape[0]
- nhalf = n // 2
- J = np.log2(n)
- res = data.copy()
- sm = np.zeros(shape=nhalf)
- det = sm.copy()
-
- for i in np.arange(0, J, 1):
- indices = np.arange(0, nhalf, 1)
-
- sm[0:nhalf] = (res[2 * indices] + res[2 * indices + 1]) / a
- det[0:nhalf] = (res[2 * indices] - res[2 * indices + 1]) / a
- res[0:nhalf] = sm[0:nhalf]
- res[(nhalf):n] = det[0:nhalf]
- n = n // 2
- nhalf = nhalf // 2
-
- nhalf = 1
- n = 2
-
- for i in np.arange(0, J, 1):
- sm[0:nhalf] = res[0:nhalf]
- det[0:nhalf] = res[nhalf:n]
- indices = np.arange(0, nhalf, 1)
-
- res[2 * indices] = (a / 2.) * (sm[0:nhalf] + det[0:nhalf] * np.sqrt(sm[0:nhalf]))
- res[2 * indices + 1] = (a / 2.) * (sm[0:nhalf] - det[0:nhalf] * np.sqrt(sm[0:nhalf]))
- res[res < 0.] = 0.
- n = 2 * n
- nhalf = 2 * nhalf
- return res
+ a = 2.0
+ n = data.shape[0]
+ nhalf = n // 2
+ J = np.log2(n)
+ res = data.copy()
+ sm = np.zeros(shape=nhalf)
+ det = sm.copy()
+
+ for i in np.arange(0, J, 1):
+ indices = np.arange(0, nhalf, 1)
+
+ sm[0:nhalf] = (res[2 * indices] + res[2 * indices + 1]) / a
+ det[0:nhalf] = (res[2 * indices] - res[2 * indices + 1]) / a
+ res[0:nhalf] = sm[0:nhalf]
+ res[(nhalf):n] = det[0:nhalf]
+ n = n // 2
+ nhalf = nhalf // 2
+
+ nhalf = 1
+ n = 2
+
+ for i in np.arange(0, J, 1):
+ sm[0:nhalf] = res[0:nhalf]
+ det[0:nhalf] = res[nhalf:n]
+ indices = np.arange(0, nhalf, 1)
+
+ res[2 * indices] = (a / 2.0) * (
+ sm[0:nhalf] + det[0:nhalf] * np.sqrt(sm[0:nhalf])
+ )
+ res[2 * indices + 1] = (a / 2.0) * (
+ sm[0:nhalf] - det[0:nhalf] * np.sqrt(sm[0:nhalf])
+ )
+ res[res < 0.0] = 0.0
+ n = 2 * n
+ nhalf = 2 * nhalf
+ return res
if __name__ == "__main__":
- import matplotlib.pyplot as plt
-
- s = np.random.poisson(5, 4) * 0 + 1
- s2 = np.random.poisson(20, 4) * 0 + 3
- s = np.concatenate((s, s2)).astype(float)
- plt.plot(s)
- v = haar_fisz_transform(s)
- s_inv = inverse_haar_fisz_transform(v)
- plt.plot(v)
- plt.plot(s_inv, '--')
- plt.show()
+ import matplotlib.pyplot as plt
+
+ s = np.random.poisson(5, 4) * 0 + 1
+ s2 = np.random.poisson(20, 4) * 0 + 3
+ s = np.concatenate((s, s2)).astype(float)
+ plt.plot(s)
+ v = haar_fisz_transform(s)
+ s_inv = inverse_haar_fisz_transform(v)
+ plt.plot(v)
+ plt.plot(s_inv, "--")
+ plt.show()
diff --git a/stpy/helpers/helper.py b/stpy/helpers/helper.py
index 26591ed..5371e61 100755
--- a/stpy/helpers/helper.py
+++ b/stpy/helpers/helper.py
@@ -8,531 +8,564 @@
from torch.autograd.functional import jacobian
-def isin(element, test_elements, assume_unique=False, atol = 1e-10):
- (n, d) = element.shape
- (m, d) = test_elements.shape
- maskFull = np.full((n), False, dtype=bool)
- for j in range(m):
- mask = np.full((n), True, dtype=bool)
- for i in range(d):
- # mask = np.logical_and(mask,np.in1d(element[:,i],test_elements[j,i], assume_unique=assume_unique))
- mask = np.logical_and(mask, np.isclose(element[:, i], test_elements[j, i], atol=atol))
- # print (j, i, mask)
- maskFull = np.logical_or(mask, maskFull)
- # print (maskFull)
- return maskFull
-
-
-
-def cartesian(arrays, out=None, dtype = None):
- """
- Generate a cartesian product of input arrays.
-
- Parameters
- ----------
- arrays : list of array-like
- 1-D arrays to form the cartesian product of.
- out : ndarray
- Array to place the cartesian product in.
-
- Returns
- -------
- out : ndarray
- 2-D array of shape (M, len(arrays)) containing cartesian products
- formed of input arrays.
-
- """
- arrays = [np.asarray(x) for x in arrays]
- if dtype is None:
- dtype = arrays[0].dtype
- n = np.prod([x.size for x in arrays])
- if out is None:
- out = np.zeros([n, len(arrays)], dtype=dtype)
-
- m = n / arrays[0].size
- m = int(m)
- out[:, 0] = np.repeat(arrays[0], m)
- if arrays[1:]:
- cartesian(arrays[1:], out=out[0:m, 1:])
- for j in range(1, arrays[0].size):
- out[j * m:(j + 1) * m, 1:] = out[0:m, 1:]
- return out
-
-
-def estimate_std(x: torch.Tensor, # x values used for uniqueness detection
- y: torch.Tensor, # y values
- truncation:Union[float,None] = None, # truncate at specific y
- verbose:bool = False, # verbosity level
- conservative:bool = False,
- return_all_residuals:bool = False # return
- ): #
-
- out, indices, counts = torch.unique(x, dim=0, return_inverse=True, return_counts=True)
- residuals_mean_list = []
-
- for i in range(counts.size()[0]):
- if counts[i] > 1:
- mask = indices == i
- mean = torch.mean(y[mask].view(-1))
- residuals_mean_list.append(y[mask].view(-1)-mean.view(-1))
- residuals_mean = torch.hstack(residuals_mean_list)
-
- if verbose:
- print ("Estimating variance from:",residuals_mean.size())
-
- if truncation is not None:
- residuals_mean_trunc = residuals_mean[torch.abs(residuals_mean) 1:
+ mask = indices == i
+ mean = torch.mean(y[mask].view(-1))
+ residuals_mean_list.append(y[mask].view(-1) - mean.view(-1))
+ residuals_mean = torch.hstack(residuals_mean_list)
+
+ if verbose:
+ print("Estimating variance from:", residuals_mean.size())
+
+ if truncation is not None:
+ residuals_mean_trunc = residuals_mean[torch.abs(residuals_mean) < truncation]
+ sigma_std = torch.std(residuals_mean_trunc)
+ else:
+ sigma_std = torch.std(residuals_mean)
+
+ if return_all_residuals:
+ return residuals_mean_list, out, counts, residuals_mean, indices
+ else:
+ return sigma_std
def direct_sum(arrays):
- dim = np.sum([array.shape[1] for array in arrays])
- size = np.sum([array.shape[0] for array in arrays])
+ dim = np.sum([array.shape[1] for array in arrays])
+ size = np.sum([array.shape[0] for array in arrays])
- out = np.zeros(shape=(size, dim))
- dim = 0
- n = 0
- for j in range(len(arrays)):
- new_n, new_dim = arrays[j].shape
- out[n:n + new_n, dim:dim + new_dim] = arrays[j]
- dim = dim + new_dim
- n = n + new_n
+ out = np.zeros(shape=(size, dim))
+ dim = 0
+ n = 0
+ for j in range(len(arrays)):
+ new_n, new_dim = arrays[j].shape
+ out[n : n + new_n, dim : dim + new_dim] = arrays[j]
+ dim = dim + new_dim
+ n = n + new_n
- return out
+ return out
def symsqrt(matrix):
- """Compute the square root of a positive definite matrix."""
- # perform the decomposition
- # s, v = matrix.symeig(eigenvectors=True)
- _, s, v = matrix.svd() # passes torch.autograd.gradcheck()
- # truncate small components
- above_cutoff = s > s.max() * s.size(-1) * torch.finfo(s.dtype).eps
- s = s[..., above_cutoff]
- v = v[..., above_cutoff]
- # compose the square root matrix
- return (v * s.sqrt().unsqueeze(-2)) @ v.transpose(-2, -1)
+ """Compute the square root of a positive definite matrix."""
+ # perform the decomposition
+ # s, v = matrix.symeig(eigenvectors=True)
+ _, s, v = matrix.svd() # passes torch.autograd.gradcheck()
+ # truncate small components
+ above_cutoff = s > s.max() * s.size(-1) * torch.finfo(s.dtype).eps
+ s = s[..., above_cutoff]
+ v = v[..., above_cutoff]
+ # compose the square root matrix
+ return (v * s.sqrt().unsqueeze(-2)) @ v.transpose(-2, -1)
def interval(n, d, L_infinity_ball=1, offset=None):
- if offset is None:
- arrays = [np.linspace(-L_infinity_ball, L_infinity_ball, n).reshape(n, 1) for i in range(d)]
- xtest = cartesian(arrays)
- else:
- arrays = [np.linspace(offset[i][0], offset[i][1], n).reshape(n, 1) for i in range(d)]
- xtest = cartesian(arrays)
- return xtest
+ if offset is None:
+ arrays = [
+ np.linspace(-L_infinity_ball, L_infinity_ball, n).reshape(n, 1)
+ for i in range(d)
+ ]
+ xtest = cartesian(arrays)
+ else:
+ arrays = [
+ np.linspace(offset[i][0], offset[i][1], n).reshape(n, 1) for i in range(d)
+ ]
+ xtest = cartesian(arrays)
+ return xtest
def interval_torch(n, d, L_infinity_ball=1, offset=None):
- return torch.from_numpy(interval(n, d, L_infinity_ball=L_infinity_ball, offset=offset))
+ return torch.from_numpy(
+ interval(n, d, L_infinity_ball=L_infinity_ball, offset=offset)
+ )
def get_ecdf(x):
- x = np.sort(x)
+ x = np.sort(x)
- def result(v):
- return np.searchsorted(x, v, side='right') / x.size
+ def result(v):
+ return np.searchsorted(x, v, side="right") / x.size
- return result
+ return result
def emprical_cdf(data):
- """
- #>>> import numpy as np
- #>>> emprical_cdf(np.array([1.,2.,3.,1.,2.]))
- #[1.,2.,3.],[0.4,0.4,0.2]
- """
-
- # create a sorted series of unique data
- cdfx = np.sort(np.unique(data))
- # x-data for the ECDF: evenly spaced sequence of the uniques
- x_values = np.linspace(start=min(cdfx),
- stop=max(cdfx), num=len(cdfx))
-
- # size of the x_values
- size_data = data.shape[0]
- # y-data for the ECDF:
- y_values = []
- for i in x_values:
- # all the values in raw data less than the ith value in x_values
- temp = data[data <= i]
- # fraction of that value with respect to the size of the x_values
- value = float(temp.shape[0]) / float(size_data)
- # pushing the value in the y_values
- y_values.append(value)
- # return both x and y values
- return x_values, np.array(y_values)
+ """
+ #>>> import numpy as np
+ #>>> emprical_cdf(np.array([1.,2.,3.,1.,2.]))
+ #[1.,2.,3.],[0.4,0.4,0.2]
+ """
+
+ # create a sorted series of unique data
+ cdfx = np.sort(np.unique(data))
+ # x-data for the ECDF: evenly spaced sequence of the uniques
+ x_values = np.linspace(start=min(cdfx), stop=max(cdfx), num=len(cdfx))
+
+ # size of the x_values
+ size_data = data.shape[0]
+ # y-data for the ECDF:
+ y_values = []
+ for i in x_values:
+ # all the values in raw data less than the ith value in x_values
+ temp = data[data <= i]
+ # fraction of that value with respect to the size of the x_values
+ value = float(temp.shape[0]) / float(size_data)
+ # pushing the value in the y_values
+ y_values.append(value)
+ # return both x and y values
+ return x_values, np.array(y_values)
def batch_jacobian(f, x, create_graph=False, vectorize=False):
- f_sum = lambda x: torch.sum(f(x), axis=0)
- return jacobian(f_sum, x, create_graph=create_graph, vectorize=vectorize)
+ f_sum = lambda x: torch.sum(f(x), axis=0)
+ return jacobian(f_sum, x, create_graph=create_graph, vectorize=vectorize)
def batch_hessian(f, x, create_graph=False, vectorize=False, vv=False):
- J = lambda x: batch_jacobian(f, x, create_graph=True, vectorize=vectorize).transpose(0, 1)
- H = batch_jacobian(J, x, create_graph=create_graph, vectorize=vv)
- return H
+ J = lambda x: batch_jacobian(
+ f, x, create_graph=True, vectorize=vectorize
+ ).transpose(0, 1)
+ H = batch_jacobian(J, x, create_graph=create_graph, vectorize=vv)
+ return H
def create_pull_back(low, high, inverse=False, to=[-1, 1]):
- translate = lambda x: x * (to[0] - to[1]) / (low - high) + to[1] - ((to[0] - to[1]) * high) / (low - high)
- if inverse:
- translate_back = lambda x: x * (low - high) / (to[0] - to[1]) + high - to[1] * (low - high) / (to[0] - to[1])
- return translate, translate_back
- else:
- return translate
+ translate = (
+ lambda x: x * (to[0] - to[1]) / (low - high)
+ + to[1]
+ - ((to[0] - to[1]) * high) / (low - high)
+ )
+ if inverse:
+ translate_back = (
+ lambda x: x * (low - high) / (to[0] - to[1])
+ + high
+ - to[1] * (low - high) / (to[0] - to[1])
+ )
+ return translate, translate_back
+ else:
+ return translate
def hierarchical_distance(group1, group2):
- group3 = copy.deepcopy(group2)
- group4 = copy.deepcopy(group1)
- for elem in group1:
- try:
- group3.remove(elem)
- group4.remove(elem)
- except:
- pass
- if len(group3) == 1 and len(group3[0]) == 1 and len(group4) == 0:
- return 1
-
- isin = lambda set, set2: []
- for a, b in list(itertools.product(group1, group1)):
- new_group = copy.deepcopy(group1)
- if a != b:
- new_group.remove(b)
- new_group.remove(a)
- new_group.append(a + b)
- if len(new_group) == len(group2) and all(i in new_group for i in group2):
- return 1
- return 2
+ group3 = copy.deepcopy(group2)
+ group4 = copy.deepcopy(group1)
+ for elem in group1:
+ try:
+ group3.remove(elem)
+ group4.remove(elem)
+ except:
+ pass
+ if len(group3) == 1 and len(group3[0]) == 1 and len(group4) == 0:
+ return 1
+
+ isin = lambda set, set2: []
+ for a, b in list(itertools.product(group1, group1)):
+ new_group = copy.deepcopy(group1)
+ if a != b:
+ new_group.remove(b)
+ new_group.remove(a)
+ new_group.append(a + b)
+ if len(new_group) == len(group2) and all(i in new_group for i in group2):
+ return 1
+ return 2
def valid_enlargement(curr, groups):
- out = []
- for index, group in enumerate(groups):
- if hierarchical_distance(curr, group) == 1:
- out.append(index)
- return out
+ out = []
+ for index, group in enumerate(groups):
+ if hierarchical_distance(curr, group) == 1:
+ out.append(index)
+ return out
def interval_groups(n, d, groups, L_infinity_ball=1):
- arrays = [interval(n, len(groups[i]), L_infinity_ball=L_infinity_ball) for i in range(len(groups))]
- xtest = direct_sum(arrays)
- out = np.zeros(shape=(xtest.shape[0], d))
- out[:, 0:xtest.shape[1]] = xtest
- return out
+ arrays = [
+ interval(n, len(groups[i]), L_infinity_ball=L_infinity_ball)
+ for i in range(len(groups))
+ ]
+ xtest = direct_sum(arrays)
+ out = np.zeros(shape=(xtest.shape[0], d))
+ out[:, 0 : xtest.shape[1]] = xtest
+ return out
def logsumexp(a, axis=None, b=None):
- a = np.asarray(a)
- if axis is None:
- a = a.ravel()
- else:
- a = np.rollaxis(a, axis)
- a_max = a.max(axis=0)
- if b is not None:
- b = np.asarray(b)
- if axis is None:
- b = b.ravel()
- else:
- b = np.rollaxis(b, axis)
- out = np.log(np.sum(b * np.exp(a - a_max), axis=0))
- else:
- out = np.log(np.sum(np.exp(a - a_max), axis=0))
- out += a_max
- return out
+ a = np.asarray(a)
+ if axis is None:
+ a = a.ravel()
+ else:
+ a = np.rollaxis(a, axis)
+ a_max = a.max(axis=0)
+ if b is not None:
+ b = np.asarray(b)
+ if axis is None:
+ b = b.ravel()
+ else:
+ b = np.rollaxis(b, axis)
+ out = np.log(np.sum(b * np.exp(a - a_max), axis=0))
+ else:
+ out = np.log(np.sum(np.exp(a - a_max), axis=0))
+ out += a_max
+ return out
class MyBounds(object):
- def __init__(self, xmax=[1.1, 1.1], xmin=[-1.1, -1.1]):
- self.xmax = np.array(xmax)
- self.xmin = np.array(xmin)
+ def __init__(self, xmax=[1.1, 1.1], xmin=[-1.1, -1.1]):
+ self.xmax = np.array(xmax)
+ self.xmin = np.array(xmin)
- def __call__(self, **kwargs):
- x = kwargs["x_new"]
- tmax = bool(np.all(x <= self.xmax))
- tmin = bool(np.all(x >= self.xmin))
- return tmax and tmin
+ def __call__(self, **kwargs):
+ x = kwargs["x_new"]
+ tmax = bool(np.all(x <= self.xmax))
+ tmin = bool(np.all(x >= self.xmin))
+ return tmax and tmin
def full_group(d):
- g = []
- for i in range(d):
- g.append([i])
- return g
+ g = []
+ for i in range(d):
+ g.append([i])
+ return g
def pair_groups(d):
- g = []
- for i in range(d - 1):
- g.append([i, i + 1])
- return g
+ g = []
+ for i in range(d - 1):
+ g.append([i, i + 1])
+ return g
def conditional_decorator(dec, condition):
- def decorator(func):
- if not condition:
- # Return the function unchanged, not decorated.
- return func
- return dec(func)
+ def decorator(func):
+ if not condition:
+ # Return the function unchanged, not decorated.
+ return func
+ return dec(func)
+
+ return decorator
- return decorator
def generate_all_pairs(d):
- groups = []
- for elem in range(d):
- for elem2 in range(d):
- groups.append([elem, elem2])
- return groups
+ groups = []
+ for elem in range(d):
+ for elem2 in range(d):
+ groups.append([elem, elem2])
+ return groups
def generate_groups(d, elements=None):
- """
- returns a list of all possible groups combinations of d elements
- :param d: integer
- :return:
- >>> generate_groups(1)
- [[0]]
- >>> generate_groups(2)
- [[[0], [1]], [[1], [0]], [[0, 1]]]
- """
- if elements is None:
- elements = list(range(d))
- g = []
- if len(elements) == 1:
- return [elements]
-
- for r in range(1, d + 1, 1):
- gn = [list(a) for a in list(itertools.combinations(elements, r))]
- for i in gn:
- elements2 = list(set(elements) - set(i))
- g.append([i] + generate_groups(d, elements=elements2))
- return g
+ """
+ returns a list of all possible groups combinations of d elements
+ :param d: integer
+ :return:
+ >>> generate_groups(1)
+ [[0]]
+ >>> generate_groups(2)
+ [[[0], [1]], [[1], [0]], [[0, 1]]]
+ """
+ if elements is None:
+ elements = list(range(d))
+ g = []
+ if len(elements) == 1:
+ return [elements]
+
+ for r in range(1, d + 1, 1):
+ gn = [list(a) for a in list(itertools.combinations(elements, r))]
+ for i in gn:
+ elements2 = list(set(elements) - set(i))
+ g.append([i] + generate_groups(d, elements=elements2))
+ return g
class results:
- def __init__(self):
- self.x = 0
+ def __init__(self):
+ self.x = 0
def proj(x, bounds):
- y = np.zeros(shape=x.shape)
- for ind, elem in enumerate(x):
- if elem > bounds[ind][1]:
- y[ind] = bounds[ind][1]
+ y = np.zeros(shape=x.shape)
+ for ind, elem in enumerate(x):
+ if elem > bounds[ind][1]:
+ y[ind] = bounds[ind][1]
- elif elem < bounds[ind][0]:
- y[ind] = bounds[ind][0]
+ elif elem < bounds[ind][0]:
+ y[ind] = bounds[ind][0]
- else:
- y[ind] = elem
- return y
+ else:
+ y[ind] = elem
+ return y
def lambda_coordinate(fun, x0, index, x):
- x0[index] = x
- r = fun(x0)
- return r
-
-
-def projected_gradient_descent(fun, grad, x, bounds, maxit=10e23, verbose=False, tol=0.000001, nu=0.001):
- i = 0
- x_old = x + np.random.randn(x.shape[0])
- while (i < maxit and np.linalg.norm(x - x_old) > tol):
- x_old = x
- x = x - (100 * nu) * grad(x)
- x = proj(x, bounds)
-
- if verbose == True:
- print("Iteration: ", i, " ", fun(x))
- i += 1
- res = results()
- res.x = x
- return res
-
-
-def projected_gradient_descent(fun, grad, x, bounds, maxit=10e23, verbose=False, tol=0.000001, nu=0.001):
- i = 0
- x_old = x + np.random.randn(x.shape[0])
- while (i < maxit and np.linalg.norm(x - x_old) > tol):
- x_old = x
- x = x - (100 * nu) * grad(x)
- x = proj(x, bounds)
-
- if verbose == True:
- print("Iteration: ", i, " ", fun(x))
- i += 1
- res = results()
- res.x = x
- return res
+ x0[index] = x
+ r = fun(x0)
+ return r
+
+
+def projected_gradient_descent(
+ fun, grad, x, bounds, maxit=10e23, verbose=False, tol=0.000001, nu=0.001
+):
+ i = 0
+ x_old = x + np.random.randn(x.shape[0])
+ while i < maxit and np.linalg.norm(x - x_old) > tol:
+ x_old = x
+ x = x - (100 * nu) * grad(x)
+ x = proj(x, bounds)
+
+ if verbose == True:
+ print("Iteration: ", i, " ", fun(x))
+ i += 1
+ res = results()
+ res.x = x
+ return res
+
+
+def projected_gradient_descent(
+ fun, grad, x, bounds, maxit=10e23, verbose=False, tol=0.000001, nu=0.001
+):
+ i = 0
+ x_old = x + np.random.randn(x.shape[0])
+ while i < maxit and np.linalg.norm(x - x_old) > tol:
+ x_old = x
+ x = x - (100 * nu) * grad(x)
+ x = proj(x, bounds)
+
+ if verbose == True:
+ print("Iteration: ", i, " ", fun(x))
+ i += 1
+ res = results()
+ res.x = x
+ return res
def complex_step_derivative(fun, h, x):
- d = x.shape[1]
- der = np.zeros(shape=(1, d))
- for i in range(d):
- one = np.zeros(shape=(1, d))
- one[0, i] = 1.0
- der[0, i] = np.imag((fun(x + 1j * h * one) - fun(x))) / h
- return der
+ d = x.shape[1]
+ der = np.zeros(shape=(1, d))
+ for i in range(d):
+ one = np.zeros(shape=(1, d))
+ one[0, i] = 1.0
+ der[0, i] = np.imag((fun(x + 1j * h * one) - fun(x))) / h
+ return der
def finite_differences(fun, h, x):
- d = x.size()[1]
- der = torch.zeros(size=(1, d), dtype=torch.float64)
- for i in range(d):
- one = torch.zeros(size=(1, d), dtype=torch.float64)
- one[0, i] = 1.0
- der[0, i] = (fun(x + one * h) - fun(x)) / h
- return der
+ d = x.size()[1]
+ der = torch.zeros(size=(1, d), dtype=torch.float64)
+ for i in range(d):
+ one = torch.zeros(size=(1, d), dtype=torch.float64)
+ one[0, i] = 1.0
+ der[0, i] = (fun(x + one * h) - fun(x)) / h
+ return der
def finite_differences_hessian(fun, h, x):
- d = x.size()[1]
- hess = torch.zeros(size=(d, d), dtype=torch.float64)
- for i in range(d):
- for j in range(d):
- one_i = torch.zeros(size=(1, d), dtype=torch.float64)
- one_j = torch.zeros(size=(1, d), dtype=torch.float64)
- one_i[0, i] = 1.0
- one_j[0, j] = 1.0
- hess[i, j] = np.log(
- np.abs(fun(x + h * one_i + h * one_j) - fun(x + h * one_i) - fun(x + h * one_j) + fun(x))) - 2 * np.log(
- h)
-
- hess = torch.exp(hess)
- return (hess + torch.t(hess)) / 2.
+ d = x.size()[1]
+ hess = torch.zeros(size=(d, d), dtype=torch.float64)
+ for i in range(d):
+ for j in range(d):
+ one_i = torch.zeros(size=(1, d), dtype=torch.float64)
+ one_j = torch.zeros(size=(1, d), dtype=torch.float64)
+ one_i[0, i] = 1.0
+ one_j[0, j] = 1.0
+ hess[i, j] = np.log(
+ np.abs(
+ fun(x + h * one_i + h * one_j)
+ - fun(x + h * one_i)
+ - fun(x + h * one_j)
+ + fun(x)
+ )
+ ) - 2 * np.log(h)
+
+ hess = torch.exp(hess)
+ return (hess + torch.t(hess)) / 2.0
def finite_differences_np(fun, h, x):
- d = x.shape[0]
- der = np.zeros(shape=(d))
- for i in range(d):
- one = np.zeros(shape=(d))
- one[i] = 1.0
- der[i] = (fun(x + one * h) - fun(x)) / h
- return der
+ d = x.shape[0]
+ der = np.zeros(shape=(d))
+ for i in range(d):
+ one = np.zeros(shape=(d))
+ one[i] = 1.0
+ der[i] = (fun(x + one * h) - fun(x)) / h
+ return der
-def finite_differences_test(fun, fun_der, x, h_max=1.):
- n = 10
- for i in range(n):
- h = 2 ** (-i) * h_max
- approx_nabla = finite_differences_np(fun, h, x)
- print(i, h, np.linalg.norm(approx_nabla - fun_der(x)))
+def finite_differences_test(fun, fun_der, x, h_max=1.0):
+ n = 10
+ for i in range(n):
+ h = 2 ** (-i) * h_max
+ approx_nabla = finite_differences_np(fun, h, x)
+ print(i, h, np.linalg.norm(approx_nabla - fun_der(x)))
def sample_custom(inverse_cumulative_distribution, size=(1, 1)):
- U = np.random.uniform(0, 1, size=size)
- F = np.vectorize(inverse_cumulative_distribution)
- Z = F(U)
- return Z
+ U = np.random.uniform(0, 1, size=size)
+ F = np.vectorize(inverse_cumulative_distribution)
+ Z = F(U)
+ return Z
def select_subset(M, S):
- d = M.shape[0]
- I = np.zeros(shape=(d, d))
- I[S, S] = 1.
- return I @ M @ I
+ d = M.shape[0]
+ I = np.zeros(shape=(d, d))
+ I[S, S] = 1.0
+ return I @ M @ I
def select_subset_inv(M, S):
- M = select_subset(M, S)
- return np.linalg.pinv(M)
+ M = select_subset(M, S)
+ return np.linalg.pinv(M)
def complement_set(S, size):
- V = set(np.arange(0, size, 1))
- s = V - set(S)
- S_C = list(s)
- return S_C
+ V = set(np.arange(0, size, 1))
+ s = V - set(S)
+ S_C = list(s)
+ return S_C
def add_element(elements, new_element):
- new_out = []
- for element in elements:
- new_out.append(element + [[new_element]])
- new_out.append(element)
- for j in element:
- new = copy.deepcopy(element)
- new.remove(j)
- new.append(j + [new_element])
- new_out.append(new)
+ new_out = []
+ for element in elements:
+ new_out.append(element + [[new_element]])
+ new_out.append(element)
+ for j in element:
+ new = copy.deepcopy(element)
+ new.remove(j)
+ new.append(j + [new_element])
+ new_out.append(new)
- return new_out
+ return new_out
def get_hierarchy(start=1, new_elements=[2, 3, 4]):
- elements = [[[start]]]
- for new_element in new_elements:
- elements = add_element(elements, new_element)
- l = []
- for element in elements:
- l.append(np.sum([3 ** len(e) for e in element]))
- indices = np.argsort(l)
- out = []
- for index in indices:
- out.append(elements[index])
- return out
+ elements = [[[start]]]
+ for new_element in new_elements:
+ elements = add_element(elements, new_element)
+ l = []
+ for element in elements:
+ l.append(np.sum([3 ** len(e) for e in element]))
+ indices = np.argsort(l)
+ out = []
+ for index in indices:
+ out.append(elements[index])
+ return out
def likelihood_bernoulli_test(alpha, delta, failure):
- if alpha == 1.:
- alpha = 0.99999
+ if alpha == 1.0:
+ alpha = 0.99999
- p = (1 - (np.log(alpha / delta)) / np.log((1 - alpha) / (1 - delta))) ** (-1)
+ p = (1 - (np.log(alpha / delta)) / np.log((1 - alpha) / (1 - delta))) ** (-1)
- dkl = p * np.log(p / delta) + (1 - p) * np.log((1 - p) / (1 - delta))
- n = np.log(2 / failure) / dkl
- k = n * p
- return n, k
+ dkl = p * np.log(p / delta) + (1 - p) * np.log((1 - p) / (1 - delta))
+ n = np.log(2 / failure) / dkl
+ k = n * p
+ return n, k
def median_of_means(list, delta=0.01):
- r = list.shape[0]
- if r > 3:
- k = r
- N = int(np.floor(r / k))
- means = []
- for j in range(k - 1):
- means.append((1. / N) * np.sum(list[(j * N):(j + 1) * N]))
- return np.median(means)
- else:
- return 0.
-
-
-def get_indices(xtest,x):
- """
- Find location of vectors in a larger set
- :param xtest: torch.Tensor, tensor to be located
- :param x: torch.Tensor, to be located in xtest
- :return: list, if None its means it was not found in the original tensor
- """
-
- indices = []
- for i in range(x.size()[0]):
- xtrial = x[i,:]
- mask = torch.all(xtest == xtrial, dim=1)
- if torch.sum(mask) > 0:
- index = int(torch.argmax(mask.int()))
- indices.append(index)
- else:
- indices.append(None)
-
- return indices
+ r = list.shape[0]
+ if r > 3:
+ k = r
+ N = int(np.floor(r / k))
+ means = []
+ for j in range(k - 1):
+ means.append((1.0 / N) * np.sum(list[(j * N) : (j + 1) * N]))
+ return np.median(means)
+ else:
+ return 0.0
+
+
+def get_indices(xtest, x):
+ """
+ Find location of vectors in a larger set
+ :param xtest: torch.Tensor, tensor to be located
+ :param x: torch.Tensor, to be located in xtest
+ :return: list, if None its means it was not found in the original tensor
+ """
+
+ indices = []
+ for i in range(x.size()[0]):
+ xtrial = x[i, :]
+ mask = torch.all(xtest == xtrial, dim=1)
+ if torch.sum(mask) > 0:
+ index = int(torch.argmax(mask.int()))
+ indices.append(index)
+ else:
+ indices.append(None)
+
+ return indices
+
if __name__ == "__main__":
- x = torch.arange(0,9,1).reshape(3,3)
- xtrial = torch.Tensor([[0,1,2],[6,7,8],[3,4,5]])
- print (x)
- print (get_indices(x,xtrial))
+ x = torch.arange(0, 9, 1).reshape(3, 3)
+ xtrial = torch.tensor([[0, 1, 2], [6, 7, 8], [3, 4, 5]])
+ print(x)
+ print(get_indices(x, xtrial))
diff --git a/stpy/helpers/parallel_interpolation.py b/stpy/helpers/parallel_interpolation.py
new file mode 100644
index 0000000..c070333
--- /dev/null
+++ b/stpy/helpers/parallel_interpolation.py
@@ -0,0 +1,370 @@
+from torch.multiprocessing import Pool
+from os import cpu_count
+import torch
+import numpy as np
+from scipy.spatial import Delaunay, cKDTree
+
+shared_triangulation: Delaunay | None
+xtree: cKDTree
+
+
+def _initialize(tri: Delaunay, tree: cKDTree):
+ global shared_triangulation
+ global xtree
+ shared_triangulation = tri
+ xtree = tree
+
+
+def _find_exact_or_simplex_batch(batch: np.ndarray, tri_local=None, xtree_local=None):
+ if tri_local is None:
+ tri_local = shared_triangulation
+ if xtree_local is None:
+ xtree_local = xtree
+ distances, idx = xtree_local.query(batch, k=1, distance_upper_bound=1e-7)
+ exact_match_mask = distances <= 1e-7
+ batch_remaining = batch[~exact_match_mask]
+
+ simplices = tri_local.find_simplex(batch_remaining)
+ outside_conv_hull_mask = simplices < 0
+
+ simplices_remaining = simplices[~outside_conv_hull_mask]
+ exact_match_mask[~exact_match_mask] = outside_conv_hull_mask
+
+ idx = idx[exact_match_mask]
+ no_match_mask = idx == len(xtree_local.data)
+ if no_match_mask.any():
+ _, idx_no_match = xtree_local.query(batch[exact_match_mask][no_match_mask], k=1)
+ idx[no_match_mask] = idx_no_match
+
+ return idx, simplices_remaining, exact_match_mask
+
+
+class InterpolatorArray:
+
+ def __init__(self, x: torch.Tensor, phi: torch.Tensor, m: int, num_cpu_cores=None):
+ # Ensure x is on CPU for Delaunay
+ x_cpu = x.cpu().numpy()
+
+ # Build the Delaunay triangulation on CPU
+ tri = Delaunay(x_cpu)
+ xtree = cKDTree(x_cpu)
+ if num_cpu_cores is None:
+ num_cpu_cores = cpu_count()
+ self.num_cpu_cores = num_cpu_cores
+ if self.num_cpu_cores >= 1:
+ pool = Pool(self.num_cpu_cores, _initialize, [tri, xtree])
+ else:
+ pool = None
+ self.interpolators = {
+ 0: [
+ InterpolatorND(x, phi[:, j], tri, xtree, pool, num_cpu_cores)
+ for j in range(m)
+ ]
+ }
+ self.pools = {0: pool}
+
+ def __call__(self, j: int, q: torch.Tensor):
+ all_interpolators = [ip for list in self.interpolators.values() for ip in list]
+ return all_interpolators[j](q).view(-1, 1)
+
+ def set(self, i: int, x: torch.Tensor, phi: torch.Tensor, m: int):
+ x_cpu = x.cpu().numpy()
+ tri = Delaunay(x_cpu)
+ xtree = cKDTree(x_cpu)
+ if self.num_cpu_cores >= 1:
+ pool = Pool(self.num_cpu_cores, _initialize, [tri, xtree])
+ else:
+ pool = None
+ self.interpolators[i] = [
+ InterpolatorND(x, phi[:, j], tri, xtree, pool, self.num_cpu_cores)
+ for j in range(m)
+ ]
+ if i in self.pools and self.pools[i] is not None:
+ self.pools[i].close()
+ self.pools[i].join()
+ self.pools[i] = pool
+
+ def __del__(self):
+ for pool in self.pools.values():
+ if pool is not None:
+ pool.close()
+ pool.join()
+
+
+class InterpolatorND:
+ """
+ Piecewise linear interpolator for N-dimensional data using Delaunay triangulation.
+ """
+
+ def __init__(
+ self,
+ x: torch.Tensor,
+ y: torch.Tensor,
+ tri=None,
+ xtree=None,
+ pool=None,
+ num_cpu_cores=None,
+ ):
+ """
+ Args:
+ x: (N, D) tensor of input points in D-dimensional space.
+ y: (N,) tensor of function values at those points.
+ tri: Precomputed Delaunay triangulation.
+ xtree: Precomputed cKDTree for nearest neighbor search.
+ pool: Optional multiprocessing pool.
+ num_cpu_cores: Number of CPU cores to use for parallel processing.
+ """
+ assert y.dtype == torch.float64
+
+ if tri is not None:
+ # Use the provided Delaunay triangulation and cKDTree
+ self.tri = tri
+ self.xtree = xtree
+ self.pool = pool
+ self.own_pool = False
+ self.num_cpu_cores = num_cpu_cores
+ else:
+ # Ensure x is on CPU for Delaunay
+ x_cpu = x.detach().cpu().numpy()
+
+ # Build the Delaunay triangulation on CPU
+ self.tri = Delaunay(x_cpu)
+ self.xtree = cKDTree(x_cpu)
+
+ if num_cpu_cores is None:
+ num_cpu_cores = cpu_count()
+ self.num_cpu_cores = num_cpu_cores
+ if self.num_cpu_cores >= 1:
+ self.pool = Pool(num_cpu_cores, _initialize, [self.tri, self.xtree])
+ self.own_pool = True
+ else:
+ self.pool = None
+ self.own_pool = False
+ self.own_pool = True
+
+ self.x = x
+ self.y = y
+
+ # Convert triangle simplices to a torch tensor
+ simplices = torch.tensor(self.tri.simplices, device=y.device)
+ self.simplices = simplices # Shape: (M, D+1), M = # of simplices
+
+ # Gather simplex vertex positions and function values
+ self.tri_pts = x[simplices] # Shape: (M, D+1, D)
+ self.tri_y = y[simplices] # Shape: (M, D+1)
+
+ # Precompute matrices for barycentric transformation
+ v0 = self.tri_pts[:, 0, :] # First vertex of each simplex
+ T = self.tri_pts[:, 1:, :] - v0[:, None, :] # (M, D, D)
+ T = T.transpose(-1, -2)
+ self.T_inv = torch.inverse(T) # (M, D, D)
+ self.v0 = v0 # Store v0 for barycentric computation
+
+ def __del__(self):
+ if self.own_pool and self.pool is not None:
+ self.pool.close()
+ self.pool.join()
+
+    def __call__(self, xp: torch.Tensor) -> torch.Tensor:
+        """
+        Interpolate y-values at query points xp. Does not keep order of points the same!
+
+        NOTE(review): the results are scattered back through the exact-match
+        mask, which looks order-preserving -- confirm whether the warning
+        above is still accurate.
+
+        Query points reported as exact matches by the lookup helper take the
+        stored value ``self.y[idx]``; every other point is interpolated with
+        barycentric weights inside the simplex reported for it.
+
+        Args:
+            xp: (B, D) tensor of query points in D-dimensional space.
+
+        Returns:
+            out: (B,) tensor of interpolated values.
+        """
+
+        # The simplex lookup runs on the CPU with NumPy, so it works on a
+        # detached copy; without detach(), .numpy() raises for tensors that
+        # require grad.
+        xp_cpu = xp.detach().cpu().numpy()  # (B, D)
+
+        # 1) Find exact matches / containing simplices on the CPU
+        if self.pool is not None:
+            # Split the queries into batches and look them up in parallel
+            batches = np.array_split(xp_cpu, self.num_cpu_cores)
+            results = self.pool.map_async(
+                _find_exact_or_simplex_batch, batches
+            ).get(timeout=10)
+        else:
+            # Run the lookup sequentially
+            results = [_find_exact_or_simplex_batch(xp_cpu, self.tri, self.xtree)]
+
+        # 2) Concatenate the per-batch results back into single arrays
+        # results = [(exact_matches_idx0, simplices0, exact_match_mask0), ...]
+        exact_matches_idx_list = []
+        exact_match_mask_list = []
+        simplices_list = []
+        for exact_matches_idx, simplices, exact_match_mask in results:
+            exact_matches_idx_list.append(exact_matches_idx)
+            exact_match_mask_list.append(exact_match_mask)
+            simplices_list.append(simplices)
+
+        exact_matches_idx = np.concatenate(exact_matches_idx_list)
+        exact_matches_idx = torch.tensor(exact_matches_idx, device=self.y.device)
+        exact_matches_y = self.y[exact_matches_idx]
+        if len(exact_matches_y) == len(xp):
+            # Every query hit a stored point: nothing left to interpolate
+            return exact_matches_y
+
+        exact_match_mask = np.concatenate(exact_match_mask_list)
+        exact_match_mask = torch.tensor(exact_match_mask, device=self.y.device)
+
+        simplices_remaining = np.concatenate(simplices_list)
+        simplices_remaining = torch.tensor(
+            simplices_remaining, device=self.y.device
+        )  # (B,)
+
+        xp_remaining = xp[~exact_match_mask]
+
+        # 3) p - v0: (Bv, D)
+        p_v0 = xp_remaining - self.v0[simplices_remaining]
+
+        # 4) alpha = T_inv @ (p - v0): (Bv, D)
+        T_inv_local = self.T_inv[simplices_remaining]  # (Bv, D, D)
+        # Plain batched matrix-vector product; T was already transposed
+        # before it was inverted when T_inv was precomputed.
+        bary_coords = torch.einsum("bij, bj -> bi", T_inv_local, p_v0)  # (Bv, D)
+
+        # The weight of the first vertex is what is left so that all weights sum to 1
+        bary_coords = torch.cat(
+            [1 - bary_coords.sum(dim=-1, keepdim=True), bary_coords], dim=-1
+        )  # (Bv, D+1)
+
+        # 5) Interpolate y-values using barycentric coordinates
+        tri_y_local = self.tri_y[simplices_remaining]  # (Bv, D+1)
+        out_interpolated = (bary_coords * tri_y_local).sum(dim=-1)  # (Bv,)
+
+        # 6) Scatter exact and interpolated values back to their query positions
+        result = torch.empty(len(xp), dtype=self.y.dtype, device=self.y.device)
+        result[exact_match_mask] = exact_matches_y
+        result[~exact_match_mask] = out_interpolated
+        return result
+
+
+def plot_simple_function():
+    """
+    Visual demo of InterpolatorND on sin(pi * x) * cos(pi * y).
+
+    The function is sampled on a 21 x 21 grid over [0, 1]^2, interpolated at a
+    slightly shifted grid plus one row of points that coincide with training
+    points, and both point sets are shown as scatter plots.
+    """
+
+    # Define a simple 2D function
+    def simple_function(x, y):
+        return np.sin(np.pi * x) * np.cos(np.pi * y)
+
+    # Use the GPU when there is one, otherwise fall back to the CPU
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+
+    # Generate a grid of points for the original function
+    n_points = 21  # Number of points along each axis
+    x_vals = np.linspace(0, 1, n_points)
+    y_vals = np.linspace(0, 1, n_points)
+    x_grid, y_grid = np.meshgrid(x_vals, y_vals)
+    z_grid = simple_function(x_grid, y_grid)  # Compute function values
+
+    # Flatten the grid for input to the interpolator
+    x_flat = x_grid.flatten()
+    y_flat = y_grid.flatten()
+    z_flat = z_grid.flatten()
+
+    # Convert to PyTorch tensors
+    x_train = torch.tensor(
+        np.column_stack((x_flat, y_flat)), dtype=torch.float64, device=device
+    )
+
+    y_train = torch.tensor(z_flat, dtype=torch.float64, device=device)
+
+    # Create the interpolator
+    interpolator = InterpolatorND(x_train, y_train, num_cpu_cores=0)
+
+    # Generate slightly offset query points
+    n_query = 21
+    x_query_vals = np.linspace(0.010, 1.01, n_query)
+    y_query_vals = np.linspace(0.010, 1.01, n_query)
+    x_query_grid, y_query_grid = np.meshgrid(x_query_vals, y_query_vals)
+    # Prepend one row (y = 0, x on the training grid) that coincides with
+    # training points.  np.concatenate is used because np.concat only exists
+    # in NumPy >= 2.0.
+    x_query_grid = np.concatenate(
+        [np.linspace(0.0, 1.0, n_query).reshape(1, -1), x_query_grid]
+    )
+    y_query_grid = np.concatenate([np.zeros([1, n_query]), y_query_grid])
+    x_query_flat = x_query_grid.flatten()
+    y_query_flat = y_query_grid.flatten()
+
+    # Convert query points to PyTorch tensors
+    x_query = torch.tensor(
+        np.column_stack((x_query_flat, y_query_flat)),
+        dtype=torch.float64,
+        device=device,
+    )
+
+    # Perform interpolation
+    z_query = interpolator(x_query).cpu().numpy()  # Interpolated values
+
+    # Plot the original function as a scatter plot
+    plt.figure(figsize=(30, 10))
+    plt.subplot(1, 2, 1)
+    plt.scatter(x_flat, y_flat, c=z_flat, cmap="viridis", s=40)
+    plt.title("Original Function")
+    plt.colorbar()
+    # Plot the interpolated values with the training points drawn on top
+    plt.subplot(1, 2, 2)
+    plt.scatter(x_query_flat, y_query_flat, c=z_query, cmap="viridis", s=200)
+    plt.scatter(
+        x_flat,
+        y_flat,
+        c=z_flat,
+        cmap="viridis",
+        s=200,
+    )
+    plt.title("Interpolated Function with Original Points")
+    plt.colorbar()
+    # Show plots
+    plt.tight_layout()
+    plt.show()
+    print("hi")
+
+
+def interploate_between():
+    """
+    Visual demo: interpolate between six scattered points in the unit square.
+
+    Builds an InterpolatorND from six hand-picked points, evaluates it on a
+    10 x 10 grid over [0, 1]^2 and shows the grid values together with the
+    training points in one scatter plot.
+    """
+
+    # Define 6 points in a 1x1 field
+    x_points = np.array([[0.1, 0.1], [1, 0], [0, 1], [1, 1], [0.3, 0.3], [0.7, 0.7]])
+    # Random perturbation for x_points; currently unused because the line
+    # that applies it is commented out
+    noise = np.random.normal(scale=0.01, size=x_points.shape)
+    # x_points += noise
+    y_values = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
+
+    # Convert to PyTorch tensors (requires a CUDA device)
+    x_train = torch.tensor(x_points, dtype=torch.float64, device="cuda")
+    y_train = torch.tensor(y_values, dtype=torch.float64, device="cuda")
+
+    # Create the interpolator
+    interpolator = InterpolatorND(x_train, y_train, num_cpu_cores=1)
+
+    # Generate a grid of query points
+    n_query = 10  # Number of query points along each axis
+    x_query_vals = np.linspace(0, 1, n_query)
+    y_query_vals = np.linspace(0, 1, n_query)
+    x_query_grid, y_query_grid = np.meshgrid(x_query_vals, y_query_vals)
+    x_query_flat = x_query_grid.flatten()
+    y_query_flat = y_query_grid.flatten()
+
+    # Convert query points to PyTorch tensors
+    x_query = torch.tensor(
+        np.column_stack((x_query_flat, y_query_flat)),
+        dtype=torch.float64,
+        device="cuda",
+    )
+
+    # Perform interpolation
+    z_query = interpolator(x_query).cpu().numpy()  # Interpolated values
+
+    # Plot the interpolated values (small markers) and the training points
+    # (large markers)
+    plt.figure(figsize=(10, 10))
+    plt.scatter(x_query_flat, y_query_flat, c=z_query, cmap="viridis", s=40)
+    plt.scatter(x_points[:, 0], x_points[:, 1], c=y_values, cmap="viridis", s=200)
+    plt.title("Interpolated Values")
+    plt.colorbar()
+    plt.show()
+    print("hi")
+
+
+# Running this module as a script shows the interpolation demo plot.
+if __name__ == "__main__":
+
+    # NOTE(review): these imports only run for script execution; presumably
+    # the module also imports torch / np at the top for regular use -- confirm.
+    import torch
+    import numpy as np
+    import matplotlib.pyplot as plt
+
+    plot_simple_function()
diff --git a/stpy/helpers/plot_helper.py b/stpy/helpers/plot_helper.py
index 1b2eece..3b891b7 100644
--- a/stpy/helpers/plot_helper.py
+++ b/stpy/helpers/plot_helper.py
@@ -4,116 +4,133 @@
import webcolors
-def plot_ellipse(offset, cov, scale=1, theta_num=1e3, axis=None, plot_kwargs=None, fill=False, fill_kwargs=None):
- '''
- offset = 2d array which gives center of ellipse
- cov = covariance of ellipse
- scale = scale ellipse by constant factor
- theta_num = used for a linspace below, not sure exactly (?)
-
- '''
- # Get Ellipse Properties from cov matrix
-
- eig_vec, eig_val, u = np.linalg.svd(cov)
- # Make sure 0th eigenvector has positive x-coordinate
- if eig_vec[0][0] < 0:
- eig_vec[0] *= -1
-
- semimaj = np.sqrt(eig_val[0])
- semimin = np.sqrt(eig_val[1])
- semimaj *= scale
- semimin *= scale
-
- phi = np.arccos(np.dot(eig_vec[0], np.array([1, 0])))
- if eig_vec[0][1] < 0 and phi > 0:
- phi *= -1
-
- # Generate data for ellipse structure
- theta = np.linspace(0, 2 * np.pi, theta_num)
- r = 1 / np.sqrt((np.cos(theta)) ** 2 + (np.sin(theta)) ** 2)
- x = r * np.cos(theta)
- y = r * np.sin(theta)
- data = np.array([x, y])
- S = np.array([[semimaj, 0], [0, semimin]])
- R = np.array([[np.cos(phi), -np.sin(phi)], [np.sin(phi), np.cos(phi)]])
- T = np.dot(R, S)
- data = np.dot(T, data)
- data[0] += offset[0]
- data[1] += offset[1]
-
- # Plot!
- return_fig = False
- if axis is None:
- axis = plt.gca()
-
- if plot_kwargs is None:
- p, = axis.plot(data[0], data[1], color='r', linestyle='-')
- else:
- p, = axis.plot(data[0], data[1], **plot_kwargs)
-
- if fill == True:
- if fill_kwargs is None:
- fill_kwargs = dict()
- axis.fill(data[0], data[1], alpha=0.2, color='r')
+def plot_ellipse(
+    offset,
+    cov,
+    scale=1,
+    theta_num=1e3,
+    axis=None,
+    plot_kwargs=None,
+    fill=False,
+    fill_kwargs=None,
+):
+    """
+    Draw the ellipse described by a 2x2 covariance matrix.
+
+    offset = 2d array which gives center of ellipse
+    cov = covariance of ellipse
+    scale = scale ellipse by constant factor
+    theta_num = number of points sampled along the outline (cast to int)
+    axis = matplotlib axes to draw on; defaults to plt.gca()
+    plot_kwargs = keyword arguments for the outline; default is a solid red line
+    fill = if truthy, the interior is filled as well
+    fill_kwargs = keyword arguments for the fill; they override the defaults
+        alpha=0.2, color="r"
+    """
+    # Get Ellipse Properties from cov matrix
+
+    eig_vec, eig_val, u = np.linalg.svd(cov)
+    # Make sure 0th eigenvector has positive x-coordinate
+    if eig_vec[0][0] < 0:
+        eig_vec[0] *= -1
+
+    semimaj = np.sqrt(eig_val[0]) * scale
+    semimin = np.sqrt(eig_val[1]) * scale
+
+    # Rotation angle of the major axis relative to the x-axis
+    phi = np.arccos(np.dot(eig_vec[0], np.array([1, 0])))
+    if eig_vec[0][1] < 0 and phi > 0:
+        phi *= -1
+
+    # Generate data for ellipse structure: a unit circle that is scaled by
+    # the semi-axes and then rotated by phi.  np.linspace needs an integer
+    # sample count, so the float default 1e3 is converted here.
+    theta = np.linspace(0, 2 * np.pi, int(theta_num))
+    r = 1 / np.sqrt((np.cos(theta)) ** 2 + (np.sin(theta)) ** 2)
+    x = r * np.cos(theta)
+    y = r * np.sin(theta)
+    data = np.array([x, y])
+    S = np.array([[semimaj, 0], [0, semimin]])
+    R = np.array([[np.cos(phi), -np.sin(phi)], [np.sin(phi), np.cos(phi)]])
+    T = np.dot(R, S)
+    data = np.dot(T, data)
+    data[0] += offset[0]
+    data[1] += offset[1]
+
+    # Plot!
+    if axis is None:
+        axis = plt.gca()
+
+    if plot_kwargs is None:
+        axis.plot(data[0], data[1], color="r", linestyle="-")
+    else:
+        axis.plot(data[0], data[1], **plot_kwargs)
+
+    if fill:
+        # Caller-supplied options take precedence over the default look
+        fill_options = {"alpha": 0.2, "color": "r"}
+        if fill_kwargs is not None:
+            fill_options.update(fill_kwargs)
+        axis.fill(data[0], data[1], **fill_options)
def closest_colour(requested_colour):
- min_colours = {}
- for name, key in webcolors.css3_hex_to_names.items():
- r_c, g_c, b_c = webcolors.hex_to_rgb(key)
- rd = (r_c - requested_colour[0]) ** 2
- gd = (g_c - requested_colour[1]) ** 2
- bd = (b_c - requested_colour[2]) ** 2
- min_colours[(rd + gd + bd)] = name
- return min_colours[min(min_colours.keys())]
+    """
+    Return the CSS3 colour name closest to requested_colour.
+
+    requested_colour is indexed as (r, g, b); closeness is the squared
+    Euclidean distance in RGB space.
+    """
+    min_colours = {}
+    # css3_hex_to_names maps hex string -> colour name, so the dict key is the
+    # hex value that hex_to_rgb expects and the dict value is the name to return.
+    for hex_code, name in webcolors.css3_hex_to_names.items():
+        r_c, g_c, b_c = webcolors.hex_to_rgb(hex_code)
+        rd = (r_c - requested_colour[0]) ** 2
+        gd = (g_c - requested_colour[1]) ** 2
+        bd = (b_c - requested_colour[2]) ** 2
+        min_colours[(rd + gd + bd)] = name
+    return min_colours[min(min_colours.keys())]
def get_colour_name(requested_colour):
- try:
- closest_name = actual_name = webcolors.rgb_to_name(requested_colour)
- except ValueError:
- closest_name = closest_colour(requested_colour)
- actual_name = None
- return actual_name, closest_name
-
-
-def colorline(x, y, z=None, cmap=plt.get_cmap('copper'), norm=plt.Normalize(0.0, 1.0),
- linewidth=3, alpha=1.0):
- """
- http://nbviewer.ipython.org/github/dpsanders/matplotlib-examples/blob/master/colorline.ipynb
- http://matplotlib.org/examples/pylab_examples/multicolored_line.html
- Plot a colored line with coordinates x and y
- Optionally specify colors in the array z
- Optionally specify a colormap, a norm function and a line width
- """
-
- # Default colors equally spaced on [0,1]:
- if z is None:
- z = np.linspace(0.0, 1.0, len(x))
-
- # Special case if a single number:
- if not hasattr(z, "__iter__"): # to check for numerical input -- this is a hack
- z = np.array([z])
-
- z = np.asarray(z)
-
- segments = make_segments(x, y)
- lc = mcoll.LineCollection(segments, array=z, cmap=cmap, norm=norm,
- linewidth=linewidth, alpha=alpha)
-
- ax = plt.gca()
- ax.add_collection(lc)
-
- return lc
+    """
+    Return (actual_name, closest_name) for an RGB colour.
+
+    actual_name is the exact colour name, or None when webcolors has no exact
+    match; closest_name is then taken from closest_colour.
+    """
+    try:
+        exact = webcolors.rgb_to_name(requested_colour)
+    except ValueError:
+        # No exact name: fall back to the nearest named colour
+        return None, closest_colour(requested_colour)
+    return exact, exact
+
+
+def colorline(
+    x,
+    y,
+    z=None,
+    cmap=plt.get_cmap("copper"),
+    norm=plt.Normalize(0.0, 1.0),
+    linewidth=3,
+    alpha=1.0,
+):
+    """
+    http://nbviewer.ipython.org/github/dpsanders/matplotlib-examples/blob/master/colorline.ipynb
+    http://matplotlib.org/examples/pylab_examples/multicolored_line.html
+    Plot a colored line with coordinates x and y
+    Optionally specify colors in the array z
+    Optionally specify a colormap, a norm function and a line width
+
+    The line is added to the current axes (plt.gca()) and the created
+    LineCollection is returned.  The default cmap and norm objects are
+    created once, when the function is defined, and shared by all calls.
+    """
+
+    # Default colors equally spaced on [0,1]:
+    if z is None:
+        z = np.linspace(0.0, 1.0, len(x))
+
+    # Special case if a single number:
+    if not hasattr(z, "__iter__"):  # to check for numerical input -- this is a hack
+        z = np.array([z])
+
+    z = np.asarray(z)
+
+    # One segment per pair of consecutive points, coloured through z
+    segments = make_segments(x, y)
+    lc = mcoll.LineCollection(
+        segments, array=z, cmap=cmap, norm=norm, linewidth=linewidth, alpha=alpha
+    )
+
+    ax = plt.gca()
+    ax.add_collection(lc)
+
+    return lc
def make_segments(x, y):
- """
- Create list of line segments from x and y coordinates, in the correct format
- for LineCollection: an array of the form numlines x (points per line) x 2 (x
- and y) array
- """
-
- points = np.array([x, y]).T.reshape(-1, 1, 2)
- segments = np.concatenate([points[:-1], points[1:]], axis=1)
- return segments
+    """
+    Build the segment array that LineCollection expects from x and y
+    coordinates: consecutive points are paired, giving an array of the form
+    numlines x (points per line) x 2 (x and y).
+    """
+    pts = np.array([x, y]).T.reshape(-1, 1, 2)
+    starts, ends = pts[:-1], pts[1:]
+    return np.concatenate([starts, ends], axis=1)
diff --git a/stpy/helpers/plotting_helper.py b/stpy/helpers/plotting_helper.py
index 6a4fa7d..c350229 100644
--- a/stpy/helpers/plotting_helper.py
+++ b/stpy/helpers/plotting_helper.py
@@ -2,7 +2,18 @@
import sklearn
-def plot_R2(vals, lcb, ucb, truth, s, truth_lcb=None, truth_ucb=None, show=False, save_file_name=None, name=None):
+def plot_R2(
+ vals,
+ lcb,
+ ucb,
+ truth,
+ s,
+ truth_lcb=None,
+ truth_ucb=None,
+ show=False,
+ save_file_name=None,
+ name=None,
+):
r2 = sklearn.metrics.r2_score(truth, vals)
if save_file_name is not None:
filename = save_file_name
@@ -15,22 +26,28 @@ def plot_R2(vals, lcb, ucb, truth, s, truth_lcb=None, truth_ucb=None, show=False
plt.xlabel("true")
plt.ylabel("predicted")
- plt.plot(truth, truth, 'k-')
- plt.plot(truth, truth + s, 'k--')
- plt.plot(truth, truth - s, 'k--')
- plt.plot(truth, vals, color='k', marker='o', linestyle='')
-
- plt.errorbar(truth, vals, yerr=vals - lcb, color='k', marker='o', linestyle='')
+ plt.plot(truth, truth, "k-")
+ plt.plot(truth, truth + s, "k--")
+ plt.plot(truth, truth - s, "k--")
+ plt.plot(truth, vals, color="k", marker="o", linestyle="")
+ plt.errorbar(truth, vals, yerr=vals - lcb, color="k", marker="o", linestyle="")
if save_file_name is not None:
plt.savefig(filename + "_0.png", dpi=150)
- plt.errorbar(truth, vals, yerr=vals - lcb + 2 * s, color='r', marker='o', linestyle='', zorder = -10)
+ plt.errorbar(
+ truth,
+ vals,
+ yerr=vals - lcb + 2 * s,
+ color="r",
+ marker="o",
+ linestyle="",
+ zorder=-10,
+ )
if save_file_name is not None:
plt.savefig(filename + "_1.png", dpi=150)
-
if show:
plt.show()
diff --git a/stpy/helpers/posterior_sampling.py b/stpy/helpers/posterior_sampling.py
index 2318f27..46d3472 100644
--- a/stpy/helpers/posterior_sampling.py
+++ b/stpy/helpers/posterior_sampling.py
@@ -1,4 +1,7 @@
+import sys
import numpy as np
+import scipy
+from tqdm import tqdm
# Python implementation of "Exact Hamiltonian Monte Carlo for Truncated Multivariate Gaussian"
@@ -7,214 +10,221 @@
class HmcSampler:
- min_t = 0.00001
-
- def __init__(self, dim, init, f, g, verbose):
- """
-
- :param dim: dimension
- :param init: (dim, ), the initial value for HMC
- :param f: (q, dim), coefficient for linear constraints
- :param g: (q,), linear constraints: f*X+g >= 0
- """
- self.dim = dim
- self.lastSample = init
- self.f = f
- self.g = g
- self.verbose = verbose
-
- def getNextLinearHitTime(self, a, b):
- """
- the position x(t) = a * sin(t) + b * cos(t)
-
- :param a: (dim, ) initial value for a (initial velocity)
- :param b: (dim, ) initial value for b (initial position)
- :return: hit_time: the time for the hit
- cn : the cn-th constraint is active at hit time.
- """
- hit_time = 0
- cn = 0
-
- if self.f is None:
- return hit_time, cn
-
- f = self.f
- g = self.g
- for i in range(f.shape[0]):
- # constraints: f[i].dot(x)+g[i] >= 0
- fa = f[i].dot(a)
- fb = f[i].dot(b)
- u = np.sqrt(fa * fa + fb * fb)
- # if u > g[i] and u > -g[i]:
- if -u < g[i] < u:
- # otherwise the constrain will always be satisfied
- phi = np.arctan2(-fa, fb) # -pi < phi < pi
- t1 = np.arccos(-g[i] / u) - phi # -pi < t1 < 2*pi
-
- if t1 < 0:
- t1 += 2 * np.pi # 0 < t1 < 2*pi
- if np.abs(t1) < self.min_t or \
- np.abs(t1 - 2 * np.pi) < self.min_t:
- t1 = 0
-
- t2 = -t1 - 2 * phi # -4*pi < t2 < 2*pi
- if t2 < 0:
- t2 += 2 * np.pi # -2*pi < t2 < 2*pi
- if t2 < 0:
- t2 += 2 * np.pi # 0 < t2 < 2*pi
-
- if np.abs(t2) < self.min_t or \
- np.abs(t2 - 2 * np.pi) < self.min_t:
- t2 = 0
-
- if t1 == 0:
- t = t2
- elif t2 == 0:
- t = t1
- else:
- t = np.minimum(t1, t2)
-
- if self.min_t < t and (hit_time == 0 or t < hit_time):
- hit_time = t
- cn = i
- return hit_time, cn
-
- def verifyConstraints(self, b):
- """
-
- :param b:
- :return:
- """
- if self.f is not None:
- return np.min(self.f @ b + self.g)
- else:
- return 1
-
- def sampleNext(self):
- T = np.pi / 2 # how much time to move
- b = self.lastSample
- dim = self.dim
-
- count_sample_vel = 0
-
- while True:
- velsign = 0
- # sample new initial velocity
- a = np.random.normal(0, 1, dim)
-
- count_sample_vel += 1
- if self.verbose and count_sample_vel % 50 == 0:
- print("Has sampled %d times of initial velocity." % count_sample_vel)
-
- tt = T # the time left to move
- while True:
- t, c1 = self.getNextLinearHitTime(a, b)
- # t: how much time to move to hit the boundary, if t == 0, move tt
- # c1: the strict constraint at hit time
-
- if t == 0 or tt < t:
- # if no wall to be hit (t == 0) or not enough
- # time left to hit the wall (tt < t)
- break
-
- tt -= t # time left to move after hitting the wall
- new_b = np.sin(t) * a + np.cos(t) * b # hit location
- hit_vel = np.cos(t) * a - np.sin(t) * b # hit velocity
- b = new_b
- # reflect the velocity and verify that it points in the right direction
- f2 = np.dot(self.f[c1], self.f[c1])
- alpha = np.dot(self.f[c1], hit_vel) / f2
- a = hit_vel - 2 * alpha * self.f[c1] # reflected velocity
-
- velsign = a.dot(self.f[c1])
-
- if velsign < 0:
- # get out of inner while, resample the velocity and start again
- # this occurs rarelly, due to numerical instabilities
- break
-
- if velsign < 0:
- # go to the beginning of outer while
- continue
-
- bb = np.sin(tt) * a + np.cos(tt) * b
- check = self.verifyConstraints(bb)
- if check >= 0:
- # verify that we don't violate the constraints
- # due to a numerical instability
- if self.verbose:
- print("total number of velocity samples : %d" % count_sample_vel)
-
- self.lastSample = bb
- return bb
+    # Numerical tolerance on hit times: candidates within min_t of 0 or 2*pi
+    # are snapped to 0, and hits with t <= min_t are ignored.
+    min_t = 0.00001
+
+    def __init__(self, dim, init, f, g, verbose):
+        """
+        Store the sampler state.
+
+        :param dim: dimension
+        :param init: (dim, ), the initial value for HMC
+        :param f: (q, dim), coefficient for linear constraints
+        :param g: (q,), linear constraints: f*X+g >= 0
+        :param verbose: if truthy, sampleNext prints a message every 50
+            velocity draws
+        """
+        self.dim = dim
+        self.lastSample = init
+        self.f = f
+        self.g = g
+        self.verbose = verbose
+
+    def getNextLinearHitTime(self, a, b):
+        """
+        the position x(t) = a * sin(t) + b * cos(t)
+
+        :param a: (dim, ) initial value for a (initial velocity)
+        :param b: (dim, ) initial value for b (initial position)
+        :return: hit_time: the time for the hit
+                 cn : the cn-th constraint is active at hit time.
+                 (0, 0) is returned when there are no constraints or when no
+                 constraint yields a hit time larger than min_t.
+        """
+        hit_time = 0
+        cn = 0
+
+        if self.f is None:
+            return hit_time, cn
+
+        f = self.f
+        g = self.g
+        for i in range(f.shape[0]):
+            # constraints: f[i].dot(x)+g[i] >= 0
+            fa = f[i].dot(a)
+            fb = f[i].dot(b)
+            u = np.sqrt(fa * fa + fb * fb)
+            # if u > g[i] and u > -g[i]:
+            if -u < g[i] < u:
+                # otherwise the constraint will always be satisfied
+                phi = np.arctan2(-fa, fb)  # -pi < phi < pi
+                t1 = np.arccos(-g[i] / u) - phi  # -pi < t1 < 2*pi
+
+                if t1 < 0:
+                    t1 += 2 * np.pi  # 0 < t1 < 2*pi
+                if np.abs(t1) < self.min_t or np.abs(t1 - 2 * np.pi) < self.min_t:
+                    t1 = 0
+
+                t2 = -t1 - 2 * phi  # -4*pi < t2 < 2*pi
+                if t2 < 0:
+                    t2 += 2 * np.pi  # -2*pi < t2 < 2*pi
+                if t2 < 0:
+                    t2 += 2 * np.pi  # 0 < t2 < 2*pi
+
+                if np.abs(t2) < self.min_t or np.abs(t2 - 2 * np.pi) < self.min_t:
+                    t2 = 0
+
+                # a candidate equal to 0 is not a usable hit; take the other one
+                if t1 == 0:
+                    t = t2
+                elif t2 == 0:
+                    t = t1
+                else:
+                    t = np.minimum(t1, t2)
+
+                # keep the earliest hit over all constraints
+                if self.min_t < t and (hit_time == 0 or t < hit_time):
+                    hit_time = t
+                    cn = i
+        return hit_time, cn
+
+    def verifyConstraints(self, b):
+        """
+        Return the smallest constraint value at position b.
+
+        :param b: (dim, ) position to check
+        :return: min over constraints of f @ b + g (negative if a constraint
+                 is violated), or 1 when there are no constraints
+        """
+        if self.f is not None:
+            return np.min(self.f @ b + self.g)
+        else:
+            return 1
+
+    def sampleNext(self):
+        """
+        Draw the next sample of the chain.
+
+        Velocities are redrawn until a trajectory of length pi / 2 ends at a
+        point that satisfies the constraints.
+
+        :return: (bb, count_sample_vel): the new sample, which is also stored
+                 in self.lastSample, and the number of initial velocities that
+                 were drawn to obtain it
+        """
+        T = np.pi / 2  # how much time to move
+        b = self.lastSample
+        dim = self.dim
+
+        count_sample_vel = 0
+
+        while True:
+            velsign = 0
+            # sample new initial velocity
+            a = np.random.normal(0, 1, dim)
+
+            count_sample_vel += 1
+            if self.verbose and count_sample_vel % 50 == 0:
+                print("Has sampled %d times of initial velocity." % count_sample_vel)
+
+            tt = T  # the time left to move
+            while True:
+                t, c1 = self.getNextLinearHitTime(a, b)
+                # t: how much time to move to hit the boundary, if t == 0, move tt
+                # c1: the strict constraint at hit time
+
+                if t == 0 or tt < t:
+                    # if no wall to be hit (t == 0) or not enough
+                    # time left to hit the wall (tt < t)
+                    break
+
+                tt -= t  # time left to move after hitting the wall
+                new_b = np.sin(t) * a + np.cos(t) * b  # hit location
+                hit_vel = np.cos(t) * a - np.sin(t) * b  # hit velocity
+                b = new_b
+                # reflect the velocity and verify that it points in the right direction
+                f2 = np.dot(self.f[c1], self.f[c1])
+                alpha = np.dot(self.f[c1], hit_vel) / f2
+                a = hit_vel - 2 * alpha * self.f[c1]  # reflected velocity
+
+                velsign = a.dot(self.f[c1])
+
+                if velsign < 0:
+                    # get out of inner while, resample the velocity and start again
+                    # this occurs rarely, due to numerical instabilities
+                    break
+
+            if velsign < 0:
+                # go to the beginning of outer while
+                continue
+
+            bb = np.sin(tt) * a + np.cos(tt) * b
+            check = self.verifyConstraints(bb)
+            if check >= 0:
+                # verify that we don't violate the constraints
+                # due to a numerical instability
+
+                self.lastSample = bb
+                return bb, count_sample_vel
def tmg(n, mu, M, initial, f=None, g=None, burn_in=30, verbose=False):
- """
- This function generates samples from a Markov chain whose equilibrium distribution is a d-dimensional
- multivariate Gaussian truncated by linear inequalities. The log probability density is
- log p(X) = -0.5 (X-mu)^T M^-1 (X-mu) + const
- in terms of a covariance matrix M and a mean vector mu. The constraints are imposed as explained below.
- The Markov chain is built using the Hamiltonian Monte Carlo technique.
-
- :param n: Number of samples.
- :param mu: (m,) vector for the mean of multivariate Gaussian density
- :param M: (m,m) covariance matrix of the multivariate Gaussian density
- :param initial: (m,) vector with the initial value of the Markov chain. Must satisfy
- the truncation inequalities strictly.
- :param f: (q,m) matrix, where q is the number of linear constraints. The constraints require each component
- of the m-dimensional vector fX+g to be non-negative
- :param g: (q,) vector with the constant terms in the above linear constraints.
- :param burn_in: The number of burn-in iterations. The Markov chain is sampled n + burn_in
- times, and the last n samples are returned.
- :param verbose:
- :return: (n, m)
- """
-
- dim = len(mu)
- if M.shape[1] != dim:
- raise ValueError("The covariance matrix must be square.")
-
- if len(initial) != dim:
- raise ValueError("Wrong length for initial value vector.")
-
- # verify that M is positive definite, it will raise an error if M is not SPD
- R = np.linalg.cholesky(M)
-
- # we change variable to the canonical frame, and transform back after sampling
- # X ~ N(mu, M), then R^-1(X-mu) ~ N(0, I)
- init_trans = scipy.linalg.solve(R, initial - mu) # the new initial value
-
- if f is not None:
- if f.shape[0] != len(g) or f.shape[1] != dim:
- raise ValueError("Inconsistent linear constraints. f must \
- be an d-by-m matrix and g an d-dimensional vector.")
- # g may contains infinity, extract valid constraints
- valid = np.logical_and(g < np.inf, g > -np.inf)
- g = g[valid]
- f = f[valid]
-
- # verify initial value satisfies linear constraints
- if np.any(f @ initial + g < 0):
- raise ValueError("Initial point violates linear constraints.")
-
- # map linear constraints to canonical frame
- f_trans = f @ R
- g_trans = f @ mu + g
-
- hmc = HmcSampler(dim, init_trans, f_trans, g_trans, verbose=verbose)
- else:
- hmc = HmcSampler(dim, init_trans, f, g, verbose=verbose)
-
- samples = np.zeros((n, dim))
- for i in range(burn_in):
- if verbose:
- print("=" * 30 + " (burn in) sample {} ".format(i) + "=" * 30)
- hmc.sampleNext()
- for i in range(n):
- if verbose:
- print("=" * 30 + " sample {} ".format(i) + "=" * 30)
- samples[i] = hmc.sampleNext()
-
- # transform back
- return samples @ R.T + mu
+    """
+    This function generates samples from a Markov chain whose equilibrium distribution is a d-dimensional
+    multivariate Gaussian truncated by linear inequalities. The log probability density is
+    log p(X) = -0.5 (X-mu)^T M^-1 (X-mu) + const
+    in terms of a covariance matrix M and a mean vector mu. The constraints are imposed as explained below.
+    The Markov chain is built using the Hamiltonian Monte Carlo technique.
+
+    :param n: Number of samples.
+    :param mu: (m,) vector for the mean of multivariate Gaussian density
+    :param M: (m,m) covariance matrix of the multivariate Gaussian density
+    :param initial: (m,) vector with the initial value of the Markov chain. Must satisfy
+                    the truncation inequalities strictly.
+    :param f: (q,m) matrix, where q is the number of linear constraints. The constraints require each component
+              of the m-dimensional vector fX+g to be non-negative
+    :param g: (q,) vector with the constant terms in the above linear constraints.
+    :param burn_in: The number of burn-in iterations. The Markov chain is sampled n + burn_in
+                    times, and the last n samples are returned.
+    :param verbose: if True, the number of velocity draws per sample is shown below the progress bar
+    :return: (n, m)
+    :raises ValueError: if the shapes of M, initial, f or g are inconsistent, or if initial
+                        violates the constraints
+    """
+
+    dim = len(mu)
+    # Check both dimensions: M must be (dim, dim), not merely have dim columns
+    if M.shape != (dim, dim):
+        raise ValueError("The covariance matrix must be square.")
+
+    if len(initial) != dim:
+        raise ValueError("Wrong length for initial value vector.")
+
+    # verify that M is positive definite, it will raise an error if M is not SPD
+    R = np.linalg.cholesky(M)
+
+    # we change variable to the canonical frame, and transform back after sampling
+    # X ~ N(mu, M), then R^-1(X-mu) ~ N(0, I)
+    init_trans = scipy.linalg.solve(R, initial - mu)  # the new initial value
+
+    if f is not None:
+        if f.shape[0] != len(g) or f.shape[1] != dim:
+            raise ValueError(
+                "Inconsistent linear constraints. f must"
+                " be an d-by-m matrix and g an d-dimensional vector."
+            )
+        # g may contain infinity, extract valid constraints
+        valid = np.logical_and(g < np.inf, g > -np.inf)
+        g = g[valid]
+        f = f[valid]
+
+        # verify initial value satisfies linear constraints
+        if np.any(f @ initial + g < 0):
+            raise ValueError("Initial point violates linear constraints.")
+
+        # map linear constraints to canonical frame
+        f_trans = f @ R
+        g_trans = f @ mu + g
+
+        hmc = HmcSampler(dim, init_trans, f_trans, g_trans, verbose=verbose)
+    else:
+        hmc = HmcSampler(dim, init_trans, f, g, verbose=verbose)
+
+    samples = np.zeros((n, dim))
+    for num_steps, desc in [(burn_in, "Burn-In"), (n, "sampling")]:
+        count_sample_vels = []
+        # The context managers close both bars, including the description-only
+        # one, even when sampleNext raises.
+        with tqdm(range(num_steps), desc=desc, position=0) as progress_bar, tqdm(
+            total=1, bar_format="{desc}", position=1
+        ) as numbers_bar:
+            for i in progress_bar:
+                s, count_sample_vel = hmc.sampleNext()
+                # burn-in draws only advance the chain; they are not stored
+                if desc == "sampling":
+                    samples[i] = s
+
+                if hmc.verbose:
+                    count_sample_vels.append(count_sample_vel)
+                    numbers_bar.set_description(
+                        "\rtotal number of velocity samples:"
+                        f" {', '.join(map(str, count_sample_vels))}"
+                    )
+                    numbers_bar.refresh()
+
+    # transform back
+    return samples @ R.T + mu
diff --git a/stpy/helpers/quadrature_helper.py b/stpy/helpers/quadrature_helper.py
index b03d28e..997cd27 100644
--- a/stpy/helpers/quadrature_helper.py
+++ b/stpy/helpers/quadrature_helper.py
@@ -6,253 +6,320 @@
def integrate_sin_sin(a, b, omega1, omega2):
- """
-
- :param a:
- :param b:
- :param omega1:
- :param omega2:
- :return:
- >>> np.round(integrate_sin_sin(0.2,0.5,2,3),6)
- 0.164678
- """
- eps = 10e-5
- if np.abs(omega1 - omega2) < eps:
- F = lambda x: x / 2 - np.sin(2 * omega1 * x) / (4 * omega1)
- else:
- F = lambda x: (omega2 * np.sin(omega1 * x) * np.cos(x * omega2) -
- omega1 * np.cos(omega1 * x) * np.sin(omega2 * x)) / (omega1 ** 2 - omega2 ** 2)
- return F(b) - F(a)
+    """
+    Integral of sin(omega1 * x) * sin(omega2 * x) over [a, b].
+
+    Frequencies closer than eps to each other, or to each other's negative,
+    are treated as equal / opposite so that the general formula is never
+    divided by (omega1**2 - omega2**2) close to zero.
+
+    :param a: lower integration bound
+    :param b: upper integration bound
+    :param omega1: frequency of the first factor
+    :param omega2: frequency of the second factor
+    :return: value of the definite integral
+    >>> np.round(integrate_sin_sin(0.2,0.5,2,3),6)
+    0.164678
+    """
+    eps = 10e-5
+    same = np.abs(omega1 - omega2) < eps
+    opposite = np.abs(omega1 + omega2) < eps
+    if same or opposite:
+        if omega1 == 0:
+            # sin(0 * x) == 0, so the integrand vanishes
+            return 0.0 * (b - a)
+        # sin(w x) * sin(+-w x) = +-sin(w x)**2
+        sign = 1.0 if same else -1.0
+        F = lambda x: sign * (x / 2 - np.sin(2 * omega1 * x) / (4 * omega1))
+    else:
+        F = lambda x: (
+            omega2 * np.sin(omega1 * x) * np.cos(x * omega2)
+            - omega1 * np.cos(omega1 * x) * np.sin(omega2 * x)
+        ) / (omega1**2 - omega2**2)
+    return F(b) - F(a)
def integrate_sin_cos(a, b, omega1, omega2):
- """
-
- :param a:
- :param b:
- :param omega1:
- :param omega2:
- :return:
- >>> np.round(integrate_sin_cos(0.2,0.5,2,3),6)
- 0.082903
- """
- eps = 10e-5
- if np.abs(omega1 - omega2) < eps:
- F = lambda x: -np.cos(omega1 * x) ** 2 / (2 * omega1)
- else:
- F = lambda x: -(omega2 * np.sin(omega1 * x) * np.sin(x * omega2) +
- omega1 * np.cos(omega1 * x) * np.cos(omega2 * x)) / (omega1 ** 2 - omega2 ** 2)
- return F(b) - F(a)
+    """
+    Integral of sin(omega1 * x) * cos(omega2 * x) over [a, b].
+
+    Frequencies closer than eps to each other, or to each other's negative,
+    are treated as equal / opposite so that the general formula is never
+    divided by (omega1**2 - omega2**2) close to zero.
+
+    :param a: lower integration bound
+    :param b: upper integration bound
+    :param omega1: frequency of the sine factor
+    :param omega2: frequency of the cosine factor
+    :return: value of the definite integral
+    >>> np.round(integrate_sin_cos(0.2,0.5,2,3),6)
+    0.082903
+    """
+    eps = 10e-5
+    if np.abs(omega1 - omega2) < eps or np.abs(omega1 + omega2) < eps:
+        if omega1 == 0:
+            # sin(0 * x) == 0, so the integrand vanishes
+            return 0.0 * (b - a)
+        # cos is even, so both cases reduce to sin(w x) * cos(w x)
+        F = lambda x: -np.cos(omega1 * x) ** 2 / (2 * omega1)
+    else:
+        F = lambda x: -(
+            omega2 * np.sin(omega1 * x) * np.sin(x * omega2)
+            + omega1 * np.cos(omega1 * x) * np.cos(omega2 * x)
+        ) / (omega1**2 - omega2**2)
+    return F(b) - F(a)
def integrate_cos_cos(a, b, omega1, omega2):
- """
-
- :param a:
- :param b:
- :param omega1:
- :param omega2:
- :return:
- >>> np.round(integrate_cos_cos(0.2,0.5,2,3),6)
- 0.116078
- """
- eps = 10e-5
- if np.abs(omega1 - omega2) < eps:
- F = lambda x: x / 2 + np.sin(2 * omega1 * x) / (4 * omega1)
- else:
- F = lambda x: (omega1 * np.sin(omega1 * x) * np.cos(x * omega2) -
- omega2 * np.cos(omega1 * x) * np.sin(omega2 * x)) / (omega1 ** 2 - omega2 ** 2)
- return F(b) - F(a)
+    """
+    Integral of cos(omega1 * x) * cos(omega2 * x) over [a, b].
+
+    Frequencies closer than eps to each other, or to each other's negative,
+    are treated as equal / opposite so that the general formula is never
+    divided by (omega1**2 - omega2**2) close to zero.
+
+    :param a: lower integration bound
+    :param b: upper integration bound
+    :param omega1: frequency of the first factor
+    :param omega2: frequency of the second factor
+    :return: value of the definite integral
+    >>> np.round(integrate_cos_cos(0.2,0.5,2,3),6)
+    0.116078
+    """
+    eps = 10e-5
+    if np.abs(omega1 - omega2) < eps or np.abs(omega1 + omega2) < eps:
+        if omega1 == 0:
+            # cos(0 * x) == 1, so the integrand is (numerically) constant 1
+            return b - a
+        # cos is even, so both cases reduce to cos(w x)**2
+        F = lambda x: x / 2 + np.sin(2 * omega1 * x) / (4 * omega1)
+    else:
+        F = lambda x: (
+            omega1 * np.sin(omega1 * x) * np.cos(x * omega2)
+            - omega2 * np.cos(omega1 * x) * np.sin(omega2 * x)
+        ) / (omega1**2 - omega2**2)
+    return F(b) - F(a)
def integrate2d_sin_sin(A, B, C, D, a, b, c, d):
- Cos = lambda x: np.cos(x)
- val = (1 / (2 * (b - d) * (b + d))) * (-(((b + d) * (Cos(a * A - A * c + b * C - C * d) -
- Cos(a * B - B * c + b * C - C * d))) / (a - c)) + (
- (b + d) * (Cos(a * A - A * c + b * D - d * D) -
- Cos(a * B - B * c + b * D - d * D))) / (a - c) + (
- 1 / (
- a + c)) * (b - d) * (Cos(A * (a + c) + C * (b + d)) - Cos(
- B * (a + c) + C * (b + d)) - Cos(A * (a + c) + (b + d) *
- D) + Cos(B * (a + c) + (b + d) * D)))
- return val
+    """
+    Closed-form value of the double integral of
+    sin(a * x + b * y) * sin(c * x + d * y) for x in [A, B] and y in [C, D].
+
+    The expression divides by (b - d), (b + d), (a - c) and (a + c), so it is
+    undefined when a == +-c or b == +-d.
+    """
+    Cos = lambda x: np.cos(x)
+    val = (1 / (2 * (b - d) * (b + d))) * (
+        # difference-frequency part: cos((a - c) * x + (b - d) * y) at the corners
+        -(
+            (
+                (b + d)
+                * (
+                    Cos(a * A - A * c + b * C - C * d)
+                    - Cos(a * B - B * c + b * C - C * d)
+                )
+            )
+            / (a - c)
+        )
+        + (
+            (b + d)
+            * (Cos(a * A - A * c + b * D - d * D) - Cos(a * B - B * c + b * D - d * D))
+        )
+        / (a - c)
+        # sum-frequency part: cos((a + c) * x + (b + d) * y) at the corners
+        + (1 / (a + c))
+        * (b - d)
+        * (
+            Cos(A * (a + c) + C * (b + d))
+            - Cos(B * (a + c) + C * (b + d))
+            - Cos(A * (a + c) + (b + d) * D)
+            + Cos(B * (a + c) + (b + d) * D)
+        )
+    )
+    return val
def integrate2d_sin_cos(A, B, C, D, a, b, c, d):
- Sin = lambda x: np.sin(x)
- val = (1 / (2 * (b - d) * (b + d))) * (((b + d) * (-Sin(a * A - A * c + b * C - C * d) +
- Sin(a * B - B * c + b * C - C * d))) / (a - c) + (
- (b + d) * (Sin(a * A - A * c + b * D - d * D) -
- Sin(a * B - B * c + b * D - d * D))) / (a - c) - (
- 1 / (a + c)) * (b - d) * (Sin(A * (a + c) + C * (b + d)) -
- Sin(B * (a + c) + C * (b + d)) - Sin(
- A * (a + c) + (b + d) * D) +
- Sin(B * (a + c) + (b + d) * D)))
- return val
+    """
+    Closed-form value of the double integral of
+    sin(a * x + b * y) * cos(c * x + d * y) for x in [A, B] and y in [C, D].
+
+    The expression divides by (b - d), (b + d), (a - c) and (a + c), so it is
+    undefined when a == +-c or b == +-d.
+    """
+    Sin = lambda x: np.sin(x)
+    val = (1 / (2 * (b - d) * (b + d))) * (
+        # difference-frequency part: sin((a - c) * x + (b - d) * y) at the corners
+        (
+            (b + d)
+            * (-Sin(a * A - A * c + b * C - C * d) + Sin(a * B - B * c + b * C - C * d))
+        )
+        / (a - c)
+        + (
+            (b + d)
+            * (Sin(a * A - A * c + b * D - d * D) - Sin(a * B - B * c + b * D - d * D))
+        )
+        / (a - c)
+        # sum-frequency part: sin((a + c) * x + (b + d) * y) at the corners
+        - (1 / (a + c))
+        * (b - d)
+        * (
+            Sin(A * (a + c) + C * (b + d))
+            - Sin(B * (a + c) + C * (b + d))
+            - Sin(A * (a + c) + (b + d) * D)
+            + Sin(B * (a + c) + (b + d) * D)
+        )
+    )
+    return val
def integrate2d_cos_cos(A, B, C, D, a, b, c, d):
- Cos = lambda x: np.cos(x)
- val = -(1 / (2 * (b - d) * (b + d))) * (((b + d)(Cos(a * A - A * c + b * C - C * d) -
- Cos(a * B - B * c + b * C - C * d))) / (
- a - c) - ((b + d) * (Cos(a * A - A * c + b * D - d * D) -
- Cos(a * B - B * c + b * D - d * D))) / (
- a - c) + (1 / (
- a + c)) * (b - d) * (Cos(A * (a + c) + C * (b + d)) -
- Cos(B * (a + c) + C * (b + d)) - Cos(A * (a + c) + (b + d) * D) + Cos(
- B * (a + c) + (b + d) * D)))
- return val
+    """
+    Closed-form value of the double integral of
+    cos(a * x + b * y) * cos(c * x + d * y) for x in [A, B] and y in [C, D].
+
+    The formula divides by (a - c), (a + c), (b - d) and (b + d), so it is
+    undefined whenever a == c, a == -c, b == d or b == -d.
+    """
+    Cos = np.cos
+    val = -(1 / (2 * (b - d) * (b + d))) * (
+        (
+            # The previous code read `(b + d)(...)`, i.e. it tried to call a
+            # number; the antiderivative requires the product.
+            (b + d)
+            * (Cos(a * A - A * c + b * C - C * d) - Cos(a * B - B * c + b * C - C * d))
+        )
+        / (a - c)
+        - (
+            (b + d)
+            * (Cos(a * A - A * c + b * D - d * D) - Cos(a * B - B * c + b * D - d * D))
+        )
+        / (a - c)
+        + (1 / (a + c))
+        * (b - d)
+        * (
+            Cos(A * (a + c) + C * (b + d))
+            - Cos(B * (a + c) + C * (b + d))
+            - Cos(A * (a + c) + (b + d) * D)
+            + Cos(B * (a + c) + (b + d) * D)
+        )
+    )
+    return val
def integrate_sin_multidimensional(a, b, omegas):
- """
-
- :param a: bounds start
- :param b: bounds end
- :param omegas: frequencies
- :return:
- >>> np.round(integrate_sin_multidimensional(np.array([0.5]),np.array([1.]),np.array([2.])),5)
- 0.47822
- >>> np.round(integrate_sin_multidimensional(np.array([0.5,0.3]),np.array([1.,4.]),np.array([2.,5.])),5)
- -0.01037
- >>> np.round(integrate_sin_multidimensional(np.array([0.5,0.3,0.8]),np.array([1.,4.,3.1]),np.array([2.,5.,1.5])),5)
- 0.02232
- """
- d = omegas.shape[0]
-
- z = np.array([omegas * b, omegas * a])
- sign = np.array([omegas * 0, omegas * 0 + 1])
- ar = cartesian([z[:, i] for i in range(z.shape[1])])
- signs = cartesian([sign[:, i] for i in range(sign.shape[1])])
- signs = np.sum(signs, axis=1)
- ar = np.sum(ar, axis=1)
- k = 1. / np.prod(omegas)
- # print (ar)
-
- if d % 2 == 1:
- r = np.cos(ar)
- if d % 4 == 1:
- r = -r
- for i in range(r.shape[0]):
- if signs[i] % 2 == 1:
- r[i] = -r[i]
- else:
- r = np.sin(ar)
- if d % 4 == 3:
- r = -r
- for i in range(r.shape[0]):
- if signs[i] % 2 == 0:
- r[i] = -r[i]
- return k * np.sum(r)
+ """
+
+ :param a: bounds start
+ :param b: bounds end
+ :param omegas: frequencies
+ :return:
+ >>> np.round(integrate_sin_multidimensional(np.array([0.5]),np.array([1.]),np.array([2.])),5)
+ 0.47822
+ >>> np.round(integrate_sin_multidimensional(np.array([0.5,0.3]),np.array([1.,4.]),np.array([2.,5.])),5)
+ -0.01037
+ >>> np.round(integrate_sin_multidimensional(np.array([0.5,0.3,0.8]),np.array([1.,4.,3.1]),np.array([2.,5.,1.5])),5)
+ 0.02232
+ """
+ d = omegas.shape[0]
+
+ z = np.array([omegas * b, omegas * a])
+ sign = np.array([omegas * 0, omegas * 0 + 1])
+ ar = cartesian([z[:, i] for i in range(z.shape[1])])
+ signs = cartesian([sign[:, i] for i in range(sign.shape[1])])
+ signs = np.sum(signs, axis=1)
+ ar = np.sum(ar, axis=1)
+ k = 1.0 / np.prod(omegas)
+ # print (ar)
+
+ if d % 2 == 1:
+ r = np.cos(ar)
+ if d % 4 == 1:
+ r = -r
+ for i in range(r.shape[0]):
+ if signs[i] % 2 == 1:
+ r[i] = -r[i]
+ else:
+ r = np.sin(ar)
+ if d % 4 == 3:
+ r = -r
+ for i in range(r.shape[0]):
+ if signs[i] % 2 == 0:
+ r[i] = -r[i]
+ return k * np.sum(r)
def integrate_cos_multidimensional(a, b, omegas):
- """
-
- :param a: bounds start
- :param b: bounds end
- :param omegas: frequencies
- :return:
- >>> np.round(integrate_cos_multidimensional(np.array([0.5]),np.array([1.]),np.array([2.])),5)
- 0.03391
- >>> np.round(integrate_cos_multidimensional(np.array([0.5,0.3]),np.array([1.,4.]),np.array([2.,5.])),5)
- 0.03169
- >>> np.round(integrate_cos_multidimensional(np.array([0.5,0.3,0.8]),np.array([1.,4.,3.1]),np.array([2.,5.,1.5])),5)
- -0.03784
- """
- d = omegas.shape[0]
-
- z = np.array([omegas * b, omegas * a])
- sign = np.array([omegas * 0, omegas * 0 + 1])
- # print(z)
- ar = cartesian([z[:, i] for i in range(z.shape[1])])
- signs = cartesian([sign[:, i] for i in range(sign.shape[1])])
- signs = np.sum(signs, axis=1)
- ar = np.sum(ar, axis=1)
- k = 1. / np.prod(omegas)
- # print (ar)
-
- if d % 2 == 1:
- r = np.sin(ar)
- if d % 4 == 3:
- r = -r
- for i in range(r.shape[0]):
- if signs[i] % 2 == 1:
- r[i] = -r[i]
- else:
- r = np.cos(ar)
- if d % 4 == 1:
- r = -r
- for i in range(r.shape[0]):
- if signs[i] % 2 == 0:
- r[i] = -r[i]
-
- return k * np.sum(r)
+ """
+
+ :param a: bounds start
+ :param b: bounds end
+ :param omegas: frequencies
+ :return:
+ >>> np.round(integrate_cos_multidimensional(np.array([0.5]),np.array([1.]),np.array([2.])),5)
+ 0.03391
+ >>> np.round(integrate_cos_multidimensional(np.array([0.5,0.3]),np.array([1.,4.]),np.array([2.,5.])),5)
+ 0.03169
+ >>> np.round(integrate_cos_multidimensional(np.array([0.5,0.3,0.8]),np.array([1.,4.,3.1]),np.array([2.,5.,1.5])),5)
+ -0.03784
+ """
+ d = omegas.shape[0]
+
+ z = np.array([omegas * b, omegas * a])
+ sign = np.array([omegas * 0, omegas * 0 + 1])
+ # print(z)
+ ar = cartesian([z[:, i] for i in range(z.shape[1])])
+ signs = cartesian([sign[:, i] for i in range(sign.shape[1])])
+ signs = np.sum(signs, axis=1)
+ ar = np.sum(ar, axis=1)
+ k = 1.0 / np.prod(omegas)
+ # print (ar)
+
+ if d % 2 == 1:
+ r = np.sin(ar)
+ if d % 4 == 3:
+ r = -r
+ for i in range(r.shape[0]):
+ if signs[i] % 2 == 1:
+ r[i] = -r[i]
+ else:
+ r = np.cos(ar)
+ if d % 4 == 1:
+ r = -r
+ for i in range(r.shape[0]):
+ if signs[i] % 2 == 0:
+ r[i] = -r[i]
+
+ return k * np.sum(r)
def romberg2d(func, x1, x2, y1, y2):
- """
-
- :param func:
- :param x1:
- :param x2:
- :param y1:
- :param y2:
- :return:
- >>> np.round(romberg2d(lambda x,y:2*x**2+y**2,0,1,1,2),5)
- 3.0
- """
- func2 = lambda y, a, b: integrate.romberg(func, a, b, args=(y,))
- return integrate.romberg(func2, y1, y2, args=(x1, x2))
-
-
-def quadvec2(func, x1, x2, y1, y2, epsabs=1e-200, epsrel=1e-08, limit=1000, workers=1, quadrature='gk21'):
- """
- >>> alpha = np.linspace(0.0, 2.0, num=30)
- >>> np.round(quadvec2(lambda x,y: x**alpha + y**alpha,0,1,1,2)[0],5)
- 2.0
- >>> np.round(quadvec2(lambda x,y: 2*x**alpha + y**alpha,0,1,1,2)[-1],5)
- 3.0
- """
- func2 = lambda y: \
- integrate.quad_vec(lambda x: func(x, y), x1, x2, epsabs=epsabs, epsrel=epsrel, limit=limit, quadrature=quadrature)[
- 0]
- res = integrate.quad_vec(func2, y1, y2, epsabs=epsabs, epsrel=epsrel, limit=limit, quadrature=quadrature)
- return res[0]
+ """
+
+ :param func:
+ :param x1:
+ :param x2:
+ :param y1:
+ :param y2:
+ :return:
+ >>> np.round(romberg2d(lambda x,y:2*x**2+y**2,0,1,1,2),5)
+ 3.0
+ """
+ func2 = lambda y, a, b: integrate.romberg(func, a, b, args=(y,))
+ return integrate.romberg(func2, y1, y2, args=(x1, x2))
+
+
+def quadvec2(
+ func,
+ x1,
+ x2,
+ y1,
+ y2,
+ epsabs=1e-200,
+ epsrel=1e-08,
+ limit=1000,
+ workers=1,
+ quadrature="gk21",
+):
+ """
+ >>> alpha = np.linspace(0.0, 2.0, num=30)
+ >>> np.round(quadvec2(lambda x,y: x**alpha + y**alpha,0,1,1,2)[0],5)
+ 2.0
+ >>> np.round(quadvec2(lambda x,y: 2*x**alpha + y**alpha,0,1,1,2)[-1],5)
+ 3.0
+ """
+ func2 = lambda y: integrate.quad_vec(
+ lambda x: func(x, y),
+ x1,
+ x2,
+ epsabs=epsabs,
+ epsrel=epsrel,
+ limit=limit,
+ quadrature=quadrature,
+ )[0]
+ res = integrate.quad_vec(
+ func2, y1, y2, epsabs=epsabs, epsrel=epsrel, limit=limit, quadrature=quadrature
+ )
+ return res[0]
def AvgEig(Phi, xtest):
- n = Phi(xtest[0].view(1, -1)).size()[0]
- A = torch.zeros(size=(n, n), dtype=torch.float64)
- for x in xtest:
- v = Phi(x.view(1, -1)).view(-1, 1)
- A = A + v @ v.T
- A = A / xtest.size()[0]
- # import matplotlib.pyplot as plt
- # plt.imshow(A)
- # plt.colorbar()
- # plt.show()
- maxeig = torch.min(torch.symeig(A)[0])
- return maxeig
+    """
+    Smallest eigenvalue of the averaged outer product of the embeddings.
+
+    For every row x of xtest the column vector v = Phi(x) is formed and
+    v @ v.T is accumulated; the sum is divided by the number of rows and the
+    minimum eigenvalue of that symmetric matrix is returned.
+
+    :param Phi: callable mapping a (1, d) tensor to an embedding tensor
+    :param xtest: tensor whose rows are the evaluation points
+    :return: zero-dimensional tensor holding the smallest eigenvalue
+    """
+    n = Phi(xtest[0].view(1, -1)).size()[0]
+    A = torch.zeros(size=(n, n), dtype=torch.float64)
+    for x in xtest:
+        v = Phi(x.view(1, -1)).view(-1, 1)
+        A = A + v @ v.T
+    A = A / xtest.size()[0]
+    # torch.symeig is no longer available in current PyTorch releases;
+    # torch.linalg.eigvalsh returns the eigenvalues of a symmetric matrix.
+    mineig = torch.min(torch.linalg.eigvalsh(A))
+    return mineig
def volume_eig(Phi, xtest, alpha=0.5):
- n = Phi(xtest[0].view(1, -1)).size()[0]
- A = torch.zeros(size=(n, n), dtype=torch.float64)
- for x in xtest:
- v = Phi(x.view(1, -1)).view(-1, 1)
- mineig = torch.min(torch.symeig(v @ v.T)[0])
- print(mineig)
- vol = 0
- return vol
+ n = Phi(xtest[0].view(1, -1)).size()[0]
+ A = torch.zeros(size=(n, n), dtype=torch.float64)
+ for x in xtest:
+ v = Phi(x.view(1, -1)).view(-1, 1)
+ mineig = torch.min(torch.symeig(v @ v.T)[0])
+ print(mineig)
+ vol = 0
+ return vol
def chebyschev_nodes(n, d=1, L_infinity_ball=1):
- nodes, w = np.polynomial.chebyshev.chebgauss(n)
- arrays = [nodes.reshape(n, 1) for i in range(d)]
- xtest = cartesian(arrays)
- return xtest
+ nodes, w = np.polynomial.chebyshev.chebgauss(n)
+ arrays = [nodes.reshape(n, 1) for i in range(d)]
+ xtest = cartesian(arrays)
+ return xtest
if __name__ == "__main__":
- pass
+ pass
diff --git a/stpy/helpers/scores.py b/stpy/helpers/scores.py
index 133db9c..af1cf97 100644
--- a/stpy/helpers/scores.py
+++ b/stpy/helpers/scores.py
@@ -1,4 +1,5 @@
import torch
-def r_score_std(y_true, y_pred, std, alpha = 1.):
- return 1 - torch.mean((y_true - y_pred)**2)/(alpha*std**2)
\ No newline at end of file
+
+def r_score_std(y_true, y_pred, std, alpha=1.0):
+ return 1 - torch.mean((y_true - y_pred) ** 2) / (alpha * std**2)
diff --git a/stpy/helpers/transformations.py b/stpy/helpers/transformations.py
index 6a77ee1..2ea4eb1 100644
--- a/stpy/helpers/transformations.py
+++ b/stpy/helpers/transformations.py
@@ -5,40 +5,48 @@
def transform(X, low=-1, high=1, functions=True, offsets=None):
- n, d = X.size()
- Y = X.clone()
- transforms = []
- inv_transforms = []
-
- for i in range(d):
-
- if offsets is None:
- xmin = torch.min(X[:, i]).clone().numpy()
- xmax = torch.max(X[:, i]).clone().numpy()
- else:
- xmin = offsets[i][0]
- xmax = offsets[i][1]
-
- k = copy.copy(float((xmin - xmax) / ((low - high))))
- q = copy.copy(float(xmin - low * k))
-
- k2 = copy.copy(float((low - high) / (xmin - xmax)))
- q2 = copy.copy(float(high - xmax * k2))
-
- inv_transform = lambda a, k=k, q=q: k * a + q
- transform = lambda a, k2=k2, q2=q2: k2 * a + q2
-
- transforms.append(copy.copy(transform))
- inv_transforms.append(copy.copy(inv_transform))
-
- Y[:, i] = torch.from_numpy(np.apply_along_axis(transform, 0, X[:, i].numpy()))
-
- trans = lambda Z: torch.stack(
- [torch.from_numpy(np.apply_along_axis(transforms[i], 0, Z[:, i].numpy())) for i in range(d)]).T
- inv_trans = lambda Y: torch.stack(
- [torch.from_numpy(np.apply_along_axis(inv_transforms[i], 0, Y[:, i].numpy())) for i in range(d)]).T
-
- if functions == True:
- return Y, trans, inv_trans, transforms, inv_transforms
- else:
- return Y
+ n, d = X.size()
+ Y = X.clone()
+ transforms = []
+ inv_transforms = []
+
+ for i in range(d):
+
+ if offsets is None:
+ xmin = torch.min(X[:, i]).clone().numpy()
+ xmax = torch.max(X[:, i]).clone().numpy()
+ else:
+ xmin = offsets[i][0]
+ xmax = offsets[i][1]
+
+ k = copy.copy(float((xmin - xmax) / ((low - high))))
+ q = copy.copy(float(xmin - low * k))
+
+ k2 = copy.copy(float((low - high) / (xmin - xmax)))
+ q2 = copy.copy(float(high - xmax * k2))
+
+ inv_transform = lambda a, k=k, q=q: k * a + q
+ transform = lambda a, k2=k2, q2=q2: k2 * a + q2
+
+ transforms.append(copy.copy(transform))
+ inv_transforms.append(copy.copy(inv_transform))
+
+ Y[:, i] = torch.from_numpy(np.apply_along_axis(transform, 0, X[:, i].numpy()))
+
+ trans = lambda Z: torch.stack(
+ [
+ torch.from_numpy(np.apply_along_axis(transforms[i], 0, Z[:, i].numpy()))
+ for i in range(d)
+ ]
+ ).T
+ inv_trans = lambda Y: torch.stack(
+ [
+ torch.from_numpy(np.apply_along_axis(inv_transforms[i], 0, Y[:, i].numpy()))
+ for i in range(d)
+ ]
+ ).T
+
+ if functions == True:
+ return Y, trans, inv_trans, transforms, inv_transforms
+ else:
+ return Y
diff --git a/stpy/helpers/voxel_grid.py b/stpy/helpers/voxel_grid.py
new file mode 100644
index 0000000..b4c309e
--- /dev/null
+++ b/stpy/helpers/voxel_grid.py
@@ -0,0 +1,63 @@
+from typing import List, Optional, Union
+
+
+import torch
+from torch import Tensor
+
+from torch_cluster import grid_cluster
+
+
+def _get_n_voxels(x, size: float):
+ size = torch.full([x.shape[1]], size)
+ indices = grid_cluster(x, size)
+ return indices.unique().numel()
+
+
+def voxel_grid(
+ x: Tensor,
+ size: Union[float, Tensor, None] = None,
+ max_n_voxels: int | None = None,
+) -> Tensor:
+
+ # Do binary search to find the right voxel size that yields <= max_n_voxels
+ if size is None:
+ assert max_n_voxels is not None, "One of size, n_voxels must be given"
+ max_size = (x.max(dim=0).values - x.min(dim=0).values).max().item()
+ tol = max_size / 1e7
+ low, high = 0, max_size
+ while high - low > tol:
+ mid = (low + high) / 2
+ n_voxels = _get_n_voxels(x, mid)
+ if n_voxels > max_n_voxels:
+ low = mid
+ else:
+ high = mid
+ size = high
+
+ if isinstance(size, float):
+ size = torch.full([x.shape[1]], size)
+ indices = grid_cluster(x, size).unsqueeze(1).expand(-1, x.shape[1])
+ out = torch.full(
+ [indices.max() + 1, x.shape[1]], torch.nan, dtype=x.dtype, device=x.device
+ )
+ averaged = out.scatter_reduce(0, indices, x, reduce="mean", include_self=False)
+ return averaged[~torch.isnan(averaged).any(dim=1)]
+
+
+if __name__ == "__main__":
+
+ # Example usage of voxel_grid
+ x = torch.tensor(
+ [
+ [0.1, 0.2, 0.3],
+ [2, 0, 0],
+ [0, 2, 0],
+ [0, 0, 2],
+ [2.1, 2.2, 2.3],
+ [3, 3, 3],
+ ]
+ )
+ size = 1.0
+
+ result = voxel_grid(x, max_n_voxels=3)
+ print(result)
diff --git a/stpy/helpers/wavelets.py b/stpy/helpers/wavelets.py
index 9a378af..5872fde 100644
--- a/stpy/helpers/wavelets.py
+++ b/stpy/helpers/wavelets.py
@@ -1,26 +1,26 @@
from mpmath import *
phi = lambda x: (0 <= x < 1) # scaling fct
-psi = lambda x: (0 <= x < .5) - (.5 <= x < 1) # wavelet fct
-phi_j_k = lambda x, j, k: 2 ** (j / 2) * phi(2 ** j * x - k)
-psi_j_k = lambda x, j, k: 2 ** (j / 2) * psi(2 ** j * x - k)
+psi = lambda x: (0 <= x < 0.5) - (0.5 <= x < 1) # wavelet fct
+phi_j_k = lambda x, j, k: 2 ** (j / 2) * phi(2**j * x - k)
+psi_j_k = lambda x, j, k: 2 ** (j / 2) * psi(2**j * x - k)
def haar(f, interval, level):
- c0 = quadgl(lambda t: f(t) * phi_j_k(t, 0, 0), interval)
+ c0 = quadgl(lambda t: f(t) * phi_j_k(t, 0, 0), interval)
- coef = []
- for j in xrange(0, level):
- for k in xrange(0, 2 ** j):
- djk = quadgl(lambda t: f(t) * psi_j_k(t, j, k), interval)
- coef.append((j, k, djk))
+    # Detail coefficients d_{j,k} for every level j < `level` and shift k < 2**j.
+    coef = []
+    # `range`, not `xrange`: the latter is not a builtin on Python 3.
+    for j in range(0, level):
+        for k in range(0, 2**j):
+            djk = quadgl(lambda t: f(t) * psi_j_k(t, j, k), interval)
+            coef.append((j, k, djk))
- return c0, coef
+ return c0, coef
def haarval(haar_coef, x):
- c0, coef = haar_coef
- s = c0 * phi_j_k(x, 0, 0)
- for j, k, djk in coef:
- s += djk * psi_j_k(x, j, k)
- return s
+ c0, coef = haar_coef
+ s = c0 * phi_j_k(x, 0, 0)
+ for j, k, djk in coef:
+ s += djk * psi_j_k(x, j, k)
+ return s
diff --git a/stpy/kernel_functions/additive_decorator.py b/stpy/kernel_functions/additive_decorator.py
index 718ef62..a477f67 100644
--- a/stpy/kernel_functions/additive_decorator.py
+++ b/stpy/kernel_functions/additive_decorator.py
@@ -1,5 +1,6 @@
def additive(func):
def wrapper():
- func()
- return wrapper
\ No newline at end of file
+ func()
+
+ return wrapper
diff --git a/stpy/kernel_functions/ard_kernel.py b/stpy/kernel_functions/ard_kernel.py
index 353cdc7..21a1ba7 100644
--- a/stpy/kernel_functions/ard_kernel.py
+++ b/stpy/kernel_functions/ard_kernel.py
@@ -3,91 +3,91 @@
def ard_kernel(a, b, **kwargs):
- p = KernelParams(kwargs)
- p.assert_existence(["ard_gamma", "kappa", "group"])
+ p = KernelParams(kwargs)
+ p.assert_existence(["ard_gamma", "kappa", "group"])
- a = a[:, p.group]
- b = b[:, p.group]
+ a = a[:, p.group]
+ b = b[:, p.group]
- D = torch.diag(1. / (p.ard_gamma[p.group]))
+ D = torch.diag(1.0 / (p.ard_gamma[p.group]))
- a = torch.mm(a, D)
- b = torch.mm(b, D)
+ a = torch.mm(a, D)
+ b = torch.mm(b, D)
- normx = torch.sum(a ** 2, dim=1).reshape(-1, 1)
- normy = torch.sum(b ** 2, dim=1).reshape(-1, 1)
+ normx = torch.sum(a**2, dim=1).reshape(-1, 1)
+ normy = torch.sum(b**2, dim=1).reshape(-1, 1)
- product = torch.mm(b, torch.t(a))
- sqdist = -2 * product + torch.t(normx) + normy
- arg = - 0.5 * sqdist
- res = torch.exp(arg)
- return p.kappa * res
+ product = torch.mm(b, torch.t(a))
+ sqdist = -2 * product + torch.t(normx) + normy
+ arg = -0.5 * sqdist
+ res = torch.exp(arg)
+ return p.kappa * res
def ard_kernel_diag(a, b, **kwargs):
- p = KernelParams(kwargs)
- p.assert_existence(["ard_gamma", "kappa", "group"])
+ p = KernelParams(kwargs)
+ p.assert_existence(["ard_gamma", "kappa", "group"])
- a = a[:, p.group]
- b = b[:, p.group]
+ a = a[:, p.group]
+ b = b[:, p.group]
- D = torch.diag(1. / (p.ard_gamma[p.group]))
- a = torch.mm(a, D)
- b = torch.mm(b, D)
- normx = torch.sum(a ** 2, dim=1).reshape(-1, 1)
- normy = torch.sum(b ** 2, dim=1).reshape(-1, 1)
+ D = torch.diag(1.0 / (p.ard_gamma[p.group]))
+ a = torch.mm(a, D)
+ b = torch.mm(b, D)
+ normx = torch.sum(a**2, dim=1).reshape(-1, 1)
+ normy = torch.sum(b**2, dim=1).reshape(-1, 1)
- product = torch.mm(b, torch.t(a))
- sqdist = -2 * product + torch.t(normx) + normy
- arg = - 0.5 * sqdist
- res = torch.exp(arg)
- return p.kappa * res
+ product = torch.mm(b, torch.t(a))
+ sqdist = -2 * product + torch.t(normx) + normy
+ arg = -0.5 * sqdist
+ res = torch.exp(arg)
+ return p.kappa * res
def ard_per_group_kernel_additive(self, a, b, **kwargs):
- if 'kappa' in kwargs.keys():
- kappa = kwargs['kappa']
- else:
- kappa = self.kappa
-
- if 'groups' in kwargs.keys():
- groups = kwargs['groups']
- else:
- groups = self.groups
-
- if 'ard_per_group' in kwargs.keys():
- ard_per_group = kwargs['ard_per_group']
- else:
- raise AssertionError("This kernel requires 'ard_per_group' initial parameters")
-
- (n, z) = tuple(a.size())
- (q, m) = tuple(b.size())
-
- r = torch.zeros(size=(q, n), dtype=torch.float64)
- groups_index = 0
-
- for group_add in groups:
- kwargs['group'] = group_add
-
- size_group = len(group_add)
- # use per group lenghtscale
- # kwargs['ard_gamma'] = ard_per_group[groups_index:groups_index+size_group]
- gamma = ard_per_group[groups_index:groups_index + size_group]
- groups_index += size_group
-
- ax = a[:, group_add]
- bx = b[:, group_add]
- D = torch.diag(1. / (gamma))
- ax = torch.mm(ax, D)
- bx = torch.mm(bx, D)
- normx = torch.sum(ax ** 2, dim=1).reshape(-1, 1)
- normy = torch.sum(bx ** 2, dim=1).reshape(-1, 1)
- product = torch.mm(bx, torch.t(ax))
- # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product
- sqdist = -2 * product + torch.t(normx) + normy
- arg = - 0.5 * sqdist
- res = torch.exp(arg)
- r = r + res
-
- r = r / float(len(groups))
- return kappa * r
\ No newline at end of file
+ if "kappa" in kwargs.keys():
+ kappa = kwargs["kappa"]
+ else:
+ kappa = self.kappa
+
+ if "groups" in kwargs.keys():
+ groups = kwargs["groups"]
+ else:
+ groups = self.groups
+
+ if "ard_per_group" in kwargs.keys():
+ ard_per_group = kwargs["ard_per_group"]
+ else:
+ raise AssertionError("This kernel requires 'ard_per_group' initial parameters")
+
+ (n, z) = tuple(a.size())
+ (q, m) = tuple(b.size())
+
+ r = torch.zeros(size=(q, n), dtype=torch.float64)
+ groups_index = 0
+
+ for group_add in groups:
+ kwargs["group"] = group_add
+
+ size_group = len(group_add)
+ # use per group lenghtscale
+ # kwargs['ard_gamma'] = ard_per_group[groups_index:groups_index+size_group]
+ gamma = ard_per_group[groups_index : groups_index + size_group]
+ groups_index += size_group
+
+ ax = a[:, group_add]
+ bx = b[:, group_add]
+ D = torch.diag(1.0 / (gamma))
+ ax = torch.mm(ax, D)
+ bx = torch.mm(bx, D)
+ normx = torch.sum(ax**2, dim=1).reshape(-1, 1)
+ normy = torch.sum(bx**2, dim=1).reshape(-1, 1)
+ product = torch.mm(bx, torch.t(ax))
+ # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product
+ sqdist = -2 * product + torch.t(normx) + normy
+ arg = -0.5 * sqdist
+ res = torch.exp(arg)
+ r = r + res
+
+ r = r / float(len(groups))
+ return kappa * r
diff --git a/stpy/kernel_functions/covar_kernel.py b/stpy/kernel_functions/covar_kernel.py
index 070cdc3..2ebdecc 100644
--- a/stpy/kernel_functions/covar_kernel.py
+++ b/stpy/kernel_functions/covar_kernel.py
@@ -1,20 +1,21 @@
import torch
from stpy.kernel_functions.kernel_params import KernelParams
+
def covar_kernel(a, b, **kwargs):
- p = KernelParams(kwargs)
- p.assert_existence(["cov", "kappa", "group"])
+ p = KernelParams(kwargs)
+ p.assert_existence(["cov", "kappa", "group"])
- a = a[:, p.group]
- b = b[:, p.group]
- a = torch.mm(a, p.cov)
- b = torch.mm(b, p.cov)
+ a = a[:, p.group]
+ b = b[:, p.group]
+ a = torch.mm(a, p.cov)
+ b = torch.mm(b, p.cov)
- normx = torch.sum(a ** 2, dim=1).reshape(-1, 1)
- normy = torch.sum(b ** 2, dim=1).reshape(-1, 1)
- product = torch.mm(b, torch.t(a))
+ normx = torch.sum(a**2, dim=1).reshape(-1, 1)
+ normy = torch.sum(b**2, dim=1).reshape(-1, 1)
+ product = torch.mm(b, torch.t(a))
- sqdist = -2 * product + torch.t(normx) + normy
- arg = - 0.5 * sqdist
- res = torch.exp(arg)
- return p.kappa * res
\ No newline at end of file
+ sqdist = -2 * product + torch.t(normx) + normy
+ arg = -0.5 * sqdist
+ res = torch.exp(arg)
+ return p.kappa * res
diff --git a/stpy/kernel_functions/custom_map_kernel.py b/stpy/kernel_functions/custom_map_kernel.py
index 62cc068..0e2ddea 100644
--- a/stpy/kernel_functions/custom_map_kernel.py
+++ b/stpy/kernel_functions/custom_map_kernel.py
@@ -1,14 +1,15 @@
from stpy.kernel_functions.kernel_params import KernelParams
from stpy.kernel_functions.linear_kernel import linear_kernel
+
def custom_map_kernel(a, b, **kwargs):
- p = KernelParams(kwargs)
- p.assert_existence(["map", "kappa", "group"])
+ p = KernelParams(kwargs)
+ p.assert_existence(["map", "kappa", "group"])
- a = a[:, p.group]
- b = b[:, p.group]
+ a = a[:, p.group]
+ b = b[:, p.group]
- if map is not None:
- return p.kappa * linear_kernel(torch.t(p.map(a)), torch.t(p.map(b))).detach()
- else:
- return p.kappa * linear_kernel(a, b)
\ No newline at end of file
+    # `a` and `b` are already restricted to p.group (and the mapped features
+    # have their own columns), so the inner linear kernel must use every
+    # column; kappa is applied exactly once, out here.
+    inner = {"kappa": 1.0, "group": slice(None)}
+    # Bare `map` is the Python builtin and is never None; test the supplied map.
+    if p.map is not None:
+        # Tensor.t() instead of torch.t(): this module does not import torch.
+        return p.kappa * linear_kernel(p.map(a).t(), p.map(b).t(), **inner).detach()
+    return p.kappa * linear_kernel(a, b, **inner)
diff --git a/stpy/kernel_functions/gibbs_custom_kernel.py b/stpy/kernel_functions/gibbs_custom_kernel.py
index d3f3a37..dd6b153 100644
--- a/stpy/kernel_functions/gibbs_custom_kernel.py
+++ b/stpy/kernel_functions/gibbs_custom_kernel.py
@@ -1,23 +1,24 @@
from stpy.kernel_functions.kernel_params import KernelParams
import torch
+
def gibbs_custom_kernel(a, b, **kwargs):
- p = KernelParams(kwargs)
- p.assert_existence(["gamma_fun", "kappa", "group"])
+ p = KernelParams(kwargs)
+ p.assert_existence(["gamma_fun", "kappa", "group"])
- a = a[:, p.group]
- b = b[:, p.group]
- # print (a.shape, b.shape)
- normx = torch.sum(a ** 2, dim=1).view(-1, 1)
- normy = torch.sum(b ** 2, dim=1).view(-1, 1)
+ a = a[:, p.group]
+ b = b[:, p.group]
+ # print (a.shape, b.shape)
+ normx = torch.sum(a**2, dim=1).view(-1, 1)
+ normy = torch.sum(b**2, dim=1).view(-1, 1)
- product = torch.mm(b, torch.t(a))
- # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product
- sqdist = -2 * product + torch.t(normx) + normy
+ product = torch.mm(b, torch.t(a))
+ # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product
+ sqdist = -2 * product + torch.t(normx) + normy
- lengthscales = p.gamma_fun(a, b)
+ lengthscales = p.gamma_fun(a, b)
- arg = (-0.5 / lengthscales) * sqdist
- res = torch.exp(arg)
- return p.kappa * res
\ No newline at end of file
+ arg = (-0.5 / lengthscales) * sqdist
+ res = torch.exp(arg)
+ return p.kappa * res
diff --git a/stpy/kernel_functions/gibbs_kernel.py b/stpy/kernel_functions/gibbs_kernel.py
index dbb4dc6..c9d9eca 100644
--- a/stpy/kernel_functions/gibbs_kernel.py
+++ b/stpy/kernel_functions/gibbs_kernel.py
@@ -1,24 +1,25 @@
import torch
from stpy.kernel_functions.kernel_params import KernelParams
+
def gibbs_kernel(a, b, **kwargs):
- p = KernelParams(kwargs)
- p.assert_existence(["gamma_fun", "kappa", "group"])
+ p = KernelParams(kwargs)
+ p.assert_existence(["gamma_fun", "kappa", "group"])
- a = a[:, p.group]
- b = b[:, p.group]
- # print (a.shape, b.shape)
- normx = torch.sum(a ** 2, dim=1).view(-1, 1)
- normy = torch.sum(b ** 2, dim=1).view(-1, 1)
+ a = a[:, p.group]
+ b = b[:, p.group]
+ # print (a.shape, b.shape)
+ normx = torch.sum(a**2, dim=1).view(-1, 1)
+ normy = torch.sum(b**2, dim=1).view(-1, 1)
- product = torch.mm(b, torch.t(a))
- # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product
- sqdist = -2 * product + torch.t(normx) + normy
+ product = torch.mm(b, torch.t(a))
+ # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product
+ sqdist = -2 * product + torch.t(normx) + normy
- lengthscales = (p.gamma_fun(a) ** 2 + p.gamma_fun(b).T ** 2)
+ lengthscales = p.gamma_fun(a) ** 2 + p.gamma_fun(b).T ** 2
- print(lengthscales)
+ print(lengthscales)
- arg = (-0.5 / lengthscales) * sqdist
- res = torch.exp(arg)
- return p.kappa * res
\ No newline at end of file
+ arg = (-0.5 / lengthscales) * sqdist
+ res = torch.exp(arg)
+ return p.kappa * res
diff --git a/stpy/kernel_functions/kernel_params.py b/stpy/kernel_functions/kernel_params.py
index 047091b..9e9f766 100644
--- a/stpy/kernel_functions/kernel_params.py
+++ b/stpy/kernel_functions/kernel_params.py
@@ -1,11 +1,10 @@
+class KernelParams:
-class KernelParams():
+ def __init__(self, param_dict):
+ for key in param_dict:
+ setattr(self, key, param_dict[key])
- def __init__(self, param_dict):
- for key in param_dict:
- setattr(self, key, param_dict[key])
-
- def assert_existence(self, names):
- for name in names:
- if not hasattr(self, name):
- raise AttributeError("Missing attribute of the kernel %s" % str(name))
+ def assert_existence(self, names):
+ for name in names:
+ if not hasattr(self, name):
+ raise AttributeError("Missing attribute of the kernel %s" % str(name))
diff --git a/stpy/kernel_functions/laplace_kernel.py b/stpy/kernel_functions/laplace_kernel.py
index a1f1ce0..ffa34ee 100644
--- a/stpy/kernel_functions/laplace_kernel.py
+++ b/stpy/kernel_functions/laplace_kernel.py
@@ -3,12 +3,13 @@
from sklearn.metrics.pairwise import check_pairwise_arrays, manhattan_distances
from stpy.kernel_functions.kernel_params import KernelParams
+
def laplace_kernel(a, b, **kwargs):
- p = KernelParams(kwargs)
- p.assert_existence(["gamma", "kappa", "group"])
+ p = KernelParams(kwargs)
+ p.assert_existence(["gamma", "kappa", "group"])
- a = a[:, p.group]
- b = b[:, p.group]
- K = - manhattan_distances(a, b) / p.gamma ** 2
- K = np.exp(K) # exponentiate K in-place
- return p.kappa * torch.from_numpy(K).T
\ No newline at end of file
+ a = a[:, p.group]
+ b = b[:, p.group]
+ K = -manhattan_distances(a, b) / p.gamma**2
+ K = np.exp(K) # exponentiate K in-place
+ return p.kappa * torch.from_numpy(K).T
diff --git a/stpy/kernel_functions/linear_kernel.py b/stpy/kernel_functions/linear_kernel.py
index 93403c4..1c0603b 100644
--- a/stpy/kernel_functions/linear_kernel.py
+++ b/stpy/kernel_functions/linear_kernel.py
@@ -1,15 +1,16 @@
from stpy.kernel_functions.kernel_params import KernelParams
+
def linear_kernel(a, b, **kwargs):
- """
- linear kernl
- :param a:
- :param b:
- :param kwargs:
- :return:
- """
- p = KernelParams(kwargs)
- p.assert_existence(["kappa", "group"])
- a = a[:, group]
- b = b[:, group]
- return kappa * (b @ a.T)
\ No newline at end of file
+    """
+    Linear kernel restricted to the columns selected by `group`.
+
+    :param a: first set of points, one point per row
+    :param b: second set of points, one point per row
+    :param kwargs: must include kappa (multiplicative scale) and group (column index)
+    :return: kappa * (b @ a.T) computed on the selected columns
+    :raises AttributeError: if kappa or group is missing from kwargs
+    """
+    p = KernelParams(kwargs)
+    p.assert_existence(["kappa", "group"])
+    # The parameters live on `p`; bare `group` / `kappa` were undefined names.
+    a = a[:, p.group]
+    b = b[:, p.group]
+    return p.kappa * (b @ a.T)
diff --git a/stpy/kernel_functions/squared_exponential_kernel.py b/stpy/kernel_functions/squared_exponential_kernel.py
index 0297a99..7b6fe26 100644
--- a/stpy/kernel_functions/squared_exponential_kernel.py
+++ b/stpy/kernel_functions/squared_exponential_kernel.py
@@ -1,38 +1,131 @@
-import numpy as np
import torch
from stpy.kernel_functions.kernel_params import KernelParams
+
def squared_exponential_kernel(a, b, **kwargs):
- """
-
- :param a:
- :param b:
- :param kwargs: must include gamma, kappa, group
- :return:
- """
- p = KernelParams(kwargs)
- p.assert_existence(["gamma", "kappa", "group"])
-
- a = a[:, p.group]
- b = b[:, p.group]
- # print (a.shape, b.shape)
- normx = torch.sum(a ** 2, dim=1).view(-1, 1)
- normy = torch.sum(b ** 2, dim=1).view(-1, 1)
-
- product = torch.mm(b, torch.t(a))
- # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product
- sqdist = -2 * product + torch.t(normx) + normy
- arg = (-0.5 / (p.gamma * p.gamma)) * sqdist
- res = torch.exp(arg)
- return p.kappa * res
-
-def squared_exponential_kernel_diag(a,b, **kwargs):
- p = KernelParams(kwargs)
- p.assert_existence(["gamma", "kappa", "group"])
-
- a = a[:, p.group]
- b = b[:, p.group]
- sqdist = (a-b)**2
- arg = (-0.5 / (p.gamma * p.gamma)) * sqdist
- res = torch.exp(arg)
- return p.kappa * res
\ No newline at end of file
+ """
+
+ :param a:
+ :param b:
+ :param kwargs: must include gamma, kappa, group
+ :return:
+ """
+ p = KernelParams(kwargs)
+ p.assert_existence(["gamma", "kappa", "group"])
+
+ a = a[:, p.group]
+ b = b[:, p.group]
+ # print (a.shape, b.shape)
+ normx = torch.sum(a**2, dim=1).view(-1, 1)
+ normy = torch.sum(b**2, dim=1).view(-1, 1)
+
+ product = torch.mm(b, torch.t(a))
+ # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product
+ sqdist = -2 * product + torch.t(normx) + normy
+ arg = (-0.5 / (p.gamma * p.gamma)) * sqdist
+ res = torch.exp(arg)
+ return p.kappa * res
+
+
+def squared_exponential_kernel_diag(a, b, **kwargs):
+ p = KernelParams(kwargs)
+ p.assert_existence(["gamma", "kappa", "group"])
+
+ a = a[:, p.group]
+ b = b[:, p.group]
+ sqdist = (a - b) ** 2
+ arg = (-0.5 / (p.gamma * p.gamma)) * sqdist
+ res = torch.exp(arg)
+ return p.kappa * res
+
+
+def squared_exponential_integral(a_x, a_y, b_x, b_y, **kwargs):
+    r"""
+    Integrate the squared exponential kernel over axis-aligned rectangles.
+
+    Returns the function
+    $g(x) = \int_{a_x}^{b_x} \int_{a_y}^{b_y} \kappa \exp(-\|x - s\|^2 / (2 \gamma^2)) \, ds_2 \, ds_1$,
+    i.e. gamma acts as a lengthscale, as in squared_exponential_kernel.
+
+    NOTE(review): the `__main__` self-test of this module expects the values of
+    $\kappa \exp(-\gamma \|x - s\|^2)$ instead, which does not agree with the
+    computation below -- confirm which convention is intended.
+
+    Parameters:
+    - a_x: torch.Tensor, lower bounds in x-dimension (shape: [N])
+    - a_y: torch.Tensor, lower bounds in y-dimension (shape: [N])
+    - b_x: torch.Tensor, upper bounds in x-dimension (shape: [N])
+    - b_y: torch.Tensor, upper bounds in y-dimension (shape: [N])
+    - kwargs: should give attributes gamma (float) and kappa (float)
+
+    Returns:
+    - A function such that squared_exponential_integral(a_x, a_y, b_x, b_y)(x)[i][j]
+      is equal to $g(x_j)$ where $g$ is created from a_x[i], a_y[i], b_x[i], b_y[i]
+
+    Raises:
+    - AttributeError if gamma or kappa is missing from kwargs
+    """
+    p = KernelParams(kwargs)
+    p.assert_existence(["gamma", "kappa"])
+    gamma = p.gamma
+    kappa = p.kappa
+    sqrt_2 = torch.sqrt(torch.tensor(2.0))
+
+    def g(x):
+        """
+        Compute the integral g(x) for multiple 2D points x.
+
+        Parameters:
+        - x: torch.Tensor, input points of shape [M, 2] where each row is a 2D point.
+
+        Returns:
+        - torch.Tensor of shape [len(a_x), len(x)], where result[i][j] is g(x_j) for bounds from a_x[i], a_y[i], b_x[i], b_y[i].
+        """
+        x1, x2 = x[:, 0], x[:, 1]  # Extract x1 and x2 from input tensor x
+        a_x_broadcast = a_x.unsqueeze(1)  # Shape [N, 1]
+        a_y_broadcast = a_y.unsqueeze(1)  # Shape [N, 1]
+        b_x_broadcast = b_x.unsqueeze(1)  # Shape [N, 1]
+        b_y_broadcast = b_y.unsqueeze(1)  # Shape [N, 1]
+
+        # Compute the error function terms
+        erf_x1_a = torch.erf((a_x_broadcast - x1) / (gamma * sqrt_2))
+        erf_x1_b = torch.erf((b_x_broadcast - x1) / (gamma * sqrt_2))
+        erf_x2_a = torch.erf((a_y_broadcast - x2) / (gamma * sqrt_2))
+        erf_x2_b = torch.erf((b_y_broadcast - x2) / (gamma * sqrt_2))
+
+        # Product of the per-axis differences; both factors carry the same
+        # (lower - upper) sign, so the product is non-negative
+        integral_values = (erf_x1_a - erf_x1_b) * (erf_x2_a - erf_x2_b)
+
+        # Each axis contributes gamma * sqrt(pi / 2), hence pi * gamma**2 / 2
+        result = (torch.pi * kappa * (gamma**2) / 2.0) * integral_values
+
+        return result
+
+    return g
+
+
+if __name__ == "__main__":
+ # Test squared_exponential_integral
+ a_x = torch.tensor([-float("inf"), -float("inf")])
+ a_y = torch.tensor([-float("inf"), -float("inf")])
+ b_x = torch.tensor([float("inf"), float("inf")])
+ b_y = torch.tensor([float("inf"), float("inf")])
+
+ gamma = 1.0
+ kappa = 1.0
+ g = squared_exponential_integral(a_x, a_y, b_x, b_y, gamma=gamma, kappa=kappa)
+ x = torch.tensor([[87, 0], [1123, 11]])
+ assert torch.allclose(g(x), torch.tensor([torch.pi, torch.pi]))
+
+ # Test with new bounds x in [0,1] and y in [0,1]
+ a_x = torch.tensor([0.0])
+ a_y = torch.tensor([0.0])
+ b_x = torch.tensor([1.0])
+ b_y = torch.tensor([1.0])
+
+ g = squared_exponential_integral(a_x, a_y, b_x, b_y, gamma=10e-6, kappa=kappa)
+ x = torch.tensor([[0.5, 0.5], [0.25, 0.75]])
+ assert torch.allclose(g(x), torch.tensor([1.0, 1.0]))
+
+ a_x = torch.tensor([0.0, 1.0])
+ a_y = torch.tensor([0.0, 2.0])
+ b_x = torch.tensor([1.0, 3.0])
+ b_y = torch.tensor([1.0, 4.0])
+
+ g = squared_exponential_integral(a_x, a_y, b_x, b_y, gamma=0.5, kappa=3.0)
+ x = torch.tensor([[0.5, 0.5], [2.0, 3.0]])
+ result = g(x)
+ assert torch.allclose(
+ result, torch.tensor([[2.7639, 0.0548], [0.3794, 8.7851]]), atol=1e-4
+ )
+
+ torch.ones(())
diff --git a/stpy/kernel_functions/step_kernel.py b/stpy/kernel_functions/step_kernel.py
index 0ddfa78..0a643d4 100644
--- a/stpy/kernel_functions/step_kernel.py
+++ b/stpy/kernel_functions/step_kernel.py
@@ -1,20 +1,21 @@
from stpy.kernel_functions.kernel_params import KernelParams
import torch
+
def step_kernel(a, b, **kwargs):
- p = KernelParams(kwargs)
- p.assert_existence(["kappa", "group"])
+ p = KernelParams(kwargs)
+ p.assert_existence(["kappa", "group"])
- a = a[:, p.group]
- b = b[:, p.group]
+ a = a[:, p.group]
+ b = b[:, p.group]
- n, d = a.size()
- m, d = b.size()
+ n, d = a.size()
+ m, d = b.size()
- K = torch.zeros(size=(n, m)).double()
+ K = torch.zeros(size=(n, m)).double()
- for i in range(n):
- for j in range(m):
- K[i, j] = a[i, :] + b[j, :] - torch.abs(a[i, :] - b[j, :])
+ for i in range(n):
+ for j in range(m):
+ K[i, j] = a[i, :] + b[j, :] - torch.abs(a[i, :] - b[j, :])
- return p.kappa * K.T
\ No newline at end of file
+ return p.kappa * K.T
diff --git a/stpy/kernels.py b/stpy/kernels.py
index c05e32b..a151417 100755
--- a/stpy/kernels.py
+++ b/stpy/kernels.py
@@ -5,1119 +5,1174 @@
from scipy.spatial.distance import cdist
from scipy.special import kv
from sklearn.metrics.pairwise import check_pairwise_arrays, manhattan_distances
-from stpy.kernel_functions.squared_exponential_kernel import squared_exponential_kernel_diag
+from stpy.kernel_functions.squared_exponential_kernel import (
+ squared_exponential_integral,
+ squared_exponential_kernel_diag,
+)
+
class KernelFunction:
- def __init__(self, kernel_function=None, kernel_name="squared_exponential", \
- freq=None, groups=None, d=1, gamma=1, ard_gamma=None, nu=1.5, kappa=1, map=None, power=2,
- cov=None, params=None, group=None, offset = 0. ):
-
- if kernel_function is not None:
- self.kernel_function = kernel_function
- self.optkernel = "custom"
- self.kappa = kappa
- self.offset = offset
- if params is None:
- self.params = {'kappa': self.kappa}
- else:
- self.params = params
- self.initial_params = self.params
-
- if group is None:
- self.group = [i for i in range(d)]
- else:
- self.group = group
- self.d = d
- else:
- self.offset = offset
- self.optkernel = kernel_name
- self.gamma = gamma
- if ard_gamma is None:
- self.ard_gamma = torch.ones(d).double()
- else:
- try:
- self.ard_gamma = torch.Tensor([ard_gamma]).double()
- except:
- self.ard_gamma = ard_gamma
- self.power = power
- self.v = nu
-
- if params is not None:
- self.initial_params = params
- else:
- self.initial_params = {'kappa':kappa}
-
- if cov is None:
- self.cov = torch.eye(d).double()
- else:
- self.cov = cov
-
- if group is None:
- self.group = [i for i in range(d)]
- else:
- self.group = group
-
- self.map = map
- self.groups = groups
- self.kappa = kappa
- self.freq = freq
- self.d = d
- self.add = False
-
- self.kernel_function_list = [self.get_kernel_internal()]
- self.kernel_diag_function_list = [self.get_kernel_internal(diag = True)]
- self.optkernel_list = [self.optkernel]
- self.params_dict = {'0': self.params}
- self.kernel_items = 1
-
- self.operations = ["-"]
-
- def __combine__(self, second_kernel_object):
- self.kernel_function_list = self.kernel_function_list + second_kernel_object.kernel_function_list
- self.optkernel_list = self.optkernel_list + second_kernel_object.optkernel_list
- self.operations = self.operations + second_kernel_object.operations[1:]
- for key, value in second_kernel_object.params_dict.items():
- self.params_dict[str(self.kernel_items)] = value
- self.kernel_items += 1
-
- def __add__(self, second_kernel_object):
- self.__combine__(second_kernel_object)
- diff = len(set(second_kernel_object.group) - set(self.group))
- self.d += diff
- self.operations.append("+")
- return self
-
- def __mul__(self, second_kernel_object):
- self.__combine__(second_kernel_object)
- self.operations.append("*")
- return self
-
- def description(self):
- desc = "Kernel description:"
- for index in range(0, self.kernel_items, 1):
- desc = desc + "\n\n\tkernel: " + self.optkernel_list[index]
- desc = desc + "\n\toperation: " + self.operations[index]
- desc = desc + "\n\t" + "\n\t".join(
- ["{0}={1}".format(key, value) for key, value in self.params_dict[str(index)].items()])
- return desc
-
- def add_groups(self, dict):
- for a in self.params_dict.keys():
- if a not in dict.keys():
- dict[a] = {}
- dict[a]['group'] = self.params_dict[a]['group']
- return dict
-
- def kernel_diag(self, a,b, **kwargs):
- if len(kwargs) > 0:
- # params_dict = list(kwargs)
- # we need to send
- params_dict = kwargs
- self.add_groups(params_dict)
- else:
- params_dict = self.params_dict
-
- for i in range(0, len(self.kernel_function_list), 1):
- k = self.kernel_diag_function_list[i]
- if str(i) in params_dict.keys():
- arg = params_dict[str(i)]
- else:
- arg = {}
- if self.operations[i] == "+":
- output = output + k(a, b, **arg)
- elif self.operations[i] == "*":
- output = output * k(a, b, **arg)
- else:
- output = k(a, b, **arg)
-
- return output
-
- def kernel(self, a, b, **kwargs):
-
- if len(kwargs) > 0:
- # params_dict = list(kwargs)
- # we need to send
- params_dict = kwargs
- self.add_groups(params_dict)
- else:
- params_dict = self.params_dict
-
- for i in range(0, len(self.kernel_function_list), 1):
- k = self.kernel_function_list[i]
- if str(i) in params_dict.keys():
- arg = params_dict[str(i)]
- else:
- arg = {}
- if self.operations[i] == "+":
- output = output + k(a, b, **arg)
- elif self.operations[i] == "*":
- output = output * k(a, b, **arg)
- else:
- output = k(a, b, **arg)
-
- return output
-
- def get_param_refs(self):
- return self.params_dict
-
- def get_kernel(self):
- return self.kernel
-
- def get_kernel_internal(self, diag = False):
-
- self.params = {**self.initial_params, 'kappa': self.kappa, 'group': self.group, 'offset': self.offset}
-
- if self.optkernel == "squared_exponential":
- self.params = dict(**self.params, **{'gamma': self.gamma})
- if diag:
- return squared_exponential_kernel_diag
- else:
- return self.squared_exponential_kernel
-
- elif self.optkernel == "ard" and (self.groups is None):
- self.params = dict(**self.params, **{'ard_gamma': self.ard_gamma})
- if diag:
- return self.ard_kernel
- else:
- return self.ard_kernel_diag
-
-
- elif self.optkernel == "linear":
- return self.linear_kernel
-
- elif self.optkernel == "laplace":
- self.params = dict(**self.params, **{'gamma': self.gamma})
- return self.laplace_kernel
-
- elif self.optkernel == "modified_matern":
- self.params = dict(**self.params, **{'gamma': self.gamma, 'nu': self.v})
- return self.modified_matern_kernel
-
- elif self.optkernel == "custom":
- return self.kernel_function
-
- elif self.optkernel == "tanh":
- return self.tanh_kernel
-
- elif self.optkernel == 'step':
- return self.step_kernel
-
- elif self.optkernel == "angsim":
- return self.angsim_kernel
-
- elif self.optkernel == "matern":
- self.params = dict(**self.params, **{'gamma': self.gamma, 'nu': self.v})
- return self.matern_kernel
-
- elif self.optkernel == "ard_matern":
- self.params = dict(**self.params, **{'ard_gamma': self.ard_gamma, 'nu': self.v})
-
- if diag:
- return self.ard_matern_kernel_diag
- else:
- return self.ard_matern_kernel
-
- elif self.optkernel == "full_covariance_se":
- self.params = dict(**self.params, **{'cov': self.cov})
- return self.covar_kernel
-
- elif self.optkernel == "full_covariance_matern":
- self.params = dict(**self.params, **{'cov': self.cov, 'nu': self.v})
- return self.covar_kernel_matern
-
- elif (self.optkernel == "polynomial") and (self.groups is None):
- self.params = dict(**self.params, **{'degree': self.power})
- return self.polynomial_kernel
-
- elif (self.optkernel == "polynomial") and (self.groups is not None):
- self.params = dict(**self.params, **{'degree': self.power, 'groups': self.groups})
- return self.polynomial_additive_kernel
-
- elif self.optkernel == "ard" and (self.groups is not None):
- self.params = dict(**self.params, **{'ard_gamma': self.ard_gamma, 'groups': self.groups})
- return self.ard_kernel_additive
-
- elif self.optkernel == "squared_exponential_per_group" and (self.groups is not None):
- self.params = dict(**self.params, **{'groups': self.groups})
- return self.squared_exponential_per_group_kernel_additive
-
- elif self.optkernel == "ard_per_group" and (self.groups is not None):
- self.params = dict(**self.params, **{'groups': self.groups})
- return self.ard_per_group_kernel_additive
-
- elif self.optkernel == "gibbs":
- self.params = dict(**self.params, **{'groups': self.groups})
- return self.gibbs_kernel
-
- elif self.optkernel == "gibbs_custom":
- self.params = dict(**self.params, **{'groups': self.groups})
- return self.gibbs_custom_kernel
-
- elif self.optkernel == "random_map":
- return self.random_map_kernel
-
- else:
- raise AssertionError("Kernel not implemented.")
-
- def embed(self, x):
- if self.optkernel == "linear":
- return x
- else:
- raise AttributeError("This type of kernel does not support a finite dimensional embedding")
-
- def get_basis_size(self):
- if self.optkernel == "linear":
- return self.d
- else:
- raise AttributeError("This type of kernel does not support a finite dimensional embedding")
-
- def step_kernel(self, a, b, **kwargs):
- if 'kappa' in kwargs.keys():
- kappa = kwargs['kappa']
- else:
- kappa = self.kappa
-
- if 'group' in kwargs.keys():
- group = kwargs['group']
- else:
- group = self.group
-
- a = a[:, group]
- b = b[:, group]
-
- n, d = a.size()
- m, d = b.size()
-
- K = torch.zeros(size=(n, m)).double()
-
- for i in range(n):
- for j in range(m):
- K[i, j] = a[i, :] + b[j, :] - torch.abs(a[i, :] - b[j, :])
-
- return kappa * K.T
-
- def linear_kernel(self, a, b, **kwargs):
- """
- GP linear kernel
- """
- if 'kappa' in kwargs.keys():
- kappa = kwargs['kappa']
- else:
- kappa = self.kappa
-
- if 'group' in kwargs.keys():
- group = kwargs['group']
- else:
- group = self.group
-
- if 'offset' in kwargs.keys():
- offset = kwargs['offset']
- else:
- offset = self.offset
- a = a[:, group]
- b = b[:, group]
- return kappa * (b @ a.T) + offset
-
- def custom_map_kernel(self, a, b, **kwargs):
- if 'kappa' in kwargs.keys():
- kappa = kwargs['kappa']
- else:
- kappa = self.kappa
-
- if 'group' in kwargs.keys():
- group = kwargs['group']
- else:
- group = self.group
-
-
- if 'map' in kwargs.keys():
- map = kwargs['map']
- else:
- map = self.map
-
- a = a[:, group]
- b = b[:, group]
-
- if map is not None:
- return kappa * self.linear_kernel(torch.t(self.map.map(a)), torch.t(self.map.map(b))).detach()
- else:
- return kappa * self.linear_kernel(a, b)
-
- def laplace_kernel(self, a, b, **kwargs):
- if 'gamma' in kwargs.keys():
- gamma = kwargs['gamma']
- else:
- gamma = self.gamma
-
- if 'kappa' in kwargs.keys():
- kappa = kwargs['kappa']
- else:
- kappa = self.kappa
- if 'group' in kwargs.keys():
- group = kwargs['group']
- else:
- group = self.group
-
- a = a[:, group]
- b = b[:, group]
- K = - manhattan_distances(a, b) / gamma ** 2
- K = np.exp(K) # exponentiate K in-place
- return kappa * torch.from_numpy(K).T
-
- def squared_exponential_kernel(self, a, b, **kwargs):
- """
- GP squared exponential kernel
- """
- if 'gamma' in kwargs.keys():
- gamma = kwargs['gamma']
- else:
- gamma = self.gamma
-
- if 'kappa' in kwargs.keys():
- kappa = kwargs['kappa']
- else:
- kappa = self.kappa
-
- if 'group' in kwargs.keys():
- group = kwargs['group']
- else:
- group = self.group
-
- a = a[:, group]
- b = b[:, group]
- # print (a.shape, b.shape)
- normx = torch.sum(a ** 2, dim=1).view(-1, 1)
- normy = torch.sum(b ** 2, dim=1).view(-1, 1)
-
- product = torch.mm(b, torch.t(a))
- # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product
- sqdist = -2 * product + torch.t(normx) + normy
- arg = (-0.5 / (gamma * gamma)) * sqdist
- res = torch.exp(arg)
- return kappa * res
-
- def gibbs_custom_kernel(self, a, b, **kwargs):
- if 'gamma_fun' in kwargs.keys():
- gamma_fun = kwargs['gamma_fun']
- else:
- raise AttributeError("Missing gamma_fun in Gibbs kernel definition.")
-
- if 'kappa' in kwargs.keys():
- kappa = kwargs['kappa']
- else:
- kappa = self.kappa
- if 'group' in kwargs.keys():
- group = kwargs['group']
- else:
- group = self.group
-
- a = a[:, group]
- b = b[:, group]
- # print (a.shape, b.shape)
- normx = torch.sum(a ** 2, dim=1).view(-1, 1)
- normy = torch.sum(b ** 2, dim=1).view(-1, 1)
-
- product = torch.mm(b, torch.t(a))
- # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product
- sqdist = -2 * product + torch.t(normx) + normy
-
- lengthscales = gamma_fun(a, b)
-
- arg = (-0.5 / lengthscales) * sqdist
- res = torch.exp(arg)
- return kappa * res
-
- def gibbs_kernel(self, a, b, **kwargs):
- if 'gamma_fun' in kwargs.keys():
- gamma_fun = kwargs['gamma_fun']
- else:
- raise AttributeError("Missing gamma_fun in Gibbs kernel definition.")
-
- if 'kappa' in kwargs.keys():
- kappa = kwargs['kappa']
- else:
- kappa = self.kappa
- if 'group' in kwargs.keys():
- group = kwargs['group']
- else:
- group = self.group
-
- a = a[:, group]
- b = b[:, group]
- # print (a.shape, b.shape)
- normx = torch.sum(a ** 2, dim=1).view(-1, 1)
- normy = torch.sum(b ** 2, dim=1).view(-1, 1)
-
- product = torch.mm(b, torch.t(a))
- # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product
- sqdist = -2 * product + torch.t(normx) + normy
-
- lengthscales = (gamma_fun(a) ** 2 + gamma_fun(b).T ** 2)
-
- print(lengthscales)
-
- arg = (-0.5 / lengthscales) * sqdist
- res = torch.exp(arg)
- return kappa * res
-
- def covar_kernel(self, a, b, **kwargs):
- """
- :param a:
- :param b:
- :param cov: square-root of the covariance matrix
- :return:
- """
-
- if 'kappa' in kwargs.keys():
- kappa = kwargs['kappa']
- else:
- kappa = self.kappa
-
- if 'cov' in kwargs.keys():
- cov = kwargs['cov']
- else:
- cov = self.cov
- if 'group' in kwargs.keys():
- group = kwargs['group']
- else:
- group = self.group
-
- a = a[:, group]
- b = b[:, group]
- a = torch.mm(a, cov)
- b = torch.mm(b, cov)
-
- normx = torch.sum(a ** 2, dim=1).reshape(-1, 1)
- normy = torch.sum(b ** 2, dim=1).reshape(-1, 1)
-
- product = torch.mm(b, torch.t(a))
- sqdist = -2 * product + torch.t(normx) + normy
- arg = - 0.5 * sqdist
- res = torch.exp(arg)
- return kappa * res
-
-
- def covar_kernel_matern(self, a, b, **kwargs):
- """
- :param a:
- :param b:
- :param cov: square-root of the covariance matrix
- :return:
- """
-
- if 'kappa' in kwargs.keys():
- kappa = kwargs['kappa']
- else:
- kappa = self.kappa
-
- if 'cov' in kwargs.keys():
- cov = kwargs['cov']
- else:
- cov = self.cov
- if 'v' in kwargs.keys():
- v = kwargs['v']
- else:
- v = self.v
- if 'group' in kwargs.keys():
- group = kwargs['group']
- else:
- group = self.group
-
- a = a[:, group]
- b = b[:, group]
- a = torch.mm(a, cov)
- b = torch.mm(b, cov)
-
- dists = torch.cdist(a, b, p=2).T
-
- if v == 0.5:
- K = torch.exp(-dists)
- elif v == 1.5:
- K = dists * np.sqrt(3)
- K = (1. + K) * torch.exp(-K)
- elif v == 2.5:
- K = dists * np.sqrt(5)
- K = (1. + K + K ** 2 / 3.0) * torch.exp(-K)
- else: # general case; expensive to evaluate
- K = dists
- K[K == 0.0] += np.finfo(float).eps # strict zeros result in nan
- tmp = (np.sqrt(2 * v) * K)
- K.fill((2 ** (1. - v)) / math.gamma(v))
- K *= tmp ** v
- K *= kv(v, tmp)
- return kappa * K
-
-
- def ard_kernel(self, a, b, **kwargs):
-
- if 'kappa' in kwargs.keys():
- kappa = kwargs['kappa']
- else:
- kappa = self.kappa
-
- if 'ard_gamma' in kwargs.keys():
- gamma = kwargs['ard_gamma']
- else:
- gamma = self.ard_gamma
-
- if 'group' in kwargs.keys():
- group = kwargs['group']
- else:
- group = self.group
-
- a = a[:, group]
- b = b[:, group]
-
- D = torch.diag(1. / (gamma[group]))
- a = torch.mm(a, D)
- b = torch.mm(b, D)
- normx = torch.sum(a ** 2, dim=1).reshape(-1, 1)
- normy = torch.sum(b ** 2, dim=1).reshape(-1, 1)
-
- product = torch.mm(b, torch.t(a))
- # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product
- sqdist = -2 * product + torch.t(normx) + normy
- arg = - 0.5 * sqdist
- res = torch.exp(arg)
- return kappa * res
-
- def ard_kernel_diag(self, a, b, **kwargs):
-
- if 'kappa' in kwargs.keys():
- kappa = kwargs['kappa']
- else:
- kappa = self.kappa
-
- if 'ard_gamma' in kwargs.keys():
- gamma = kwargs['ard_gamma']
- else:
- gamma = self.ard_gamma
-
- if 'group' in kwargs.keys():
- group = kwargs['group']
- else:
- group = self.group
-
- a = a[:, group]
- b = b[:, group]
-
- D = torch.diag(1. / (gamma[group]))
- a = torch.mm(a, D)
- b = torch.mm(b, D)
- normx = torch.sum(a ** 2, dim=1).reshape(-1, 1)
- normy = torch.sum(b ** 2, dim=1).reshape(-1, 1)
-
- product = torch.mm(b, torch.t(a))
- # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product
- sqdist = -2 * product + torch.t(normx) + normy
- arg = - 0.5 * sqdist
- res = torch.exp(arg)
- return kappa * res
-
-
-
- def ard_per_group_kernel_additive(self,a,b,**kwargs):
- if 'kappa' in kwargs.keys():
- kappa = kwargs['kappa']
- else:
- kappa = self.kappa
-
- if 'groups' in kwargs.keys():
- groups = kwargs['groups']
- else:
- groups = self.groups
-
- if 'ard_per_group' in kwargs.keys():
- ard_per_group = kwargs['ard_per_group']
- else:
- raise AssertionError("This kernel requires 'ard_per_group' initial parameters")
-
- (n, z) = tuple(a.size())
- (q, m) = tuple(b.size())
-
- r = torch.zeros(size=(q, n), dtype=torch.float64)
- groups_index = 0
-
- for group_add in groups:
- kwargs['group'] = group_add
-
- size_group = len(group_add)
- # use per group lenghtscale
- #kwargs['ard_gamma'] = ard_per_group[groups_index:groups_index+size_group]
- gamma = ard_per_group[groups_index:groups_index+size_group]
- groups_index +=size_group
-
- ax = a[:, group_add]
- bx = b[:, group_add]
- D = torch.diag(1. / (gamma))
- ax = torch.mm(ax, D)
- bx = torch.mm(bx, D)
- normx = torch.sum(ax ** 2, dim=1).reshape(-1, 1)
- normy = torch.sum(bx ** 2, dim=1).reshape(-1, 1)
- product = torch.mm(bx, torch.t(ax))
- # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product
- sqdist = -2 * product + torch.t(normx) + normy
- arg = - 0.5 * sqdist
- res = torch.exp(arg)
- r = r + res
-
- r = r / float(len(groups))
- return kappa*r
-
- def squared_exponential_per_group_kernel_additive(self,a,b,**kwargs):
- if 'kappa' in kwargs.keys():
- kappa = kwargs['kappa']
- else:
- kappa = self.kappa
-
- if 'groups' in kwargs.keys():
- groups = kwargs['groups']
- else:
- groups = self.groups
-
- if 'gamma_per_group' in kwargs.keys():
- gamma_per_group = kwargs['gamma_per_group']
- else:
- raise AssertionError("This kernel requires 'gamma_per_group' initial parameters")
-
- (n, z) = tuple(a.size())
- (q, m) = tuple(b.size())
-
- r = torch.zeros(size=(q, n), dtype=torch.float64)
-
- for group_add, gamma in zip(groups,gamma_per_group):
- kwargs['group'] = group_add
-
- # use per group lenghtscale
- kwargs['gamma'] = gamma
-
- r = r + self.squared_exponential_kernel(a, b, **kwargs)
-
- r = kappa * r / float(len(groups))
- return r
-
- def ard_kernel_additive(self, a, b, **kwargs):
- if 'kappa' in kwargs.keys():
- kappa = kwargs['kappa']
- else:
- kappa = self.kappa
-
- if 'groups' in kwargs.keys():
- groups = kwargs['groups']
- else:
- groups = self.groups
-
- if 'group' in kwargs.keys():
- group = kwargs['group']
- else:
- group = self.group
-
- a = a[:, group]
- b = b[:, group]
-
- (n, z) = tuple(a.size())
- (q, m) = tuple(b.size())
-
- r = torch.zeros(size=(q, n), dtype=torch.float64)
-
- for group_add in groups:
- kwargs['group'] = group_add
- r = r + self.ard_kernel(a, b, **kwargs)
-
- r = r / float(len(groups))
- return r
-
- def tanh_kernel(self, a, b, **kwargs):
- """
- GP squared exponential kernel
- """
- # print (a.shape, b.shape)
-
- if 'kappa' in kwargs.keys():
- kappa = kwargs['kappa']
- else:
- kappa = self.kappa
-
- if 'group' in kwargs.keys():
- group = kwargs['group']
- else:
- group = self.group
-
- a = a[:, group]
- b = b[:, group]
-
- X, Y = check_pairwise_arrays(a.numpy(), b.numpy())
- K = manhattan_distances(a.numpy(), b.numpy())
- K = K.T
- eps = 10e-10
- q = 3
- A = (np.tanh(K) ** q) / (eps + K ** q)
- return kappa * torch.from_numpy(A)
-
- def angsim_kernel(self, a, b, **kwargs):
- if 'kappa' in kwargs.keys():
- kappa = kwargs['kappa']
- else:
- kappa = self.kappa
-
- return kappa * (2. / np.pi) * np.arcsin((a.dot(b)) / (a.norm() * b.norm()))
-
- def polynomial_kernel(self, a, b, **kwargs):
- if 'kappa' in kwargs.keys():
- kappa = kwargs['kappa']
- else:
- kappa = self.kappa
- if 'degree' in kwargs.keys():
- power = kwargs['degree']
- else:
- power = self.power
- if 'group' in kwargs.keys():
- group = kwargs['group']
- else:
- group = self.group
-
- a = a[:, group]
- b = b[:, group]
-
- K = (torch.mm(b, torch.t(a)) + 1) ** power
- return kappa * K
-
- def polynomial_additive_kernel(self, a, b, **kwargs):
-
- if 'groups' in kwargs.keys():
- groups = kwargs['groups']
- else:
- groups = self.groups
- if 'group' in kwargs.keys():
- group = kwargs['group']
- else:
- group = self.group
-
- a = a[:, group]
- b = b[:, group]
-
- (n, z) = tuple(a.size())
- (q, m) = tuple(b.size())
- no_groups = float(len(groups))
- r = torch.zeros(size=(q, n), dtype=torch.float64)
- for i, group in enumerate(groups):
- z = self.polynomial_kernel(a[:, group], b[:, group], **kwargs)
- r = r + z
- r = r / no_groups
- return r
-
-
- def matern_kernel(self, a, b, **kwargs):
- """
- :param a: matrices
- :param b: matrices
- :param gamma: smoothness
- :param v: Bessel function type
- :return:
- """
-
- if 'kappa' in kwargs.keys():
- kappa = kwargs['kappa']
- else:
- kappa = self.kappa
-
- if 'nu' in kwargs.keys():
- v = kwargs['nu']
- else:
- v = self.v
-
- if 'gamma' in kwargs.keys():
- gamma = kwargs['gamma']
- else:
- gamma = self.gamma
-
- if 'group' in kwargs.keys():
- group = kwargs['group']
- else:
- group = self.group
-
- a = a[:, group].numpy()
- b = b[:, group].numpy()
-
- dists = cdist(a / gamma, b / gamma, metric='euclidean').T
- if v == 0.5:
- K = np.exp(-dists)
- elif v == 1.5:
- K = dists * math.sqrt(3)
- K = (1. + K) * np.exp(-K)
- elif v == 2.5:
- K = dists * math.sqrt(5)
- K = (1. + K + K ** 2 / 3.0) * np.exp(-K)
- else: # general case; expensive to evaluate
- K = dists
- K[K == 0.0] += np.finfo(float).eps # strict zeros result in nan
- tmp = (math.sqrt(2 * v) * K)
- K.fill((2 ** (1. - v)) / math.gamma(v))
- K *= tmp ** v
- K *= kv(v, tmp)
- return kappa * torch.from_numpy(K)
-
-
- def ard_matern_kernel_diag(self, a, b, **kwargs):
- """
- :param a: matrices
- :param b: matrices
- :param gamma: smoothness
- :param v: Bessel function type
- :return:
- """
- if 'kappa' in kwargs.keys():
- kappa = kwargs['kappa']
- else:
- kappa = self.kappa
-
- if 'nu' in kwargs.keys():
- v = kwargs['nu']
- else:
- v = self.v
-
- if 'ard_gamma' in kwargs.keys():
- ard_gamma = kwargs['ard_gamma']
- else:
- ard_gamma = self.ard_gamma
-
- if 'group' in kwargs.keys():
- group = kwargs['group']
- else:
- group = self.group
-
- D = torch.diag(1. / (ard_gamma[group]))
- a = torch.mm(a, D)
- b = torch.mm(b, D)
-
- a = a[:, group]
- b = b[:, group]
-
- #dists = torch.cdist(a , b , p = 2).T
- dists = torch.sqrt(torch.sum((a - b)**2))
-
- if v == 0.5:
- K = torch.exp(-dists)
- elif v == 1.5:
- K = dists * np.sqrt(3)
- K = (1. + K) * torch.exp(-K)
- elif v == 2.5:
- K = dists * np.sqrt(5)
- K = (1. + K + K ** 2 / 3.0) * torch.exp(-K)
- else: # general case; expensive to evaluate
- K = dists
- K[K == 0.0] += np.finfo(float).eps # strict zeros result in nan
- tmp = (np.sqrt(2 * v) * K)
- K.fill((2 ** (1. - v)) / math.gamma(v))
- K *= tmp ** v
- K *= kv(v, tmp)
- return kappa * K
-
- def ard_matern_kernel(self, a, b, **kwargs):
- """
- :param a: matrices
- :param b: matrices
- :param gamma: smoothness
- :param v: Bessel function type
- :return:
- """
-
- if 'kappa' in kwargs.keys():
- kappa = kwargs['kappa']
- else:
- kappa = self.kappa
-
- if 'nu' in kwargs.keys():
- v = kwargs['nu']
- else:
- v = self.v
-
- if 'ard_gamma' in kwargs.keys():
- ard_gamma = kwargs['ard_gamma']
- else:
- ard_gamma = self.ard_gamma
-
- if 'group' in kwargs.keys():
- group = kwargs['group']
- else:
- group = self.group
-
- D = torch.diag(1. / (ard_gamma[group]))
- a = torch.mm(a, D)
- b = torch.mm(b, D)
-
- a = a[:, group]
- b = b[:, group]
-
- dists = torch.cdist(a , b , p = 2).T
-
- if v == 0.5:
- K = torch.exp(-dists)
- elif v == 1.5:
- K = dists * np.sqrt(3)
- K = (1. + K) * torch.exp(-K)
- elif v == 2.5:
- K = dists * np.sqrt(5)
- K = (1. + K + K ** 2 / 3.0) * torch.exp(-K)
- else: # general case; expensive to evaluate
- K = dists
- K[K == 0.0] += np.finfo(float).eps # strict zeros result in nan
- tmp = (np.sqrt(2 * v) * K)
- K.fill((2 ** (1. - v)) / math.gamma(v))
- K *= tmp ** v
- K *= kv(v, tmp)
- return kappa * K
-
- def modified_matern_kernel(self, X, Y, **kwargs):
- """
- :param a: matrices
- :param b: matrices
- :param gamma: smoothness
- :param v: Bessel function type
- :return:
- """
- if 'kappa' in kwargs.keys():
- kappa = kwargs['kappa']
- else:
- kappa = self.kappa
-
- if 'nu' in kwargs.keys():
- v = kwargs['nu']
- else:
- v = self.v
-
- if 'gamma' in kwargs.keys():
- gamma = kwargs['gamma']
- else:
- gamma = self.gamma
-
- if 'group' in kwargs.keys():
- group = kwargs['group']
- else:
- group = self.group
-
- a = a[:, group]
- b = b[:, group]
-
- d = X.size()[1]
- # Z = np.ones(shape = (X.shape[0],Y.shape[0]))
- Z = torch.ones(size=(Y.size()[0], X.size()[0]), dtype=torch.float64)
- for i in range(d):
- a = X[:, i].view(-1, 1)
- b = Y[:, i].view(-1, 1)
- # dists = cdist(a/gamma,b/gamma,metric='cityblock').T
- dists = cdist(a.numpy() / gamma, b.numpy() / gamma, metric='euclidean').T
- # dists = manhattan_distances(a, b).T/ gamma
- dists = torch.from_numpy(dists)
- if v == 1:
- K = torch.exp(-dists)
- elif v == 2:
- K = (1 + dists) * torch.exp(-dists)
- elif v == 3:
- K = (dists ** 2 + 3 * torch.abs(dists) + 3) * torch.exp(-dists) / 3.
- elif v == 4:
- K = (dists ** 3 + 6 * dists ** 2 + 15 * torch.abs(dists) + 15) * torch.exp(-dists) / 15.
- else:
- raise AssertionError("Kernel with nu = " + str(v) + "not implemented.")
- Z = Z * K
- return kappa * Z
-
- def spectral_kernel(self, a, b):
- if self.freq is not None:
- (n, d) = a.size()
- (m, d) = b.size()
- dist = torch.zeros(size=(n, m), dtype=torch.float64)
- c = 0
- for x in a:
- z = 0
- for y in b:
- dist[c, z] = torch.sum(torch.cos(torch.mm(x.view(1, 1) - y.view(1, 1), self.freq)))
- z = z + 1
- c = c + 1
- N = self.freq.size()[0]
- return torch.t(dist) / N
- else:
- raise AssertionError("No frequencies passed")
-
- def wiener_kernel(self, a, b):
- """
- Wiener process kernel
- k(x,y) = min(x,y)
- k(x,y) = \sum_i min(x_i,y_i)
- """
- (n, d) = a.size()
- (m, d) = b.size()
- dist = torch.zeros(size=(n, m))
- # dist = 0.1*np.eye(max(n,m))[0:m,0:n]
- c = 0
- for x in a:
- z = 0
- for y in b:
- print(x, y)
- dist[c, z] = torch.from_numpy(np.sum(np.min(np.array([x, y]), axis=0)))
- z = z + 1
- c = c + 1
-
- # print (dist)
- return dist.T
-
- def derivative_1(self, fixed, x):
- """
-
- """
- d = x.size()[1]
- n = x.size()[0]
-
- size = fixed.size()[0]
-
- if self.optkernel == "squared_exponential":
- k_original = self.squared_exponential_kernel(fixed, x)
- second = fixed.unsqueeze(1) - x
- second = second / self.gamma ** 2
- res = self.kappa * torch.einsum('ij,jik->ijk', k_original, second)
- else:
- raise AssertionError("Not implemented for this kernel")
-
- # result should be (n,d)
- return res
-
- def derivative_2(self, fixed, x):
- """
-
- """
- d = x.size()[1]
- n = x.size()[0]
-
- size = fixed.size()[0]
-
- if self.optkernel == "squared_exponential":
- k_original = self.squared_exponential_kernel(fixed, x)
- second = fixed.unsqueeze(1) - x
- second = second / self.gamma ** 2
- second2 = torch.einsum('ijk,ijl->ijkl', second, second)
- res1 = torch.einsum('ij,jikl->ijkl', k_original, second2)
-
- ones = torch.zeros(size=(size, n, d, d))
- for j in range(d):
- ones[:, :, j, j] = 1.
- ones = -ones / self.gamma ** 2
- res2 = torch.einsum('ij,jikl->ijkl', k_original, ones)
- res = self.kappa * (res1 + res2)
- # res = self.kappa * res2
- else:
- raise AssertionError("Not implemented for this kernel")
-
- return res
-
- def square_dist(self, a, b):
- if (a.shape == b.shape):
- normx = np.sum(a ** 2, axis=1).reshape(-1, 1)
- normy = np.sum(b ** 2, axis=1).reshape(-1, 1)
- else:
- normx = np.sum(a ** 2, axis=1).reshape(-1, 1)
- normy = np.sum(b ** 2, axis=1).reshape(-1, 1)
-
- product = b.dot(a.T)
- sqdist = np.tile(normx, b.shape[0]).T + np.tile(normy, a.shape[0]) - 2 * product
- return sqdist
+ def __init__(
+ self,
+ kernel_function=None,
+ kernel_name="squared_exponential",
+ freq=None,
+ groups=None,
+ d=1,
+ gamma: float = 1.0,
+ ard_gamma=None,
+ nu=1.5,
+ kappa: float = 1.0,
+ map=None,
+ power=2,
+ cov=None,
+ params=None,
+ group=None,
+ offset=0.0,
+ ):
+
+ if kernel_function is not None:
+ self.kernel_function = kernel_function
+ self.optkernel = "custom"
+ self.kappa = kappa
+ self.offset = offset
+ if params is None:
+ self.params = {"kappa": self.kappa}
+ else:
+ self.params = params
+ self.initial_params = self.params
+
+ if group is None:
+ self.group = [i for i in range(d)]
+ else:
+ self.group = group
+ self.d = d
+ else:
+ self.offset = offset
+ self.optkernel = kernel_name
+ self.gamma = gamma
+ if ard_gamma is None:
+ self.ard_gamma = torch.ones(d).double()
+ else:
+ try:
+ self.ard_gamma = torch.tensor([ard_gamma]).double()
+ except:
+ self.ard_gamma = ard_gamma
+ self.power = power
+ self.v = nu
+
+ if params is not None:
+ self.initial_params = params
+ else:
+ self.initial_params = {"kappa": kappa}
+
+ if cov is None:
+ self.cov = torch.eye(d).double()
+ else:
+ self.cov = cov
+
+ if group is None:
+ self.group = [i for i in range(d)]
+ else:
+ self.group = group
+
+ self.map = map
+ self.groups = groups
+ self.kappa = kappa
+ self.freq = freq
+ self.d = d
+ self.add = False
+
+ self.kernel_function_list = [self.get_kernel_internal()]
+ self.kernel_diag_function_list = [self.get_kernel_internal(diag=True)]
+ self.optkernel_list = [self.optkernel]
+ self.params_dict = {"0": self.params}
+ self.kernel_items = 1
+
+ self.operations = ["-"]
+
+ def __combine__(self, second_kernel_object):
+ self.kernel_function_list = (
+ self.kernel_function_list + second_kernel_object.kernel_function_list
+ )
+ self.optkernel_list = self.optkernel_list + second_kernel_object.optkernel_list
+ self.operations = self.operations + second_kernel_object.operations[1:]
+ for key, value in second_kernel_object.params_dict.items():
+ self.params_dict[str(self.kernel_items)] = value
+ self.kernel_items += 1
+
+ def __add__(self, second_kernel_object):
+ self.__combine__(second_kernel_object)
+ diff = len(set(second_kernel_object.group) - set(self.group))
+ self.d += diff
+ self.operations.append("+")
+ return self
+
+ def __mul__(self, second_kernel_object):
+ self.__combine__(second_kernel_object)
+ self.operations.append("*")
+ return self
+
+ def description(self):
+ desc = "Kernel description:"
+ for index in range(0, self.kernel_items, 1):
+ desc = desc + "\n\n\tkernel: " + self.optkernel_list[index]
+ desc = desc + "\n\toperation: " + self.operations[index]
+ desc = (
+ desc
+ + "\n\t"
+ + "\n\t".join(
+ [
+ "{0}={1}".format(key, value)
+ for key, value in self.params_dict[str(index)].items()
+ ]
+ )
+ )
+ return desc
+
+ def add_groups(self, dict):
+ for a in self.params_dict.keys():
+ if a not in dict.keys():
+ dict[a] = {}
+ dict[a]["group"] = self.params_dict[a]["group"]
+ return dict
+
+ def kernel_diag(self, a, b, **kwargs):
+ if len(kwargs) > 0:
+ # params_dict = list(kwargs)
+ # we need to send
+ params_dict = kwargs
+ self.add_groups(params_dict)
+ else:
+ params_dict = self.params_dict
+
+ for i in range(0, len(self.kernel_function_list), 1):
+ k = self.kernel_diag_function_list[i]
+ if str(i) in params_dict.keys():
+ arg = params_dict[str(i)]
+ else:
+ arg = {}
+ if self.operations[i] == "+":
+ output = output + k(a, b, **arg)
+ elif self.operations[i] == "*":
+ output = output * k(a, b, **arg)
+ else:
+ output = k(a, b, **arg)
+
+ return output
+
+ def kernel(self, a, b, **kwargs):
+     """Evaluate the composed kernel on ``a`` and ``b``.
+
+     When keyword arguments are given they are used as the parameter
+     dictionary (after ``add_groups`` has filled in the group entries);
+     otherwise ``self.params_dict`` is used. Component ``i`` is combined
+     with the running result according to ``self.operations[i]``: "+" adds,
+     "*" multiplies and any other value replaces the result.
+     """
+     if kwargs:
+         params_dict = kwargs
+         self.add_groups(params_dict)
+     else:
+         params_dict = self.params_dict
+
+     for i, k in enumerate(self.kernel_function_list):
+         label = str(i)
+         arg = params_dict[label] if label in params_dict.keys() else {}
+         operation = self.operations[i]
+         if operation == "+":
+             output = output + k(a, b, **arg)
+         elif operation == "*":
+             output = output * k(a, b, **arg)
+         else:
+             output = k(a, b, **arg)
+
+     return output
+
+ def get_param_refs(self):
+     """Return the parameter dictionary itself (a reference, not a copy)."""
+     refs = self.params_dict
+     return refs
+
+ def get_kernel(self):
+     """Return the ``kernel`` attribute of this object (the callable, uncalled)."""
+     kernel_callable = self.kernel
+     return kernel_callable
+
+ def integral(self, a_x, a_y, b_x, b_y):
+     """Delegate to ``squared_exponential_integral`` with ``self.params``.
+
+     Only the "squared_exponential" kernel is supported.
+
+     :raises NotImplementedError: for every other ``self.optkernel``.
+     """
+     if self.optkernel != "squared_exponential":
+         raise NotImplementedError()
+     return squared_exponential_integral(a_x, a_y, b_x, b_y, **self.params)
+
+ def get_kernel_internal(self, diag=False):
+     """Select the callable for ``self.optkernel`` and rebuild ``self.params``.
+
+     ``self.params`` is reset to ``self.initial_params`` plus "kappa",
+     "group" and "offset", then extended with the hyper-parameters of the
+     selected kernel. Extending raises ``TypeError`` if a key is already
+     present, exactly as ``dict(**a, **b)`` does.
+
+     :param diag: when true, return the diagonal variant for the kernels that
+         have one ("squared_exponential", "ard", "ard_matern"); other kernels
+         ignore the flag.
+     :return: the kernel callable.
+     :raises AssertionError: if the kernel name (or its combination with
+         ``self.groups``) is not handled.
+     """
+     self.params = {
+         **self.initial_params,
+         "kappa": self.kappa,
+         "group": self.group,
+         "offset": self.offset,
+     }
+     name = self.optkernel
+
+     if name == "squared_exponential":
+         self.params = dict(**self.params, gamma=self.gamma)
+         # NOTE(review): the diag variant is referenced as a bare (module-level)
+         # name, not as a method -- confirm it is defined in this module.
+         return squared_exponential_kernel_diag if diag else self.squared_exponential_kernel
+
+     if name == "ard" and (self.groups is None):
+         self.params = dict(**self.params, ard_gamma=self.ard_gamma)
+         # Fixed: the two returns used to be swapped (diag=True handed back the
+         # full kernel), unlike the "ard_matern" branch below.
+         return self.ard_kernel_diag if diag else self.ard_kernel
+
+     if name == "linear":
+         return self.linear_kernel
+
+     if name == "laplace":
+         self.params = dict(**self.params, gamma=self.gamma)
+         return self.laplace_kernel
+
+     if name == "modified_matern":
+         self.params = dict(**self.params, gamma=self.gamma, nu=self.v)
+         return self.modified_matern_kernel
+
+     if name == "custom":
+         return self.kernel_function
+
+     if name == "tanh":
+         return self.tanh_kernel
+
+     if name == "step":
+         return self.step_kernel
+
+     if name == "angsim":
+         return self.angsim_kernel
+
+     if name == "matern":
+         self.params = dict(**self.params, gamma=self.gamma, nu=self.v)
+         return self.matern_kernel
+
+     if name == "ard_matern":
+         self.params = dict(**self.params, ard_gamma=self.ard_gamma, nu=self.v)
+         return self.ard_matern_kernel_diag if diag else self.ard_matern_kernel
+
+     if name == "full_covariance_se":
+         self.params = dict(**self.params, cov=self.cov)
+         return self.covar_kernel
+
+     if name == "full_covariance_matern":
+         self.params = dict(**self.params, cov=self.cov, nu=self.v)
+         return self.covar_kernel_matern
+
+     if name == "polynomial" and (self.groups is None):
+         self.params = dict(**self.params, degree=self.power)
+         return self.polynomial_kernel
+
+     if name == "polynomial" and (self.groups is not None):
+         self.params = dict(**self.params, degree=self.power, groups=self.groups)
+         return self.polynomial_additive_kernel
+
+     if name == "ard" and (self.groups is not None):
+         self.params = dict(**self.params, ard_gamma=self.ard_gamma, groups=self.groups)
+         return self.ard_kernel_additive
+
+     if name == "squared_exponential_per_group" and (self.groups is not None):
+         self.params = dict(**self.params, groups=self.groups)
+         return self.squared_exponential_per_group_kernel_additive
+
+     if name == "ard_per_group" and (self.groups is not None):
+         self.params = dict(**self.params, groups=self.groups)
+         return self.ard_per_group_kernel_additive
+
+     if name == "gibbs":
+         self.params = dict(**self.params, groups=self.groups)
+         return self.gibbs_kernel
+
+     if name == "gibbs_custom":
+         self.params = dict(**self.params, groups=self.groups)
+         return self.gibbs_custom_kernel
+
+     if name == "random_map":
+         return self.random_map_kernel
+
+     raise AssertionError("Kernel not implemented.")
+
+ def embed(self, x):
+     """Return the finite-dimensional embedding of ``x``.
+
+     For the "linear" kernel the input is returned unchanged.
+
+     :raises AttributeError: for every other kernel.
+     """
+     if self.optkernel != "linear":
+         raise AttributeError(
+             "This type of kernel does not support a finite dimensional embedding"
+         )
+     return x
+
+ def get_basis_size(self):
+     """Return the size of the finite-dimensional embedding.
+
+     For the "linear" kernel this is ``self.d``.
+
+     :raises AttributeError: for every other kernel.
+     """
+     if self.optkernel != "linear":
+         raise AttributeError(
+             "This type of kernel does not support a finite dimensional embedding"
+         )
+     return self.d
+
+ def step_kernel(self, a, b, **kwargs):
+     """Evaluate ``kappa * (a_i + b_j - |a_i - b_j|)`` for all row pairs.
+
+     "kappa" and "group" are taken from ``kwargs`` when present, otherwise
+     from the instance. The matrix is filled entry by entry as (n, m) and
+     returned transposed.
+     """
+     kappa = kwargs["kappa"] if "kappa" in kwargs else self.kappa
+     group = kwargs["group"] if "group" in kwargs else self.group
+
+     a = a[:, group]
+     b = b[:, group]
+
+     n, d = a.size()
+     m, d = b.size()
+
+     K = torch.zeros(size=(n, m)).double()
+     # NOTE(review): each entry receives a whole row expression, so this
+     # presumably only works when the selected group has one column -- confirm.
+     for i in range(n):
+         row_a = a[i, :]
+         for j in range(m):
+             row_b = b[j, :]
+             K[i, j] = row_a + row_b - torch.abs(row_a - row_b)
+
+     return kappa * K.T
+
+ def linear_kernel(self, a, b, **kwargs):
+     """
+     Linear kernel: ``kappa * (b @ a.T) + offset`` on the selected columns.
+
+     "kappa", "group" and "offset" are taken from ``kwargs`` when present,
+     otherwise from the instance.
+     """
+     kappa = kwargs["kappa"] if "kappa" in kwargs else self.kappa
+     group = kwargs["group"] if "group" in kwargs else self.group
+     offset = kwargs["offset"] if "offset" in kwargs else self.offset
+
+     selected_a = a[:, group]
+     selected_b = b[:, group]
+     gram = selected_b @ selected_a.T
+     return kappa * gram + offset
+
+ def custom_map_kernel(self, a, b, **kwargs):
+     """Linear kernel evaluated on explicitly mapped features.
+
+     "kappa", "group" and "map" are taken from ``kwargs`` when present,
+     otherwise from the instance. With a map object, both inputs are passed
+     through its ``map`` method, transposed and fed to ``self.linear_kernel``
+     (the result is detached); without one the linear kernel is applied to
+     the selected columns directly.
+
+     Fixed: a map passed through ``kwargs`` is now the one that is applied;
+     previously ``self.map`` was always used even when "map" was supplied.
+     """
+     kappa = kwargs["kappa"] if "kappa" in kwargs else self.kappa
+     group = kwargs["group"] if "group" in kwargs else self.group
+     # Local name avoids shadowing the builtin ``map``.
+     feature_map = kwargs["map"] if "map" in kwargs else self.map
+
+     a = a[:, group]
+     b = b[:, group]
+
+     # NOTE(review): self.linear_kernel is called without kwargs, so it applies
+     # the instance's own kappa/group/offset on top of the ones used here --
+     # confirm that this is intended.
+     if feature_map is not None:
+         mapped_a = torch.t(feature_map.map(a))
+         mapped_b = torch.t(feature_map.map(b))
+         return kappa * self.linear_kernel(mapped_a, mapped_b).detach()
+     return kappa * self.linear_kernel(a, b)
+
+ def laplace_kernel(self, a, b, **kwargs):
+     """Laplace kernel: ``kappa * exp(-manhattan(a, b) / gamma**2)``, transposed.
+
+     "gamma", "kappa" and "group" are taken from ``kwargs`` when present,
+     otherwise from the instance. The distances are computed by
+     ``manhattan_distances`` and the result is converted back with
+     ``torch.from_numpy``.
+     """
+     gamma = kwargs["gamma"] if "gamma" in kwargs else self.gamma
+     kappa = kwargs["kappa"] if "kappa" in kwargs else self.kappa
+     group = kwargs["group"] if "group" in kwargs else self.group
+
+     a = a[:, group]
+     b = b[:, group]
+     exponent = -manhattan_distances(a, b) / gamma**2
+     K = np.exp(exponent)
+     return kappa * torch.from_numpy(K).T
+
+ def squared_exponential_kernel(self, a, b, **kwargs):
+     """
+     Squared exponential kernel: ``kappa * exp(-0.5 * ||a_i - b_j||^2 / gamma^2)``.
+
+     "gamma", "kappa" and "group" are taken from ``kwargs`` when present,
+     otherwise from the instance. Rows of the result correspond to rows of
+     ``b`` and columns to rows of ``a``.
+     """
+     gamma = kwargs["gamma"] if "gamma" in kwargs else self.gamma
+     kappa = kwargs["kappa"] if "kappa" in kwargs else self.kappa
+     group = kwargs["group"] if "group" in kwargs else self.group
+
+     a = a[:, group]
+     b = b[:, group]
+     sq_norm_a = torch.sum(a**2, dim=1).view(-1, 1)
+     sq_norm_b = torch.sum(b**2, dim=1).view(-1, 1)
+
+     # ||a - b||^2 expanded as -2<a, b> + ||a||^2 + ||b||^2
+     cross = torch.mm(b, torch.t(a))
+     sqdist = -2 * cross + torch.t(sq_norm_a) + sq_norm_b
+     scale = -0.5 / (gamma * gamma)
+     return kappa * torch.exp(scale * sqdist)
+
+ def gibbs_custom_kernel(self, a, b, **kwargs):
+     """Squared-exponential-type kernel with a caller-supplied lengthscale.
+
+     ``kwargs["gamma_fun"]`` is called as ``gamma_fun(a, b)`` on the selected
+     columns and its result divides the squared distances:
+     ``kappa * exp(-0.5 * sqdist / gamma_fun(a, b))``.
+
+     :raises AttributeError: if "gamma_fun" is not supplied.
+     """
+     if "gamma_fun" not in kwargs:
+         raise AttributeError("Missing gamma_fun in Gibbs kernel definition.")
+     gamma_fun = kwargs["gamma_fun"]
+
+     kappa = kwargs["kappa"] if "kappa" in kwargs else self.kappa
+     group = kwargs["group"] if "group" in kwargs else self.group
+
+     a = a[:, group]
+     b = b[:, group]
+     sq_norm_a = torch.sum(a**2, dim=1).view(-1, 1)
+     sq_norm_b = torch.sum(b**2, dim=1).view(-1, 1)
+
+     cross = torch.mm(b, torch.t(a))
+     sqdist = -2 * cross + torch.t(sq_norm_a) + sq_norm_b
+
+     lengthscales = gamma_fun(a, b)
+
+     return kappa * torch.exp((-0.5 / lengthscales) * sqdist)
+
+ def gibbs_kernel(self, a, b, **kwargs):
+     """Gibbs-type kernel with an input-dependent lengthscale.
+
+     ``kwargs["gamma_fun"]`` is evaluated separately on the selected columns
+     of ``a`` and ``b``; the kernel is
+     ``kappa * exp(-0.5 * sqdist / (gamma_fun(a)**2 + gamma_fun(b).T**2))``.
+
+     Fixed: the lengthscale matrix is no longer printed on every evaluation
+     (leftover debug output).
+
+     :raises AttributeError: if "gamma_fun" is not supplied.
+     """
+     if "gamma_fun" not in kwargs:
+         raise AttributeError("Missing gamma_fun in Gibbs kernel definition.")
+     gamma_fun = kwargs["gamma_fun"]
+
+     kappa = kwargs["kappa"] if "kappa" in kwargs else self.kappa
+     group = kwargs["group"] if "group" in kwargs else self.group
+
+     a = a[:, group]
+     b = b[:, group]
+     sq_norm_a = torch.sum(a**2, dim=1).view(-1, 1)
+     sq_norm_b = torch.sum(b**2, dim=1).view(-1, 1)
+
+     cross = torch.mm(b, torch.t(a))
+     sqdist = -2 * cross + torch.t(sq_norm_a) + sq_norm_b
+
+     # NOTE(review): sqdist has one row per row of b, whereas this sum has one
+     # row per row of a if gamma_fun returns a column vector -- the two only
+     # line up when len(a) == len(b); confirm the intended orientation.
+     lengthscales = gamma_fun(a) ** 2 + gamma_fun(b).T ** 2
+
+     return kappa * torch.exp((-0.5 / lengthscales) * sqdist)
+
+ def covar_kernel(self, a, b, **kwargs):
+     """
+     Squared exponential kernel on linearly transformed inputs.
+
+     Both inputs (restricted to the group columns) are right-multiplied by
+     ``cov`` and the kernel ``kappa * exp(-0.5 * sqdist)`` is evaluated on
+     the transformed points.
+
+     :param a: first input matrix
+     :param b: second input matrix
+     :param cov: square-root of the covariance matrix (kwarg or ``self.cov``)
+     :return: kernel matrix with one row per row of ``b``
+     """
+     kappa = kwargs["kappa"] if "kappa" in kwargs else self.kappa
+     cov = kwargs["cov"] if "cov" in kwargs else self.cov
+     group = kwargs["group"] if "group" in kwargs else self.group
+
+     a = torch.mm(a[:, group], cov)
+     b = torch.mm(b[:, group], cov)
+
+     sq_norm_a = torch.sum(a**2, dim=1).reshape(-1, 1)
+     sq_norm_b = torch.sum(b**2, dim=1).reshape(-1, 1)
+
+     cross = torch.mm(b, torch.t(a))
+     sqdist = -2 * cross + torch.t(sq_norm_a) + sq_norm_b
+     return kappa * torch.exp(-0.5 * sqdist)
+
+ def covar_kernel_matern(self, a, b, **kwargs):
+     """
+     Matern kernel on linearly transformed inputs.
+
+     Both inputs (restricted to the group columns) are right-multiplied by
+     ``cov`` and the Matern kernel is evaluated on their Euclidean distances.
+
+     Fixed: the smoothness is now also read from the "nu" kwarg, which is the
+     key ``get_kernel_internal`` stores ("v" is still honoured first for
+     backward compatibility). The general-smoothness branch no longer calls
+     the non-existent ``Tensor.fill`` and converts to numpy for the Bessel
+     function.
+
+     :param a: first input matrix
+     :param b: second input matrix
+     :param cov: square-root of the covariance matrix (kwarg or ``self.cov``)
+     :return: kernel matrix with one row per row of ``b``
+     """
+     kappa = kwargs["kappa"] if "kappa" in kwargs else self.kappa
+     cov = kwargs["cov"] if "cov" in kwargs else self.cov
+     if "v" in kwargs:
+         v = kwargs["v"]
+     elif "nu" in kwargs:
+         v = kwargs["nu"]
+     else:
+         v = self.v
+     group = kwargs["group"] if "group" in kwargs else self.group
+
+     a = torch.mm(a[:, group], cov)
+     b = torch.mm(b[:, group], cov)
+
+     dists = torch.cdist(a, b, p=2).T
+
+     if v == 0.5:
+         K = torch.exp(-dists)
+     elif v == 1.5:
+         K = dists * np.sqrt(3)
+         K = (1.0 + K) * torch.exp(-K)
+     elif v == 2.5:
+         K = dists * np.sqrt(5)
+         K = (1.0 + K + K**2 / 3.0) * torch.exp(-K)
+     else:  # general case; expensive to evaluate
+         # strict zeros result in nan, so nudge them by machine epsilon
+         safe = torch.where(dists == 0.0, dists + np.finfo(float).eps, dists)
+         tmp = math.sqrt(2 * v) * safe
+         # scipy's Bessel function works on numpy data; this branch is therefore
+         # not differentiable through the Bessel term.
+         bessel = torch.as_tensor(
+             kv(v, tmp.detach().cpu().numpy()), dtype=tmp.dtype, device=tmp.device
+         )
+         K = ((2 ** (1.0 - v)) / math.gamma(v)) * tmp**v * bessel
+     return kappa * K
+
+ def ard_kernel(self, a, b, **kwargs):
+     """Squared exponential kernel with one lengthscale per dimension (ARD).
+
+     The group columns of both inputs are divided by the matching entries of
+     ``ard_gamma`` and ``kappa * exp(-0.5 * sqdist)`` is evaluated on the
+     rescaled points. "kappa", "ard_gamma" and "group" come from ``kwargs``
+     when present, otherwise from the instance.
+     """
+     kappa = kwargs["kappa"] if "kappa" in kwargs else self.kappa
+     gamma = kwargs["ard_gamma"] if "ard_gamma" in kwargs else self.ard_gamma
+     group = kwargs["group"] if "group" in kwargs else self.group
+
+     a = a[:, group]
+     b = b[:, group]
+
+     inverse_scales = torch.diag(1.0 / (gamma[group]))
+     a = torch.mm(a, inverse_scales)
+     b = torch.mm(b, inverse_scales)
+     sq_norm_a = torch.sum(a**2, dim=1).reshape(-1, 1)
+     sq_norm_b = torch.sum(b**2, dim=1).reshape(-1, 1)
+
+     cross = torch.mm(b, torch.t(a))
+     sqdist = -2 * cross + torch.t(sq_norm_a) + sq_norm_b
+     return kappa * torch.exp(-0.5 * sqdist)
+
+ def ard_kernel_diag(self, a, b, **kwargs):
+     """ARD squared exponential kernel (variant registered under the diag name).
+
+     The group columns of both inputs are divided by the matching entries of
+     ``ard_gamma`` and ``kappa * exp(-0.5 * sqdist)`` is evaluated for every
+     pair of rows, i.e. the full matrix is returned, not only a diagonal.
+     """
+     kappa = kwargs["kappa"] if "kappa" in kwargs else self.kappa
+     gamma = kwargs["ard_gamma"] if "ard_gamma" in kwargs else self.ard_gamma
+     group = kwargs["group"] if "group" in kwargs else self.group
+
+     a = a[:, group]
+     b = b[:, group]
+
+     inverse_scales = torch.diag(1.0 / (gamma[group]))
+     a = torch.mm(a, inverse_scales)
+     b = torch.mm(b, inverse_scales)
+     sq_norm_a = torch.sum(a**2, dim=1).reshape(-1, 1)
+     sq_norm_b = torch.sum(b**2, dim=1).reshape(-1, 1)
+
+     cross = torch.mm(b, torch.t(a))
+     sqdist = -2 * cross + torch.t(sq_norm_a) + sq_norm_b
+     return kappa * torch.exp(-0.5 * sqdist)
+
+ def ard_per_group_kernel_additive(self, a, b, **kwargs):
+     """Average of ARD squared exponential kernels, one per group of columns.
+
+     ``kwargs["ard_per_group"]`` holds the lengthscales of all groups
+     concatenated in group order; each group consumes as many entries as it
+     has columns. The result is ``kappa`` times the mean of the group kernels.
+
+     :raises AssertionError: if "ard_per_group" is not supplied.
+     """
+     kappa = kwargs["kappa"] if "kappa" in kwargs else self.kappa
+     groups = kwargs["groups"] if "groups" in kwargs else self.groups
+
+     if "ard_per_group" not in kwargs:
+         raise AssertionError(
+             "This kernel requires 'ard_per_group' initial parameters"
+         )
+     ard_per_group = kwargs["ard_per_group"]
+
+     (n, z) = tuple(a.size())
+     (q, m) = tuple(b.size())
+
+     total = torch.zeros(size=(q, n), dtype=torch.float64)
+     start = 0
+
+     for group_add in groups:
+         kwargs["group"] = group_add
+
+         # slice of the flat lengthscale vector that belongs to this group
+         stop = start + len(group_add)
+         gamma = ard_per_group[start:stop]
+         start = stop
+
+         inverse_scales = torch.diag(1.0 / (gamma))
+         ax = torch.mm(a[:, group_add], inverse_scales)
+         bx = torch.mm(b[:, group_add], inverse_scales)
+         sq_norm_a = torch.sum(ax**2, dim=1).reshape(-1, 1)
+         sq_norm_b = torch.sum(bx**2, dim=1).reshape(-1, 1)
+         cross = torch.mm(bx, torch.t(ax))
+         sqdist = -2 * cross + torch.t(sq_norm_a) + sq_norm_b
+         total = total + torch.exp(-0.5 * sqdist)
+
+     total = total / float(len(groups))
+     return kappa * total
+
+ def squared_exponential_per_group_kernel_additive(self, a, b, **kwargs):
+     """Average of squared exponential kernels, one lengthscale per group.
+
+     Group ``i`` of ``groups`` is paired with entry ``i`` of
+     ``kwargs["gamma_per_group"]``; the group kernels are evaluated through
+     ``self.squared_exponential_kernel`` and averaged.
+
+     Fixed: ``kappa`` used to be applied inside every group kernel and then
+     once more on the average (yielding kappa squared); it is now applied
+     exactly once, like in the other additive kernels of this class.
+
+     :raises AssertionError: if "gamma_per_group" is not supplied.
+     """
+     kappa = kwargs["kappa"] if "kappa" in kwargs else self.kappa
+     groups = kwargs["groups"] if "groups" in kwargs else self.groups
+
+     if "gamma_per_group" not in kwargs:
+         raise AssertionError(
+             "This kernel requires 'gamma_per_group' initial parameters"
+         )
+     gamma_per_group = kwargs["gamma_per_group"]
+
+     (n, z) = tuple(a.size())
+     (q, m) = tuple(b.size())
+
+     r = torch.zeros(size=(q, n), dtype=torch.float64)
+
+     # Unit amplitude inside the loop; the amplitude is applied once below.
+     inner_kwargs = dict(kwargs)
+     inner_kwargs["kappa"] = 1.0
+     for group_add, gamma in zip(groups, gamma_per_group):
+         inner_kwargs["group"] = group_add
+         inner_kwargs["gamma"] = gamma  # per-group lengthscale
+         r = r + self.squared_exponential_kernel(a, b, **inner_kwargs)
+
+     return kappa * r / float(len(groups))
+
+ def ard_kernel_additive(self, a, b, **kwargs):
+     """Average of ``self.ard_kernel`` evaluated once per group in ``groups``.
+
+     The inputs are first restricted to the ``group`` columns; each call to
+     ``ard_kernel`` then receives the remaining kwargs with "group" replaced
+     by the current entry of ``groups``.
+     """
+     # The lookup is kept although kappa is not used below: it still raises
+     # if neither the kwarg nor the attribute exists.
+     kappa = kwargs["kappa"] if "kappa" in kwargs else self.kappa
+     groups = kwargs["groups"] if "groups" in kwargs else self.groups
+     group = kwargs["group"] if "group" in kwargs else self.group
+
+     a = a[:, group]
+     b = b[:, group]
+
+     (n, z) = tuple(a.size())
+     (q, m) = tuple(b.size())
+
+     total = torch.zeros(size=(q, n), dtype=torch.float64)
+
+     for group_add in groups:
+         kwargs["group"] = group_add
+         total = total + self.ard_kernel(a, b, **kwargs)
+
+     return total / float(len(groups))
+
+ def tanh_kernel(self, a, b, **kwargs):
+     """
+     Kernel ``kappa * tanh(D)^3 / (eps + D^3)`` on Manhattan distances ``D``.
+
+     ``D`` is the transposed Manhattan distance matrix of the selected
+     columns of ``a`` and ``b``; ``eps`` is ``10e-10``.
+     """
+     kappa = kwargs["kappa"] if "kappa" in kwargs else self.kappa
+     group = kwargs["group"] if "group" in kwargs else self.group
+
+     a = a[:, group]
+     b = b[:, group]
+
+     # Return value unused; the call is kept for the validation it performs.
+     X, Y = check_pairwise_arrays(a.numpy(), b.numpy())
+     distances = manhattan_distances(a.numpy(), b.numpy()).T
+     eps = 10e-10
+     q = 3
+     ratio = (np.tanh(distances) ** q) / (eps + distances**q)
+     return kappa * torch.from_numpy(ratio)
+
+ def angsim_kernel(self, a, b, **kwargs):
+     """Angular similarity: ``kappa * (2 / pi) * arcsin(<a, b> / (|a| |b|))``.
+
+     "kappa" is taken from ``kwargs`` when present, otherwise from the
+     instance. The inputs are used through ``dot`` and ``norm``.
+     """
+     kappa = kwargs["kappa"] if "kappa" in kwargs else self.kappa
+
+     cosine = (a.dot(b)) / (a.norm() * b.norm())
+     return kappa * (2.0 / np.pi) * np.arcsin(cosine)
+
+ def polynomial_kernel(self, a, b, **kwargs):
+     """Polynomial kernel: ``kappa * (b @ a.T + 1) ** degree``.
+
+     "kappa", "degree" and "group" are taken from ``kwargs`` when present,
+     otherwise from ``self.kappa``, ``self.power`` and ``self.group``.
+     """
+     kappa = kwargs["kappa"] if "kappa" in kwargs else self.kappa
+     power = kwargs["degree"] if "degree" in kwargs else self.power
+     group = kwargs["group"] if "group" in kwargs else self.group
+
+     a = a[:, group]
+     b = b[:, group]
+
+     shifted_gram = torch.mm(b, torch.t(a)) + 1
+     return kappa * shifted_gram**power
+
+ def polynomial_additive_kernel(self, a, b, **kwargs):
+     """Average of ``self.polynomial_kernel`` over the groups in ``groups``.
+
+     The inputs are first restricted to the ``group`` columns; for every
+     entry of ``groups`` the polynomial kernel is called on that entry's
+     columns with the unchanged kwargs, and the results are averaged.
+     """
+     groups = kwargs["groups"] if "groups" in kwargs else self.groups
+     group = kwargs["group"] if "group" in kwargs else self.group
+
+     a = a[:, group]
+     b = b[:, group]
+
+     (n, z) = tuple(a.size())
+     (q, m) = tuple(b.size())
+     no_groups = float(len(groups))
+     total = torch.zeros(size=(q, n), dtype=torch.float64)
+     for columns in groups:
+         total = total + self.polynomial_kernel(a[:, columns], b[:, columns], **kwargs)
+     return total / no_groups
+
+ def matern_kernel(self, a, b, **kwargs):
+     """
+     Matern kernel evaluated with numpy/scipy on Euclidean distances.
+
+     "kappa", "nu", "gamma" and "group" are taken from ``kwargs`` when
+     present, otherwise from the instance (``self.v`` for "nu").
+
+     :param a: matrices
+     :param b: matrices
+     :param gamma: lengthscale dividing both inputs
+     :param nu: smoothness; 0.5, 1.5 and 2.5 use closed forms, other values
+         use the Bessel function ``kv``
+     :return: ``kappa`` times the kernel matrix, as a torch tensor
+     """
+     kappa = kwargs["kappa"] if "kappa" in kwargs else self.kappa
+     v = kwargs["nu"] if "nu" in kwargs else self.v
+     gamma = kwargs["gamma"] if "gamma" in kwargs else self.gamma
+     group = kwargs["group"] if "group" in kwargs else self.group
+
+     a = a[:, group].numpy()
+     b = b[:, group].numpy()
+
+     dists = cdist(a / gamma, b / gamma, metric="euclidean").T
+     if v == 0.5:
+         K = np.exp(-dists)
+     elif v == 1.5:
+         scaled = dists * math.sqrt(3)
+         K = (1.0 + scaled) * np.exp(-scaled)
+     elif v == 2.5:
+         scaled = dists * math.sqrt(5)
+         K = (1.0 + scaled + scaled**2 / 3.0) * np.exp(-scaled)
+     else:  # general case; expensive to evaluate
+         dists[dists == 0.0] += np.finfo(float).eps  # strict zeros result in nan
+         tmp = math.sqrt(2 * v) * dists
+         K = np.full_like(dists, (2 ** (1.0 - v)) / math.gamma(v))
+         K *= tmp**v
+         K *= kv(v, tmp)
+     return kappa * torch.from_numpy(K)
+
+ def ard_matern_kernel_diag(self, a, b, **kwargs):
+     """
+     ARD Matern kernel value computed from one aggregate distance.
+
+     The group columns of ``a`` and ``b`` are divided by the matching entries
+     of ``ard_gamma``; the distance is the square root of the sum of all
+     squared element-wise differences, so a single value is returned.
+
+     Fixed: the group columns are now selected before the rescaling, so a
+     group that is a proper subset (or a permutation) of the columns is
+     scaled with its own lengthscales. The general-smoothness branch no
+     longer calls the non-existent ``Tensor.fill``.
+
+     :param a: matrices
+     :param b: matrices
+     :param ard_gamma: per-dimension lengthscales (kwarg or ``self.ard_gamma``)
+     :param nu: smoothness (kwarg, otherwise ``self.v``)
+     :return: ``kappa`` times the kernel value
+     """
+     kappa = kwargs["kappa"] if "kappa" in kwargs else self.kappa
+     v = kwargs["nu"] if "nu" in kwargs else self.v
+     ard_gamma = kwargs["ard_gamma"] if "ard_gamma" in kwargs else self.ard_gamma
+     group = kwargs["group"] if "group" in kwargs else self.group
+
+     inverse_scales = torch.diag(1.0 / (ard_gamma[group]))
+     a = torch.mm(a[:, group], inverse_scales)
+     b = torch.mm(b[:, group], inverse_scales)
+
+     # NOTE(review): this sums over every element, not per row -- confirm that
+     # a single aggregate distance is what callers of the diag variant expect.
+     dists = torch.sqrt(torch.sum((a - b) ** 2))
+
+     if v == 0.5:
+         K = torch.exp(-dists)
+     elif v == 1.5:
+         K = dists * np.sqrt(3)
+         K = (1.0 + K) * torch.exp(-K)
+     elif v == 2.5:
+         K = dists * np.sqrt(5)
+         K = (1.0 + K + K**2 / 3.0) * torch.exp(-K)
+     else:  # general case; expensive to evaluate
+         # strict zeros result in nan, so nudge them by machine epsilon
+         safe = torch.where(dists == 0.0, dists + np.finfo(float).eps, dists)
+         tmp = math.sqrt(2 * v) * safe
+         # scipy's Bessel function works on numpy data; this branch is therefore
+         # not differentiable through the Bessel term.
+         bessel = torch.as_tensor(
+             kv(v, tmp.detach().cpu().numpy()), dtype=tmp.dtype, device=tmp.device
+         )
+         K = ((2 ** (1.0 - v)) / math.gamma(v)) * tmp**v * bessel
+     return kappa * K
+
+ def ard_matern_kernel(self, a, b, **kwargs):
+     """
+     ARD Matern kernel matrix on per-dimension rescaled inputs.
+
+     The group columns of ``a`` and ``b`` are divided by the matching entries
+     of ``ard_gamma`` and the Matern kernel is evaluated on the pairwise
+     Euclidean distances (one row per row of ``b``).
+
+     Fixed: the group columns are now selected before the rescaling, so a
+     group that is a proper subset (or a permutation) of the columns is
+     scaled with its own lengthscales. The general-smoothness branch no
+     longer calls the non-existent ``Tensor.fill``.
+
+     :param a: matrices
+     :param b: matrices
+     :param ard_gamma: per-dimension lengthscales (kwarg or ``self.ard_gamma``)
+     :param nu: smoothness (kwarg, otherwise ``self.v``)
+     :return: ``kappa`` times the kernel matrix
+     """
+     kappa = kwargs["kappa"] if "kappa" in kwargs else self.kappa
+     v = kwargs["nu"] if "nu" in kwargs else self.v
+     ard_gamma = kwargs["ard_gamma"] if "ard_gamma" in kwargs else self.ard_gamma
+     group = kwargs["group"] if "group" in kwargs else self.group
+
+     inverse_scales = torch.diag(1.0 / (ard_gamma[group]))
+     a = torch.mm(a[:, group], inverse_scales)
+     b = torch.mm(b[:, group], inverse_scales)
+
+     dists = torch.cdist(a, b, p=2).T
+
+     if v == 0.5:
+         K = torch.exp(-dists)
+     elif v == 1.5:
+         K = dists * np.sqrt(3)
+         K = (1.0 + K) * torch.exp(-K)
+     elif v == 2.5:
+         K = dists * np.sqrt(5)
+         K = (1.0 + K + K**2 / 3.0) * torch.exp(-K)
+     else:  # general case; expensive to evaluate
+         # strict zeros result in nan, so nudge them by machine epsilon
+         safe = torch.where(dists == 0.0, dists + np.finfo(float).eps, dists)
+         tmp = math.sqrt(2 * v) * safe
+         # scipy's Bessel function works on numpy data; this branch is therefore
+         # not differentiable through the Bessel term.
+         bessel = torch.as_tensor(
+             kv(v, tmp.detach().cpu().numpy()), dtype=tmp.dtype, device=tmp.device
+         )
+         K = ((2 ** (1.0 - v)) / math.gamma(v)) * tmp**v * bessel
+     return kappa * K
+
+ def modified_matern_kernel(self, X, Y, **kwargs):
+     """
+     Product over dimensions of one-dimensional Matern-type kernels.
+
+     For every selected column the scaled absolute difference ``r`` is
+     computed and a polynomial-times-``exp(-r)`` factor chosen by ``nu`` is
+     multiplied into the result.
+
+     Fixed: the group restriction is now applied to ``X`` and ``Y``; it used
+     to index the not-yet-defined locals ``a`` and ``b``, so every call
+     raised ``UnboundLocalError``.
+
+     :param X: matrices
+     :param Y: matrices
+     :param gamma: lengthscale dividing both inputs
+     :param nu: integer order 1-4 (kwarg, otherwise ``self.v``)
+     :return: ``kappa`` times the kernel matrix, one row per row of ``Y``
+     :raises AssertionError: for any other ``nu``
+     """
+     kappa = kwargs["kappa"] if "kappa" in kwargs else self.kappa
+     v = kwargs["nu"] if "nu" in kwargs else self.v
+     gamma = kwargs["gamma"] if "gamma" in kwargs else self.gamma
+     group = kwargs["group"] if "group" in kwargs else self.group
+
+     X = X[:, group]
+     Y = Y[:, group]
+
+     d = X.size()[1]
+     Z = torch.ones(size=(Y.size()[0], X.size()[0]), dtype=torch.float64)
+     for i in range(d):
+         a = X[:, i].reshape(-1, 1)
+         b = Y[:, i].reshape(-1, 1)
+         dists = cdist(a.numpy() / gamma, b.numpy() / gamma, metric="euclidean").T
+         dists = torch.from_numpy(dists)
+         if v == 1:
+             K = torch.exp(-dists)
+         elif v == 2:
+             K = (1 + dists) * torch.exp(-dists)
+         elif v == 3:
+             K = (dists**2 + 3 * torch.abs(dists) + 3) * torch.exp(-dists) / 3.0
+         elif v == 4:
+             K = (
+                 (dists**3 + 6 * dists**2 + 15 * torch.abs(dists) + 15)
+                 * torch.exp(-dists)
+                 / 15.0
+             )
+         else:
+             raise AssertionError("Kernel with nu = " + str(v) + "not implemented.")
+         Z = Z * K
+     return kappa * Z
+
+ def spectral_kernel(self, a, b):
+     """Average of cosines of frequency-scaled differences between rows.
+
+     Entry (i, j) sums ``cos((a_i - b_j) @ self.freq)`` over the frequencies;
+     the matrix is returned transposed and divided by ``self.freq.size()[0]``.
+     Each row is reshaped with ``view(1, 1)``, so rows must hold one value.
+
+     :raises AssertionError: if ``self.freq`` is ``None``.
+     """
+     if self.freq is None:
+         raise AssertionError("No frequencies passed")
+
+     (n, d) = a.size()
+     (m, d) = b.size()
+     dist = torch.zeros(size=(n, m), dtype=torch.float64)
+     for row, x in enumerate(a):
+         for col, y in enumerate(b):
+             difference = x.view(1, 1) - y.view(1, 1)
+             dist[row, col] = torch.sum(torch.cos(torch.mm(difference, self.freq)))
+     N = self.freq.size()[0]
+     return torch.t(dist) / N
+
+ def wiener_kernel(self, a, b):
+     r"""
+     Wiener process kernel
+     k(x,y) = min(x,y)
+     k(x,y) = \sum_i min(x_i,y_i)
+
+     Fixed: the entry-wise loop wrapped a numpy scalar in ``torch.from_numpy``
+     (which only accepts arrays) and printed every pair of rows; the sum of
+     element-wise minima is now computed directly with broadcasting. The
+     docstring is a raw string so that ``\sum`` is not an invalid escape.
+
+     :param a: tensor of shape (n, d)
+     :param b: tensor of shape (m, d)
+     :return: tensor of shape (m, n) holding sum_i min(a_i, b_i) per row pair
+     """
+     (n, d) = a.size()
+     (m, d) = b.size()
+     # (n, 1, d) against (1, m, d) broadcasts to every pair of rows
+     pairwise_min = torch.min(a.unsqueeze(1), b.unsqueeze(0))
+     dist = pairwise_min.sum(dim=2)
+     return dist.T
+
+ def derivative_1(self, fixed, x):
+     """First-derivative tensor of the squared exponential kernel.
+
+     Multiplies the kernel matrix of ``fixed`` and ``x`` with the pairwise
+     differences ``(fixed - x) / gamma**2`` and scales by ``self.kappa``.
+
+     :raises AssertionError: for any kernel other than "squared_exponential".
+     """
+     n, d = x.size()[0], x.size()[1]
+     size = fixed.size()[0]
+
+     if self.optkernel != "squared_exponential":
+         raise AssertionError("Not implemented for this kernel")
+
+     kernel_values = self.squared_exponential_kernel(fixed, x)
+     differences = (fixed.unsqueeze(1) - x) / self.gamma**2
+     weighted = torch.einsum("ij,jik->ijk", kernel_values, differences)
+     return self.kappa * weighted
+
+ def derivative_2(self, fixed, x):
+     """Second-derivative tensor of the squared exponential kernel.
+
+     Combines the outer product of the scaled differences
+     ``(fixed - x) / gamma**2`` with a ``-I / gamma**2`` term, both weighted
+     by the kernel matrix, and scales the sum by ``self.kappa``.
+
+     Fixed: the identity term is now created with the dtype (and device) of
+     the kernel matrix; it used to be float32, which makes ``einsum`` fail
+     for float64 inputs.
+
+     :raises AssertionError: for any kernel other than "squared_exponential".
+     """
+     d = x.size()[1]
+     n = x.size()[0]
+     size = fixed.size()[0]
+
+     if self.optkernel != "squared_exponential":
+         raise AssertionError("Not implemented for this kernel")
+
+     k_original = self.squared_exponential_kernel(fixed, x)
+     second = (fixed.unsqueeze(1) - x) / self.gamma**2
+     second2 = torch.einsum("ijk,ijl->ijkl", second, second)
+     res1 = torch.einsum("ij,jikl->ijkl", k_original, second2)
+
+     identity = torch.eye(d, dtype=k_original.dtype, device=k_original.device)
+     # same -I / gamma^2 block for every (fixed, x) pair
+     curvature = (-identity / self.gamma**2).expand(size, n, d, d)
+     res2 = torch.einsum("ij,jikl->ijkl", k_original, curvature)
+     return self.kappa * (res1 + res2)
+
+ def square_dist(self, a, b):
+     """Pairwise squared Euclidean distances between rows of two numpy arrays.
+
+     Uses ``||a||^2 + ||b||^2 - 2 <a, b>``; the result has one row per row of
+     ``b`` and one column per row of ``a``.
+     """
+     sq_norm_a = np.sum(a**2, axis=1).reshape(-1, 1)
+     sq_norm_b = np.sum(b**2, axis=1).reshape(-1, 1)
+
+     cross = b.dot(a.T)
+     tiled_a = np.tile(sq_norm_a, b.shape[0]).T
+     tiled_b = np.tile(sq_norm_b, a.shape[0])
+     return tiled_a + tiled_b - 2 * cross
diff --git a/stpy/legacy/integral_kernels.py b/stpy/legacy/integral_kernels.py
index fef208c..d56aa98 100755
--- a/stpy/legacy/integral_kernels.py
+++ b/stpy/legacy/integral_kernels.py
@@ -9,576 +9,614 @@
class IntegralKernel:
- def __init__(self, dataset, s=0.1):
-
- self.x = dataset[0]
- self.y = dataset[1]
-
- self.s = s
- self.gamma = 1.0
- self.distibution = lambda size: torch.from_numpy(np.random.normal(size=size) * (1. / self.gamma))
-
- self.n = self.x.size()[0]
- self.d = self.x.size()[1]
-
- self.basis_func = lambda x, theta: torch.cat((torch.cos(torch.mm(theta, x)), torch.sin(torch.mm(theta, x))), 1)
- self.size = 2
-
- self.set = []
- self.weights = []
- self.params = []
- self.active_basis = None
-
- def set_distribution(self, distibution):
- self.distibution = distibution
-
- def set_basis_function(self, fun, size):
- self.basis_func = fun
- self.size = size
-
- def sample_basis_function(self):
- param = self.distibution(self.d).view(-1, 1)
- return [self.get_basis_function(param), param]
-
- def sample_basis_function_qmc(self, size=1):
- inv_cum_dist = lambda x: norm.ppf(x) * (1. / 1.)
- params = torch.from_numpy(sample_qmc_halton(inv_cum_dist, size=(size, self.d)))
- return params
-
- def sample_basis_vector(self):
- fun = self.sample_basis_function()[0]
- return fun(self.x).view(-1) / np.sqrt(self.n)
-
- def get_basis_function(self, param):
- return lambda x: self.basis_func(param, x)
-
- def add_to_basis(self, fun, weight, param):
- self.set.append(fun)
- self.weights.append(weight)
- self.params.append(param)
-
- def basis_func_dataset(self, param):
- return self.basis_func(param, self.x).view(-1) / np.sqrt(self.n)
-
- def basis_map_set(self, x, set, weights):
- value = torch.zeros(len(set), x.size()[0] * self.size, dtype=torch.float64)
- # print (value.size(),x.size(),self.set[0](x).view(-1).size())
- for index, elem in enumerate(set):
- # print (np.sqrt(np.array(self.weights[index]).astype(complex)))
- value[index, :] = elem(x).view(-1) / np.sqrt(self.n) # * np.sqrt(weights[index])
- return value
-
- def empty(self):
- self.active_basis = None
- self.set = []
- self.weights = []
- self.params = []
-
- def empty_add_random(self):
- self.empty()
- self.random_increase(1)
-
- def basis_map(self, x):
- return self.basis_map_set(x, self.set, self.weights)
-
- def kernel(self, x, y, noise=True):
- value = torch.zeros(x.size()[0], y.size()[0], dtype=torch.float64)
-
- for index, elem in enumerate(self.set):
- value += torch.mm(elem(x), torch.t(elem(y))) * self.weights[index]
- if noise == True:
- value = value + self.s * self.s * torch.eye(x.size()[0], y.size()[0], dtype=torch.float64)
-
- return value
-
- def outer_kernel(self, x):
- Phi = self.basis_map(x)
- value = torch.mm(Phi, torch.t(Phi))
- return value
-
- def expected_phi(self, x, base=10000):
- Ephi = torch.zeros(x.size()[0] * self.size, dtype=torch.float64)
- for _ in range(base):
- Ephi += self.sample_basis_function()[0](x).view(-1) / np.sqrt(self.n)
- Ephi = Ephi / base
- return Ephi
-
- def expected_phi_squared(self, x, fun, base=10000):
- prod = 0
- v = fun(x).view(-1) / np.sqrt(self.n)
- for _ in range(base):
- sample = self.sample_basis_function()[0](x).view(-1) / np.sqrt(self.n)
- prod += torch.dot(sample, v) ** 2
- prod = prod / base
- return prod
-
- def expected_phi_squared_set(self, x, base=10000):
- v = self.active_basis
-
- prod = torch.zeros(x.size()[0], )
- for _ in range(base):
- sample = self.sample_basis_function()[0](x).view(-1) / np.sqrt(self.n)
- prod += torch.mm(sample, v) ** 2
- prod = prod / base
- return prod
-
- def update_basis(self):
- if self.active_basis is None:
- Phi = self.basis_map(self.x)
- self.active_basis = Phi
- W = torch.mm(Phi, torch.t(Phi)) + self.s * self.s * torch.eye(len(self.set), dtype=torch.float64)
- self.W_inv = torch.inverse(W)
- else:
- v = self.set[-1](self.x).view(1, -1) / np.sqrt(self.x.size()[0])
- self.active_basis = torch.cat((self.active_basis, v), dim=0)
- W = torch.mm(self.active_basis, torch.t(self.active_basis)) + self.s * self.s * torch.eye(len(self.set),
- dtype=torch.float64)
- self.W_inv = torch.inverse(W)
-
- """
+ def __init__(self, dataset, s=0.1):
+     """Store the dataset and set up the default sampler and basis function.
+
+     :param dataset: indexable holding the inputs at position 0 and the
+         targets at position 1
+     :param s: stored as ``self.s``
+     """
+     self.x, self.y = dataset[0], dataset[1]
+
+     self.s = s
+     self.gamma = 1.0
+
+     def draw(size):
+         # normal draw scaled by 1 / self.gamma (read at call time)
+         return torch.from_numpy(np.random.normal(size=size) * (1.0 / self.gamma))
+
+     self.distibution = draw
+
+     self.n = self.x.size()[0]
+     self.d = self.x.size()[1]
+
+     def cos_sin_features(x, theta):
+         # get_basis_function calls this as basis_func(param, x), so the first
+         # argument receives the parameter and the second the data.
+         projection = torch.mm(theta, x)
+         return torch.cat((torch.cos(projection), torch.sin(projection)), 1)
+
+     self.basis_func = cos_sin_features
+     self.size = 2
+
+     self.set = []
+     self.weights = []
+     self.params = []
+     self.active_basis = None
+
+ def set_distribution(self, distibution):
+     """Replace the sampler stored in ``self.distibution``."""
+     setattr(self, "distibution", distibution)
+
+ def set_basis_function(self, fun, size):
+     """Replace the basis function and the feature count stored with it."""
+     self.basis_func, self.size = fun, size
+
+ def sample_basis_function(self):
+     """Draw one parameter and return ``[basis function, parameter]``.
+
+     The parameter is ``self.distibution(self.d)`` reshaped to a column.
+     """
+     drawn = self.distibution(self.d)
+     param = drawn.view(-1, 1)
+     bound_function = self.get_basis_function(param)
+     return [bound_function, param]
+
+ def sample_basis_function_qmc(self, size=1):
+     """Return ``size`` parameter vectors from ``sample_qmc_halton``.
+
+     The sampler receives ``norm.ppf`` (times ``1.0 / 1.0``) as the inverse
+     cumulative distribution and the shape ``(size, self.d)``; its result is
+     wrapped with ``torch.from_numpy``.
+     """
+
+     def inv_cum_dist(x):
+         return norm.ppf(x) * (1.0 / 1.0)
+
+     samples = sample_qmc_halton(inv_cum_dist, size=(size, self.d))
+     return torch.from_numpy(samples)
+
+ def sample_basis_vector(self):
+     """Evaluate a freshly sampled basis function on the stored inputs.
+
+     The values are flattened and divided by ``sqrt(self.n)``.
+     """
+     fun, _ = self.sample_basis_function()
+     flattened = fun(self.x).view(-1)
+     return flattened / np.sqrt(self.n)
+
+ def get_basis_function(self, param):
+     """Return a one-argument function ``x -> self.basis_func(param, x)``.
+
+     ``self.basis_func`` is looked up when the returned function is called.
+     """
+
+     def bound(x):
+         return self.basis_func(param, x)
+
+     return bound
+
+ def add_to_basis(self, fun, weight, param):
+     """Append a basis function with its weight and parameter to the lists."""
+     for bucket, item in (
+         (self.set, fun),
+         (self.weights, weight),
+         (self.params, param),
+     ):
+         bucket.append(item)
+
+ def basis_func_dataset(self, param):
+     """Evaluate the basis function for ``param`` on the stored inputs.
+
+     The values are flattened and divided by ``sqrt(self.n)``.
+     """
+     values = self.basis_func(param, self.x)
+     return values.view(-1) / np.sqrt(self.n)
+
+ def basis_map_set(self, x, set, weights):
+     """Stack the flattened, normalised outputs of the given basis functions.
+
+     Row ``i`` holds ``set[i](x)`` flattened and divided by ``sqrt(self.n)``.
+     ``weights`` is accepted but not used.
+     """
+     n_columns = x.size()[0] * self.size
+     value = torch.zeros(len(set), n_columns, dtype=torch.float64)
+     for row, basis_function in enumerate(set):
+         flattened = basis_function(x).view(-1)
+         value[row, :] = flattened / np.sqrt(self.n)
+     return value
+
+ def empty(self):
+     """Forget the cached basis matrix and all stored basis functions."""
+     self.active_basis = None
+     self.set, self.weights, self.params = [], [], []
+
+ def empty_add_random(self):
+     """Clear the basis, then call ``self.random_increase(1)``."""
+     self.empty()
+     added = 1
+     self.random_increase(added)
+
+ def basis_map(self, x):
+     """Evaluate the stored basis functions on ``x`` via ``basis_map_set``."""
+     stored_functions, stored_weights = self.set, self.weights
+     return self.basis_map_set(x, stored_functions, stored_weights)
+
+ def kernel(self, x, y, noise=True):
+     """Weighted sum of outer products of the stored basis functions.
+
+     Each stored function contributes ``f(x) @ f(y).T`` times its weight;
+     when ``noise == True`` the term ``s**2 * eye`` is added once.
+     """
+     rows, cols = x.size()[0], y.size()[0]
+     value = torch.zeros(rows, cols, dtype=torch.float64)
+
+     for position, basis_function in enumerate(self.set):
+         outer = torch.mm(basis_function(x), torch.t(basis_function(y)))
+         value += outer * self.weights[position]
+     if noise == True:
+         jitter = torch.eye(x.size()[0], y.size()[0], dtype=torch.float64)
+         value = value + self.s * self.s * jitter
+
+     return value
+
+ def outer_kernel(self, x):
+     """Return ``Phi @ Phi.T`` for the basis map ``Phi = self.basis_map(x)``."""
+     features = self.basis_map(x)
+     return torch.mm(features, torch.t(features))
+
+ def expected_phi(self, x, base=10000):
+     """Monte Carlo average of sampled basis functions evaluated on ``x``.
+
+     ``base`` basis functions are sampled; each is evaluated on ``x``,
+     flattened and divided by ``sqrt(self.n)``, and the mean is returned.
+     """
+     accumulator = torch.zeros(x.size()[0] * self.size, dtype=torch.float64)
+     for _ in range(base):
+         sampled = self.sample_basis_function()[0]
+         accumulator += sampled(x).view(-1) / np.sqrt(self.n)
+     return accumulator / base
+
+ def expected_phi_squared(self, x, fun, base=10000):
+     """Monte Carlo average of squared inner products with ``fun``.
+
+     ``fun(x)`` and each of ``base`` sampled basis functions are flattened
+     and divided by ``sqrt(self.n)``; the mean of the squared dot products
+     is returned.
+     """
+     total = 0
+     reference = fun(x).view(-1) / np.sqrt(self.n)
+     for _ in range(base):
+         sampled = self.sample_basis_function()[0]
+         sample = sampled(x).view(-1) / np.sqrt(self.n)
+         total += torch.dot(sample, reference) ** 2
+     return total / base
+
+ def expected_phi_squared_set(self, x, base=10000):
+ # Monte Carlo average, over `base` sampled basis functions, of the squared
+ # product of each flattened, sqrt(n)-normalised sample with the cached
+ # basis matrix self.active_basis.
+ # NOTE(review): `sample` is flattened with view(-1) and then passed to
+ # torch.mm, which takes matrices -- this looks like it cannot run as
+ # written; confirm the intended product (and its orientation) before use.
+ # NOTE(review): the accumulator has x.size()[0] entries and default dtype;
+ # whether that matches the product's shape depends on active_basis -- confirm.
+ v = self.active_basis
+
+ prod = torch.zeros(
+ x.size()[0],
+ )
+ for _ in range(base):
+ sample = self.sample_basis_function()[0](x).view(-1) / np.sqrt(self.n)
+ prod += torch.mm(sample, v) ** 2
+ prod = prod / base
+ return prod
+
+ def update_basis(self):
+     """Refresh ``self.active_basis`` and the inverse ``self.W_inv``.
+
+     Without a cached basis the full basis map of the stored inputs is
+     computed; otherwise only the most recently added basis function is
+     evaluated and appended as a new row. In both cases
+     ``W = Phi @ Phi.T + s**2 * I`` is inverted, with ``I`` sized by the
+     number of stored basis functions.
+     """
+     if self.active_basis is None:
+         self.active_basis = self.basis_map(self.x)
+     else:
+         newest = self.set[-1]
+         new_row = newest(self.x).view(1, -1) / np.sqrt(self.x.size()[0])
+         self.active_basis = torch.cat((self.active_basis, new_row), dim=0)
+
+     features = self.active_basis
+     gram = torch.mm(features, torch.t(features))
+     W = gram + self.s * self.s * torch.eye(len(self.set), dtype=torch.float64)
+     self.W_inv = torch.inverse(W)
+
+ """
Scores
"""
- def leverage_score(self, fun, adding=True, weighted=False, variance=True):
-
- if adding == True:
- print(fun(self.x).size())
- v = fun(self.x) / np.sqrt(self.x.size()[0])
- new_active_basis = torch.cat((self.active_basis, v), dim=0)
- W = torch.mm(new_active_basis, torch.t(new_active_basis)) + self.s * self.s * torch.eye(len(self.set) + 1,
- dtype=torch.float64)
- W_inv = torch.inverse(W)
- Phi = new_active_basis
- else:
- W_inv = self.W_inv
- Phi = self.active_basis
-
- if weighted == True:
- S = torch.diag(torch.sqrt(torch.from_numpy(np.array(self.weights))))
- Phi = torch.mm(S, Phi)
- else:
- pass
- # solve leverage score problem
- A = torch.mm(torch.t(Phi), torch.mm(W_inv, Phi))
- rhs = fun(self.x).view(-1, 1) / np.sqrt(self.x.size()[0])
- # print (torch.mm(torch.t(rhs),rhs), torch.mm(torch.t(rhs),torch.mm(A,rhs)))
- if variance == True:
- leverage_score = np.abs(torch.mm(torch.t(rhs), rhs) - torch.mm(torch.t(rhs), torch.mm(A, rhs))) / (
- self.s ** 2)
- else:
- leverage_score = np.abs(torch.mm(torch.t(rhs), rhs) - torch.mm(torch.t(rhs), torch.mm(A, rhs)))
-
- return leverage_score
-
- def bayes_quad_score(self, fun, base=1000, Ephi=None):
- """
- Implements score Phi(set,X)E[Phi(x)]K^{-1}E[Phi(x)]Phi(X,set)
-
- :param fun: new basis function
- :param base: size of the basis to approximate the expected mapping
- :return:
- """
- if Ephi is None:
- Ephi = self.expected_phi(self.x, base=base).view(-1, 1)
- else:
- pass
- new_set = self.set.copy()
- new_set.append(fun)
- new_Phi = self.basis_map_set(self.x, new_set, np.ones(len(new_set)).tolist())
- W = torch.mm(new_Phi, torch.t(new_Phi)) + self.s * self.s * torch.eye(len(new_set), dtype=torch.float64)
- W_inv = torch.inverse(W)
- v = torch.mm(new_Phi, Ephi)
- score = torch.mm(torch.t(v), torch.mm(W_inv, v))
- return score
-
- def greedy_score(self, candidates):
- K = self.kernel(self.x, self.x, noise=False)
- scores = torch.zeros(len(candidates), dtype=torch.float64)
- for j in range(len(candidates)):
- fun = candidates[j]
- score = torch.norm(torch.mm(fun, torch.t(fun)) - K)
- # print(torch.norm(torch.mm(fun,torch.t(fun))),torch.norm(K))
- scores[j] = score
- return scores
-
- def herding_score(self, fun, base=1000, Ephi=None):
- # if Ephi is None:
- # Ephi = self.expected_phi(self.x, base=base).view(-1,1)
- # else:
- # pass
- #
- phi = fun(self.x).view(-1) / np.sqrt(self.n)
- Phi = self.active_basis
- n, m = Phi.size()
- v = 0.0
- for j in range(n):
- v = v + torch.dot(Phi[j, :], phi) ** 2
- v = (1. / (n + 1)) * v
- z = self.expected_phi_squared(self.x, fun, base=base)
- r = z - v
- return r
-
- def variance_scores(self, set=None):
- if set is None:
- Phi = self.basis_map_set(self.x, self.set, np.ones(len(self.set)).tolist())
- W = torch.mm(Phi, torch.t(Phi)) + self.s * self.s * torch.eye(len(self.set), dtype=torch.float64)
- else:
- Phi = self.basis_map_set(self.x, set, np.ones(len(set)).tolist())
- W = torch.mm(Phi, torch.t(Phi)) + self.s * self.s * torch.eye(len(set), dtype=torch.float64)
- W_inv = torch.inverse(W)
- vars = torch.einsum('ji,ij->j', W, W_inv).view(-1, 1)
- return vars
-
- ###############################
- ## Increasing the basis size ##
- ###############################
-
- def seq_bayes_quad_increase_heuristic(self, size=1, candidates=10, base=100):
- """
- Implements sequential bayes quadrature with inexact optimization
- :param size:
- :param base:
- :return:
- """
- Ephi = self.expected_phi(self.x, base=base).view(-1, 1)
- for _ in range(size):
- funs = []
- scores = torch.zeros(candidates, dtype=torch.float64)
- params = []
- for j in range(candidates):
- fun, param = self.sample_basis_function()
- leverage_score = self.bayes_quad_score(fun, Ephi=Ephi)
- funs.append(fun)
- scores[j] = leverage_score
- params.append(param)
- argmax = torch.argmax(scores)
- self.add_to_basis(funs[argmax], 1.0, params[argmax])
- self.quadrature_weights()
-
- # def herding_exact_increase(self, size = 1):
- # """
- # Solves exactly the herding problem with a non-linear solver
- # :param size: size of the basis to be increase
- # :return: None
- # """
- # for _ in range(size):
- # #fun = lambda x: self.basis_func(param,x)
- # p = lambda omega: np.exp(-np.sum(omega ** 2, axis=1).reshape(-1, 1) / 2 * (self.gamma ** 2)) * np.power(
- # (self.gamma / np.sqrt(2 * np.pi)), 1.) * np.power(np.pi / 2, 1.)
- # ls = lambda param: -self.leverage_score(self.get_basis_function(torch.from_numpy(param).view(-1,1))).numpy()[0]*p(param.reshape(-1,1))[0]
- # # plot ls
- #
- #
- # # optimize leverage score
- # from scipy.optimize import minimize
- # start = self.distibution(self.d).view(-1, 1).numpy()
- # res = minimize(ls, start , method="L-BFGS-B", tol=0.0000001, bounds=[[-5,5]])
- # solution = torch.from_numpy(res.x).view(-1,1)
- #
- # #print (start, solution)
- # # params = np.linspace(-10, 10, 1000).reshape(-1, 1)
- # # lss = []
- # #
- # # for param in params:
- # # #print (param, p(param.reshape(-1,1))[0])
- # # lss.append(ls(param)*p(param.reshape(-1,1))[0])
- # # index = np.argmin(np.array(lss))
- # # solution = torch.from_numpy(params[index]).view(-1,1)
- # # plt.plot(params, lss)
- # # plt.plot(start,ls(start),'ro')
- # # plt.plot(solution.numpy(),ls(solution.numpy()),'go')
- # #plt.show()
- # #print(start, solution)
- # self.add_to_basis(self.get_basis_function(solution), 1., solution)
-
- def herding_increase_heuristic(self, size=1, candidates=100, base=1000):
- """
-
- :param size:
- :param base:
- :return:
- """
- Ephi = self.expected_phi(self.x, base=base)
- for _ in range(size):
- # print (_)
- self.update_basis()
- funs = []
- scores = torch.zeros(candidates, dtype=torch.float64)
- params = []
- for j in range(candidates):
- fun, param = self.sample_basis_function()
- leverage_score = self.herding_score(fun, Ephi=Ephi)
- # print (j, leverage_score)
- funs.append(fun)
- scores[j] = leverage_score
- params.append(param)
- argmax = torch.argmax(scores)
- self.add_to_basis(funs[argmax], 1., params[argmax])
- self.uniformize_weights()
-
- def herding_increase_heuristic_group(self, size=1, candidates=100, base=1000):
- """
-
- :param size:
- :param base:
- :return:
- """
- Ephi = self.expected_phi(self.x, base=base)
- for _ in range(size):
- # print (_)
- self.update_basis()
- funs = []
- params = []
- cand = torch.zeros(candidates, self.n * self.size, dtype=torch.float)
- for j in range(candidates):
- fun, param = self.sample_basis_function()
- funs.append(fun)
- cand[j, :] = fun(self.x).view(-1) / np.sqrt(self.n)
- leverage_scores = self.herding_score_group(cand)
-
- argmax = torch.argmax(leverage_scores)
- self.add_to_basis(funs[argmax], 1., params[argmax])
-
- self.uniformize_weights()
-
- def dpp_increase(self, size=1, candidates=1000):
- from dppy.finite_dpps import FiniteDPP
- funs = []
- params = []
- cand = torch.zeros(candidates, self.n * self.size, dtype=torch.float64)
-
- for j in range(candidates):
- fun, param = self.sample_basis_function()
- funs.append(fun)
- params.append(param)
- cand[j, :] = fun(self.x).view(-1) / np.sqrt(self.n)
-
- # Random feature vectors
- Phi = torch.t(cand)
- L = Phi.numpy().T.dot(Phi.numpy()) + self.s * self.s * torch.eye(candidates, candidates,
- dtype=torch.float64).numpy()
- DPP = FiniteDPP('likelihood', **{'L': L})
- DPP.flush_samples()
- DPP.sample_exact_k_dpp(size=size)
- sample_ind = DPP.list_of_samples[0]
- for sample in sample_ind:
- self.add_to_basis(funs[sample], 1., params[sample])
- self.uniformize_weights()
-
- def leverage_score_sampling(self, size=1):
- count = 0
- self.update_basis()
- while count < size:
-
- fun, param = self.sample_basis_function()
- leverage_score = self.leverage_score(fun)
- q_bar = size
-
- q = np.random.binomial(q_bar, float(leverage_score))
- # print(count, q, leverage_score)
- if q > 0:
- w = (q / q_bar) / leverage_score
-
- self.add_to_basis(fun, w, param)
- self.update_basis()
- # print("adding", w.float(), param)
- count += 1
- else:
- pass
- # print ("reject", q)
- # print ("sum", np.sum(self.weights))
- # self.uniformize_weights()
- # self.quadrature_weights()
- # self.leverage_weights()
- self.normalize_weights()
-
- # optimize omp weights
-
- def hermite_quadrature_basis(self, size=1):
- self.set = []
- self.weights = []
- self.params = []
-
- (nodes, weights) = np.polynomial.hermite.hermgauss(int(size))
- nodes = torch.from_numpy(np.sqrt(2) * nodes / self.gamma)
- weights = weights / np.sqrt(np.pi)
- # self.weights = weights.tolist()
- # print (self.weights)
- for index in range(size):
- fun = self.get_basis_function(nodes[index].view(self.d, -1))
- self.add_to_basis(fun, weights[index], nodes[index])
-
- def greedy_increase(self, size=1, base=100):
- for _ in range(size):
- # print (_)
- self.update_basis()
- funs = []
- params = []
- cand = torch.zeros(base, self.n, self.size, dtype=torch.float64)
- for j in range(base):
- fun, param = self.sample_basis_function()
- funs.append(fun)
- params.append(param)
- cand[j, :] = fun(self.x) # / np.sqrt(self.n)
-
- scores = self.greedy_score(cand)
- argmax = torch.argmin(scores)
- self.add_to_basis(funs[argmax], 1., params[argmax])
- self.normalize_weights()
-
- # print (self.params)
-
- def random_increase(self, size=1):
- for _ in range(size):
- f, param = self.sample_basis_function()
- self.add_to_basis(f, 1., param)
- self.uniformize_weights()
-
- def qmc_increase(self, size=1):
- params = self.sample_basis_function_qmc(size=size)
- n = params.size()[0]
- for j in range(n):
- param = params[j, :].view(1, -1)
- # print (params)
- self.add_to_basis(self.get_basis_function(param), 1., param)
- self.uniformize_weights()
-
- def bach_algortihm(self, size=1, candidates=100):
- for _ in range(size):
- set = []
- params = []
- for j in range(candidates):
- f, param = self.sample_basis_function()
- set.append(f)
- params.append(param)
- vars = self.variance_scores(set=set)
- index = np.argmax(-vars)
- self.add_to_basis(set[index], 1., params[index])
- vars = self.variance_scores()
- self.weights = vars.view(-1).tolist()
- self.normalize_weights()
-
- def pca(self, kernel, size=1):
- if size > self.n:
- size = self.n
- GP = NystromFeatures(kernel, m=torch.Tensor([size]), s=self.s, approx="svd")
- GP.fit_gp(self.x, self.y)
- return GP.outer_kernel()
-
- def nystrom(self, kernel, size=1):
- if size > self.n:
- size = self.n
- GP = NystromFeatures(kernel, m=torch.Tensor([size]), s=self.s, approx="uniform")
- GP.fit_gp(self.x, self.y)
- return GP.outer_kernel()
-
- ###########################
- ## weights optimization ##
- ###########################
-
- def normalize_weights(self):
-
- # self.weights = np.ones(len(self.set))/len(self.set)
- sum = np.sum(np.array(self.weights))
- self.weights = np.array(self.weights) / sum
- self.weights = self.weights.tolist()
-
- # print (self.weights)
-
- def uniformize_weights(self):
- self.weights = np.ones(len(self.set)) / len(self.set)
- self.weights = self.weights.tolist()
-
- # print (self.weights)
-
- def bayesian_quadrature_weights(self, base=1000):
- """
- Bayesian Quadrature weights
- two possible kernels
- :return:
- """
-
- phi = fun(self.x).view(-1) / np.sqrt(self.n)
- Phi = self.active_basis
- n, m = Phi.size()
-
- Z = self.expected_phi_squared_set(self.x, base=base)
-
- # assemble kernel
- K = self.outer_kernel(self.x) * self.outer_kernel(self.x)
- # invert kernel
- self.weights = torch.mm(torch.mm(Z, torch.pinverse(K)), Z)
- self.weights = self.weights.tolist()
-
- def leverage_weights(self):
-
- Phi = self.basis_map(self.x)
- self.active_basis = Phi
- W = torch.mm(Phi, torch.t(Phi)) + self.s * self.s * torch.eye(len(self.set), dtype=torch.float64)
- self.W_inv = torch.inverse(W)
-
- new_weights = []
- n = len(self.set)
- for fun in self.set:
- leverage_score = self.leverage_score(fun, adding=False, variance=True, weighted=False)
- # print (leverage_score)
- new_weights.append(leverage_score)
- self.weights = new_weights
- self.normalize_weights()
-
- def leverage_weights_experimental(self, Kinv):
-
- Phi = self.basis_map(self.x)
- self.active_basis = Phi
- W = torch.mm(Phi, torch.t(Phi)) + self.s * self.s * torch.eye(len(self.set), dtype=torch.float64)
- W_outer = torch.mm(torch.t(Phi), Phi) + self.s * self.s * torch.eye(self.n * 2, dtype=torch.float64)
- W_outer_inv = torch.inverse(W_outer)
- self.W_inv = torch.inverse(W)
-
- print(torch.norm(W_outer - Kinv))
-
- # print (Kinv)
- new_weights = []
- n = len(self.set)
- for fun in self.set:
- # leverage_score = self.leverage_score(fun, adding = False, variance = False, weighted= True)
- v = fun(self.x).view(-1, 1) / np.sqrt(self.n)
- # print (torch.trace(torch.mm(torch.t(v),v)))
- mat = torch.mm(torch.t(v), torch.mm(W_outer_inv, v))
- # print (mat)
- leverage_score = torch.trace(mat)
- if leverage_score > 0.0:
- # print ("Violation!")
- lv = self.leverage_score(fun, adding=False, variance=True, weighted=False)
- print(float(leverage_score), float(lv))
- # new_weights.append(float(2./(n*leverage_score)))
- new_weights.append(1. / (n * leverage_score))
- self.weights = new_weights
- self.normalize_weights()
-
- # print (self.weights)
- # print (self.params)
- # print(self.weights)
- def omp_optimize(self, size=1):
- pass
+    def leverage_score(self, fun, adding=True, weighted=False, variance=True):
+        """Ridge leverage score of ``fun`` with respect to the active basis.
+
+        Fixes over the previous version: the candidate is flattened to a single row
+        before being concatenated to ``self.active_basis`` (as ``update_basis`` does),
+        the stray debug ``print`` is gone, and ``torch.abs`` replaces ``np.abs``.
+
+        :param fun: candidate basis function, evaluated on ``self.x``
+        :param adding: if true, score against the active basis extended by ``fun``;
+            otherwise use the cached ``self.active_basis`` / ``self.W_inv``
+        :param weighted: if true, scale the basis rows by ``sqrt(self.weights)``
+            (NOTE(review): with ``adding`` also true the basis has one more row than
+            ``self.weights`` has entries -- confirm this combination is never used)
+        :param variance: if true, divide the score by ``self.s ** 2``
+        :return: 1x1 tensor ``|r^T r - r^T A r|`` with ``A = Phi^T W^{-1} Phi``
+        """
+        # Column vector of the candidate's scaled features; reused as the new row.
+        rhs = fun(self.x).view(-1, 1) / np.sqrt(self.x.size()[0])
+
+        if adding:
+            Phi = torch.cat((self.active_basis, rhs.view(1, -1)), dim=0)
+            W = torch.mm(Phi, torch.t(Phi)) + self.s * self.s * torch.eye(
+                len(self.set) + 1, dtype=torch.float64
+            )
+            W_inv = torch.inverse(W)
+        else:
+            W_inv = self.W_inv
+            Phi = self.active_basis
+
+        if weighted:
+            S = torch.diag(torch.sqrt(torch.from_numpy(np.array(self.weights))))
+            Phi = torch.mm(S, Phi)
+
+        # solve leverage score problem
+        A = torch.mm(torch.t(Phi), torch.mm(W_inv, Phi))
+        score = torch.abs(
+            torch.mm(torch.t(rhs), rhs) - torch.mm(torch.t(rhs), torch.mm(A, rhs))
+        )
+        if variance:
+            score = score / (self.s**2)
+        return score
+
+    def bayes_quad_score(self, fun, base=1000, Ephi=None):
+        """
+        Score ``v^T W^{-1} v`` for the current set extended by ``fun``, where
+        ``v = Phi E[phi]`` and ``W = Phi Phi^T + s^2 I``.
+
+        :param fun: new basis function
+        :param base: number of samples used to estimate ``Ephi`` when it is not given
+        :param Ephi: optional precomputed expected feature map (column vector)
+        :return: 1x1 tensor with the score
+        """
+        mean_map = Ephi
+        if mean_map is None:
+            mean_map = self.expected_phi(self.x, base=base).view(-1, 1)
+        extended = self.set.copy()
+        extended.append(fun)
+        unit_weights = np.ones(len(extended)).tolist()
+        features = self.basis_map_set(self.x, extended, unit_weights)
+        gram = torch.mm(features, torch.t(features))
+        regularised = gram + self.s * self.s * torch.eye(len(extended), dtype=torch.float64)
+        projected = torch.mm(features, mean_map)
+        return torch.mm(torch.t(projected), torch.mm(torch.inverse(regularised), projected))
+
+    def greedy_score(self, candidates):
+        """Distance of each candidate's rank-one kernel from the current kernel.
+
+        :param candidates: indexable collection of 2-D feature tensors
+        :return: float64 vector of ``torch.norm(c c^T - K)`` where ``K`` is the
+            noise-free ``self.kernel(self.x, self.x)``
+        """
+        current_kernel = self.kernel(self.x, self.x, noise=False)
+        distances = torch.zeros(len(candidates), dtype=torch.float64)
+        for position, features in enumerate(candidates):
+            distances[position] = torch.norm(
+                torch.mm(features, torch.t(features)) - current_kernel
+            )
+        return distances
+
+    def herding_score(self, fun, base=1000, Ephi=None):
+        """Herding score of ``fun``: expected squared overlap minus the overlap
+        already captured by the active basis.
+
+        :param fun: candidate basis function, evaluated on ``self.x``
+        :param base: sample count forwarded to ``expected_phi_squared``
+        :param Ephi: accepted for interface compatibility; not read by this method
+        :return: ``z - v`` with ``z = expected_phi_squared(self.x, fun)`` and
+            ``v = sum_j <Phi_j, phi>^2 / (n + 1)``, ``n`` = rows of the active basis
+        """
+        candidate = fun(self.x).view(-1) / np.sqrt(self.n)
+        n_rows, _ = self.active_basis.size()
+        captured = 0.0
+        for row in self.active_basis:
+            captured = captured + torch.dot(row, candidate) ** 2
+        captured = (1.0 / (n_rows + 1)) * captured
+        return self.expected_phi_squared(self.x, fun, base=base) - captured
+
+    def variance_scores(self, set=None):
+        """Per-function scores ``diag(W W^{-1})`` for a set of basis functions.
+
+        :param set: basis functions to score; defaults to ``self.set``
+        :return: column vector with one entry per function
+
+        NOTE(review): the einsum contracts ``W`` with its own inverse, so every
+        entry is 1 up to round-off -- confirm this is the intended quantity.
+        """
+        members = self.set if set is None else set
+        features = self.basis_map_set(self.x, members, np.ones(len(members)).tolist())
+        gram = torch.mm(features, torch.t(features))
+        regularised = gram + self.s * self.s * torch.eye(len(members), dtype=torch.float64)
+        return torch.einsum("ji,ij->j", regularised, torch.inverse(regularised)).view(-1, 1)
+
+ ###############################
+ ## Increasing the basis size ##
+ ###############################
+
+ def seq_bayes_quad_increase_heuristic(self, size=1, candidates=10, base=100):
+ """
+ Implements sequential Bayesian quadrature with inexact (sampled) optimization.
+
+ Random candidates are drawn with ``sample_basis_function`` and scored with
+ ``bayes_quad_score``; the best-scoring candidate is added with weight 1.0.
+
+ :param size: number of selection rounds
+ :param candidates: number of random candidates scored per round
+ :param base: number of samples used by ``expected_phi``
+ :return: None
+ """
+ # The expected feature map is estimated once and shared by all score calls.
+ Ephi = self.expected_phi(self.x, base=base).view(-1, 1)
+ for _ in range(size):
+ funs = []
+ scores = torch.zeros(candidates, dtype=torch.float64)
+ params = []
+ for j in range(candidates):
+ fun, param = self.sample_basis_function()
+ leverage_score = self.bayes_quad_score(fun, Ephi=Ephi)
+ funs.append(fun)
+ scores[j] = leverage_score
+ params.append(param)
+ argmax = torch.argmax(scores)
+ self.add_to_basis(funs[argmax], 1.0, params[argmax])
+ # NOTE(review): quadrature_weights is not defined in the visible part of this file -- confirm it exists.
+ self.quadrature_weights()
+
+ # def herding_exact_increase(self, size = 1):
+ # """
+ # Solves exactly the herding problem with a non-linear solver
+ # :param size: size of the basis to be increase
+ # :return: None
+ # """
+ # for _ in range(size):
+ # #fun = lambda x: self.basis_func(param,x)
+ # p = lambda omega: np.exp(-np.sum(omega ** 2, axis=1).reshape(-1, 1) / 2 * (self.gamma ** 2)) * np.power(
+ # (self.gamma / np.sqrt(2 * np.pi)), 1.) * np.power(np.pi / 2, 1.)
+ # ls = lambda param: -self.leverage_score(self.get_basis_function(torch.from_numpy(param).view(-1,1))).numpy()[0]*p(param.reshape(-1,1))[0]
+ # # plot ls
+ #
+ #
+ # # optimize leverage score
+ # from scipy.optimize import minimize
+ # start = self.distibution(self.d).view(-1, 1).numpy()
+ # res = minimize(ls, start , method="L-BFGS-B", tol=0.0000001, bounds=[[-5,5]])
+ # solution = torch.from_numpy(res.x).view(-1,1)
+ #
+ # #print (start, solution)
+ # # params = np.linspace(-10, 10, 1000).reshape(-1, 1)
+ # # lss = []
+ # #
+ # # for param in params:
+ # # #print (param, p(param.reshape(-1,1))[0])
+ # # lss.append(ls(param)*p(param.reshape(-1,1))[0])
+ # # index = np.argmin(np.array(lss))
+ # # solution = torch.from_numpy(params[index]).view(-1,1)
+ # # plt.plot(params, lss)
+ # # plt.plot(start,ls(start),'ro')
+ # # plt.plot(solution.numpy(),ls(solution.numpy()),'go')
+ # #plt.show()
+ # #print(start, solution)
+ # self.add_to_basis(self.get_basis_function(solution), 1., solution)
+
+ def herding_increase_heuristic(self, size=1, candidates=100, base=1000):
+ """
+ Grow the basis by herding: score random candidates with ``herding_score``
+ and add the highest-scoring one with weight 1.0; weights end up uniform.
+
+ :param size: number of selection rounds
+ :param candidates: number of random candidates scored per round
+ :param base: number of samples used by ``expected_phi``
+ :return: None
+ """
+ # NOTE(review): herding_score accepts Ephi but does not read it, so this estimate looks unused.
+ Ephi = self.expected_phi(self.x, base=base)
+ for _ in range(size):
+ # print (_)
+ self.update_basis()
+ funs = []
+ scores = torch.zeros(candidates, dtype=torch.float64)
+ params = []
+ for j in range(candidates):
+ fun, param = self.sample_basis_function()
+ leverage_score = self.herding_score(fun, Ephi=Ephi)
+ # print (j, leverage_score)
+ funs.append(fun)
+ scores[j] = leverage_score
+ params.append(param)
+ argmax = torch.argmax(scores)
+ self.add_to_basis(funs[argmax], 1.0, params[argmax])
+ self.uniformize_weights()
+
+ def herding_increase_heuristic_group(self, size=1, candidates=100, base=1000):
+ """
+ Batched variant of the herding heuristic: all candidates of a round are
+ stacked into one matrix and scored together by ``herding_score_group``.
+
+ Fix: each sampled ``param`` is now recorded, so ``params[argmax]`` no longer
+ indexes an empty list.
+
+ :param size: number of selection rounds
+ :param candidates: number of random candidates scored per round
+ :param base: number of samples used by ``expected_phi``
+ :return: None
+ """
+ # NOTE(review): Ephi is not read again in this method.
+ Ephi = self.expected_phi(self.x, base=base)
+ for _ in range(size):
+ # print (_)
+ self.update_basis()
+ funs = []
+ params = []
+ # NOTE(review): float32 here while the rest of the file uses float64 -- confirm herding_score_group expects this.
+ cand = torch.zeros(candidates, self.n * self.size, dtype=torch.float)
+ for j in range(candidates):
+ fun, param = self.sample_basis_function()
+ funs.append(fun)
+ params.append(param)
+ cand[j, :] = fun(self.x).view(-1) / np.sqrt(self.n)
+ leverage_scores = self.herding_score_group(cand)
+
+ argmax = torch.argmax(leverage_scores)
+ self.add_to_basis(funs[argmax], 1.0, params[argmax])
+
+ self.uniformize_weights()
+
+ def dpp_increase(self, size=1, candidates=1000):
+ """
+ Add basis functions selected by an exact k-DPP sample over random candidates.
+
+ :param size: number of items requested from ``sample_exact_k_dpp``
+ :param candidates: number of random basis functions in the candidate pool
+ :return: None
+ """
+ # Imported lazily so dppy is only needed when this method is called.
+ from dppy.finite_dpps import FiniteDPP
+
+ funs = []
+ params = []
+ cand = torch.zeros(candidates, self.n * self.size, dtype=torch.float64)
+
+ for j in range(candidates):
+ fun, param = self.sample_basis_function()
+ funs.append(fun)
+ params.append(param)
+ cand[j, :] = fun(self.x).view(-1) / np.sqrt(self.n)
+
+ # Random feature vectors
+ Phi = torch.t(cand)
+ # Likelihood kernel: candidate Gram matrix plus s^2 on the diagonal.
+ L = (
+ Phi.numpy().T.dot(Phi.numpy())
+ + self.s
+ * self.s
+ * torch.eye(candidates, candidates, dtype=torch.float64).numpy()
+ )
+ DPP = FiniteDPP("likelihood", **{"L": L})
+ DPP.flush_samples()
+ DPP.sample_exact_k_dpp(size=size)
+ sample_ind = DPP.list_of_samples[0]
+ for sample in sample_ind:
+ self.add_to_basis(funs[sample], 1.0, params[sample])
+ self.uniformize_weights()
+
+ def leverage_score_sampling(self, size=1):
+ """
+ Add ``size`` basis functions by rejection sampling on their leverage score.
+
+ A candidate is accepted when a binomial draw with ``size`` trials and success
+ probability equal to its leverage score is positive; its weight is
+ ``(q / size) / leverage_score``.
+
+ :param size: number of basis functions to accept
+ :return: None
+ """
+ count = 0
+ self.update_basis()
+ while count < size:
+
+ fun, param = self.sample_basis_function()
+ leverage_score = self.leverage_score(fun)
+ q_bar = size
+
+ # NOTE(review): np.random.binomial needs 0 <= p <= 1; confirm the score cannot exceed 1 here.
+ q = np.random.binomial(q_bar, float(leverage_score))
+ # print(count, q, leverage_score)
+ if q > 0:
+ w = (q / q_bar) / leverage_score
+
+ self.add_to_basis(fun, w, param)
+ self.update_basis()
+ # print("adding", w.float(), param)
+ count += 1
+ else:
+ pass
+ # print ("reject", q)
+ # print ("sum", np.sum(self.weights))
+ # self.uniformize_weights()
+ # self.quadrature_weights()
+ # self.leverage_weights()
+ self.normalize_weights()
+
+ # optimize omp weights
+
+    def hermite_quadrature_basis(self, size=1):
+        """Replace the basis with one function per Gauss-Hermite quadrature node.
+
+        Nodes from ``np.polynomial.hermite.hermgauss`` are scaled by
+        ``sqrt(2) / self.gamma`` and the weights by ``1 / sqrt(pi)``.
+
+        :param size: number of quadrature nodes (and basis functions)
+        :return: None
+        """
+        self.set, self.weights, self.params = [], [], []
+
+        raw_nodes, raw_weights = np.polynomial.hermite.hermgauss(int(size))
+        scaled_nodes = torch.from_numpy(np.sqrt(2) * raw_nodes / self.gamma)
+        scaled_weights = raw_weights / np.sqrt(np.pi)
+        for position in range(size):
+            basis_function = self.get_basis_function(scaled_nodes[position].view(self.d, -1))
+            self.add_to_basis(basis_function, scaled_weights[position], scaled_nodes[position])
+
+ def greedy_increase(self, size=1, base=100):
+ """
+ Grow the basis greedily: draw ``base`` random candidates, score them with
+ ``greedy_score`` and add the one with the smallest score (weight 1.0).
+
+ :param size: number of selection rounds
+ :param base: number of random candidates per round
+ :return: None
+ """
+ for _ in range(size):
+ # print (_)
+ self.update_basis()
+ funs = []
+ params = []
+ cand = torch.zeros(base, self.n, self.size, dtype=torch.float64)
+ for j in range(base):
+ fun, param = self.sample_basis_function()
+ funs.append(fun)
+ params.append(param)
+ cand[j, :] = fun(self.x) # / np.sqrt(self.n)
+
+ scores = self.greedy_score(cand)
+ # Despite its name, ``argmax`` holds the index of the minimum score.
+ argmax = torch.argmin(scores)
+ self.add_to_basis(funs[argmax], 1.0, params[argmax])
+ self.normalize_weights()
+
+ # print (self.params)
+
+ def random_increase(self, size=1):
+ """
+ Add ``size`` randomly sampled basis functions and make all weights uniform.
+
+ :param size: number of basis functions to draw
+ :return: None
+ """
+ for _ in range(size):
+ f, param = self.sample_basis_function()
+ self.add_to_basis(f, 1.0, param)
+ self.uniformize_weights()
+
+ def qmc_increase(self, size=1):
+ """
+ Add one basis function per row of ``sample_basis_function_qmc(size=size)``
+ and make all weights uniform.
+
+ :param size: forwarded to ``sample_basis_function_qmc``
+ :return: None
+ """
+ params = self.sample_basis_function_qmc(size=size)
+ n = params.size()[0]
+ for j in range(n):
+ # Each parameter is passed on as a 1 x d row.
+ param = params[j, :].view(1, -1)
+ # print (params)
+ self.add_to_basis(self.get_basis_function(param), 1.0, param)
+ self.uniformize_weights()
+
+ def bach_algortihm(self, size=1, candidates=100):
+ """
+ Grow the basis from random candidate pools scored with ``variance_scores``,
+ then set the weights from the variance scores of the whole basis.
+
+ :param size: number of selection rounds
+ :param candidates: number of random candidates per round
+ :return: None
+ """
+ for _ in range(size):
+ # NOTE: ``set`` and ``vars`` shadow the builtins inside this method.
+ set = []
+ params = []
+ for j in range(candidates):
+ f, param = self.sample_basis_function()
+ set.append(f)
+ params.append(param)
+ vars = self.variance_scores(set=set)
+ # argmax of the negated scores, i.e. the candidate with the smallest score
+ index = np.argmax(-vars)
+ self.add_to_basis(set[index], 1.0, params[index])
+ vars = self.variance_scores()
+ self.weights = vars.view(-1).tolist()
+ self.normalize_weights()
+
+    def pca(self, kernel, size=1):
+        """Outer kernel of a ``NystromFeatures`` model fitted with ``approx="svd"``.
+
+        :param kernel: kernel object handed to ``NystromFeatures``
+        :param size: requested number of features, capped at ``self.n``
+        :return: result of ``outer_kernel()`` on the fitted model
+        """
+        rank = self.n if size > self.n else size
+        model = NystromFeatures(kernel, m=torch.tensor([rank]), s=self.s, approx="svd")
+        model.fit_gp(self.x, self.y)
+        return model.outer_kernel()
+
+    def nystrom(self, kernel, size=1):
+        """Outer kernel of a ``NystromFeatures`` model fitted with ``approx="uniform"``.
+
+        :param kernel: kernel object handed to ``NystromFeatures``
+        :param size: requested number of features, capped at ``self.n``
+        :return: result of ``outer_kernel()`` on the fitted model
+        """
+        rank = self.n if size > self.n else size
+        model = NystromFeatures(kernel, m=torch.tensor([rank]), s=self.s, approx="uniform")
+        model.fit_gp(self.x, self.y)
+        return model.outer_kernel()
+
+ ###########################
+ ## weights optimization ##
+ ###########################
+
+    def normalize_weights(self):
+        """Rescale ``self.weights`` in place so that they sum to one (stored as a list)."""
+        total = np.sum(np.array(self.weights))
+        rescaled = np.array(self.weights) / total
+        self.weights = rescaled.tolist()
+
+ # print (self.weights)
+
+    def uniformize_weights(self):
+        """Give every function in ``self.set`` the same weight ``1 / len(self.set)``."""
+        uniform = np.ones(len(self.set)) / len(self.set)
+        self.weights = uniform.tolist()
+
+ # print (self.weights)
+
+ def bayesian_quadrature_weights(self, base=1000):
+ """
+ Bayesian Quadrature weights
+ two possible kernels
+
+ NOTE(review): as written this method looks unfinished -- see the notes on
+ ``fun`` and on the final product below.
+
+ :param base: number of samples used by ``expected_phi_squared_set``
+ :return: None; the result is stored in ``self.weights``
+ """
+
+ # NOTE(review): ``fun`` is neither a parameter nor a local; this line raises NameError
+ # unless a module-level ``fun`` exists. ``phi``, ``n`` and ``m`` are not used afterwards.
+ phi = fun(self.x).view(-1) / np.sqrt(self.n)
+ Phi = self.active_basis
+ n, m = Phi.size()
+
+ Z = self.expected_phi_squared_set(self.x, base=base)
+
+ # assemble kernel
+ # (element-wise square of the outer kernel)
+ K = self.outer_kernel(self.x) * self.outer_kernel(self.x)
+ # invert kernel
+ # NOTE(review): torch.mm requires 2-D operands -- confirm the shape of Z and the intended product.
+ self.weights = torch.mm(torch.mm(Z, torch.pinverse(K)), Z)
+ self.weights = self.weights.tolist()
+
+    def leverage_weights(self):
+        """Set the weights to the normalised leverage scores of the current basis.
+
+        Rebuilds ``self.active_basis`` and ``self.W_inv`` from ``self.basis_map``,
+        scores every function in ``self.set`` with
+        ``leverage_score(adding=False, variance=True, weighted=False)`` and
+        normalises the resulting weights.
+        """
+        basis = self.basis_map(self.x)
+        self.active_basis = basis
+        gram = torch.mm(basis, torch.t(basis))
+        ridge = self.s * self.s * torch.eye(len(self.set), dtype=torch.float64)
+        self.W_inv = torch.inverse(gram + ridge)
+
+        self.weights = [
+            self.leverage_score(member, adding=False, variance=True, weighted=False)
+            for member in self.set
+        ]
+        self.normalize_weights()
+
+ def leverage_weights_experimental(self, Kinv):
+ """
+ Experimental re-weighting: each weight is ``1 / (n * score)`` where ``score``
+ is ``v^T (Phi^T Phi + s^2 I)^{-1} v`` for the function's scaled features ``v``
+ and ``n = len(self.set)``; the weights are then normalised.
+
+ :param Kinv: matrix compared against ``W_outer`` in the debug print below
+ :return: None
+ """
+
+ Phi = self.basis_map(self.x)
+ self.active_basis = Phi
+ W = torch.mm(Phi, torch.t(Phi)) + self.s * self.s * torch.eye(
+ len(self.set), dtype=torch.float64
+ )
+ # NOTE(review): the identity is hard-coded to 2 * self.n columns -- presumably self.size == 2; confirm.
+ W_outer = torch.mm(torch.t(Phi), Phi) + self.s * self.s * torch.eye(
+ self.n * 2, dtype=torch.float64
+ )
+ W_outer_inv = torch.inverse(W_outer)
+ self.W_inv = torch.inverse(W)
+
+ # NOTE(review): debug output; it compares W_outer itself (not its inverse) with Kinv -- confirm.
+ print(torch.norm(W_outer - Kinv))
+
+ # print (Kinv)
+ new_weights = []
+ n = len(self.set)
+ for fun in self.set:
+ # leverage_score = self.leverage_score(fun, adding = False, variance = False, weighted= True)
+ v = fun(self.x).view(-1, 1) / np.sqrt(self.n)
+ # print (torch.trace(torch.mm(torch.t(v),v)))
+ mat = torch.mm(torch.t(v), torch.mm(W_outer_inv, v))
+ # print (mat)
+ leverage_score = torch.trace(mat)
+ if leverage_score > 0.0:
+ # print ("Violation!")
+ lv = self.leverage_score(
+ fun, adding=False, variance=True, weighted=False
+ )
+ print(float(leverage_score), float(lv))
+ # new_weights.append(float(2./(n*leverage_score)))
+ new_weights.append(1.0 / (n * leverage_score))
+ self.weights = new_weights
+ self.normalize_weights()
+
+ # print (self.weights)
+ # print (self.params)
+ # print(self.weights)
+ def omp_optimize(self, size=1):
+ """Placeholder: the body is empty, so ``size`` is ignored and None is returned."""
+ pass
if __name__ == "__main__":
- d = 1
- n = 1024
- N = 100
- L_infinity_ball = 1
- s = 0.001
- xtest = torch.from_numpy(interval(n, d))
- # x = torch.from_numpy(np.random.uniform(-L_infinity_ball, L_infinity_ball, size=(N, d)))
- x = torch.from_numpy(np.linspace(-1, 1, N)).view(N, d)
- f = lambda q: torch.sin(torch.sum(q * 4, dim=1)).view(-1, 1)
- y = f(x)
-
- IK = IntegralKernel([x, y], s=s)
- IK.random_increase(1000)
- IK.uniformize_weights()
- IK.quadrature_weights()
-
- fun = IK.sample_basis_function()[0]
- print(IK.bayes_quad_score(fun))
+ # Script demo: fit on N points of sin(4 x) on [-1, 1], build a 1000-function
+ # random basis and print the bayes_quad_score of one freshly sampled function.
+ d = 1
+ n = 1024
+ N = 100
+ L_infinity_ball = 1
+ s = 0.001
+ xtest = torch.from_numpy(interval(n, d))
+ # x = torch.from_numpy(np.random.uniform(-L_infinity_ball, L_infinity_ball, size=(N, d)))
+ x = torch.from_numpy(np.linspace(-1, 1, N)).view(N, d)
+ f = lambda q: torch.sin(torch.sum(q * 4, dim=1)).view(-1, 1)
+ y = f(x)
+
+ IK = IntegralKernel([x, y], s=s)
+ IK.random_increase(1000)
+ IK.uniformize_weights()
+ # NOTE(review): quadrature_weights is not defined in the visible part of this file -- confirm it exists.
+ IK.quadrature_weights()
+
+ fun = IK.sample_basis_function()[0]
+ print(IK.bayes_quad_score(fun))
diff --git a/stpy/legacy/integral_kernels2.py b/stpy/legacy/integral_kernels2.py
index d442d8d..b5699b5 100755
--- a/stpy/legacy/integral_kernels2.py
+++ b/stpy/legacy/integral_kernels2.py
@@ -7,133 +7,143 @@
class IntegralKernel:
+ """Kernel built as a weighted sum of sampled basis functions (cos/sin features by default)."""
- def __init__(self, dataset, s=0.1):
+ def __init__(self, dataset, s=0.1):
+ """Store the data ``dataset = [x, y]``, the noise level ``s`` and an empty basis."""
- self.x = dataset[0]
- self.y = dataset[1]
+ self.x = dataset[0]
+ self.y = dataset[1]
- self.s = s
- self.gamma = 1.0
- self.distibution = lambda size: torch.from_numpy(np.random.normal(size=size) * (1. / self.gamma))
+ self.s = s
+ self.gamma = 1.0
+ # Default parameter sampler: standard normal draws scaled by 1 / gamma.
+ self.distibution = lambda size: torch.from_numpy(
+ np.random.normal(size=size) * (1.0 / self.gamma)
+ )
- self.n = self.x.size()[0]
- self.d = self.x.size()[1]
+ self.n = self.x.size()[0]
+ self.d = self.x.size()[1]
- self.basis_func = lambda x, theta: torch.cat((torch.cos(torch.mm(theta, x)), torch.sin(torch.mm(theta, x))), 1)
- self.size = 2
+ # NOTE(review): get_basis_function calls basis_func(param, x), so in this lambda
+ # ``x`` receives the sampled parameter and ``theta`` receives the data points.
+ self.basis_func = lambda x, theta: torch.cat(
+ (torch.cos(torch.mm(theta, x)), torch.sin(torch.mm(theta, x))), 1
+ )
+ self.size = 2
- self.set = []
- self.weights = []
- self.params = []
- self.active_basis = None
+ self.set = []
+ self.weights = []
+ self.params = []
+ self.active_basis = None
- def set_distribution(self, distibution):
- self.distibution = distibution
+ def set_distribution(self, distibution):
+ """Replace the callable used to sample basis-function parameters."""
+ self.distibution = distibution
- def set_basis_function(self, fun, size):
- self.basis_func = fun
- self.size = size
+ def set_basis_function(self, fun, size):
+ """Replace the basis function and the ``size`` used to lay out feature rows."""
+ self.basis_func = fun
+ self.size = size
- def sample_basis_function(self):
- param = self.distibution(self.d).view(-1, 1)
- return [self.get_basis_function(param), param]
+ def sample_basis_function(self):
+ """Draw a parameter column and return ``[basis function, parameter]``."""
+ param = self.distibution(self.d).view(-1, 1)
+ return [self.get_basis_function(param), param]
- def sample_basis_function_qmc(self, size=1):
- inv_cum_dist = lambda x: norm.ppf(x) * (1. / 1.)
- params = torch.from_numpy(sample_qmc_halton(inv_cum_dist, size=(size, self.d)))
- return params
+ def sample_basis_function_qmc(self, size=1):
+ """Return a ``(size, d)`` tensor of parameters from ``sample_qmc_halton`` with the normal inverse CDF."""
+ inv_cum_dist = lambda x: norm.ppf(x) * (1.0 / 1.0)
+ params = torch.from_numpy(sample_qmc_halton(inv_cum_dist, size=(size, self.d)))
+ return params
- def sample_basis_vector(self):
- fun = self.sample_basis_function()[0]
- return fun(self.x).view(-1) / np.sqrt(self.n)
+ def sample_basis_vector(self):
+ """Evaluate a freshly sampled basis function on ``self.x``, flattened and divided by sqrt(n)."""
+ fun = self.sample_basis_function()[0]
+ return fun(self.x).view(-1) / np.sqrt(self.n)
- def get_basis_function(self, param):
- return lambda x: self.basis_func(param, x)
+ def get_basis_function(self, param):
+ """Return a one-argument closure that calls ``self.basis_func(param, x)``."""
+ return lambda x: self.basis_func(param, x)
- def add_to_basis(self, fun, weight, param):
- self.set.append(fun)
- self.weights.append(weight)
- self.params.append(param)
+ def add_to_basis(self, fun, weight, param):
+ """Append a basis function together with its weight and parameter."""
+ self.set.append(fun)
+ self.weights.append(weight)
+ self.params.append(param)
- def empty(self):
- self.active_basis = None
- self.set = []
- self.weights = []
- self.params = []
+ def empty(self):
+ """Drop all basis functions, weights, parameters and the cached basis."""
+ self.active_basis = None
+ self.set = []
+ self.weights = []
+ self.params = []
- def empty_add_random(self):
- self.empty()
- self.random_increase(1)
+ def empty_add_random(self):
+ """Reset the basis, then call ``random_increase(1)``."""
+ # NOTE(review): random_increase is not defined in this class as shown (only random_basis) -- confirm.
+ self.empty()
+ self.random_increase(1)
- def kernel(self, x, y, noise=True):
- value = torch.zeros(x.size()[0], y.size()[0], dtype=torch.float64)
+ def kernel(self, x, y, noise=True):
+ """Weighted sum of ``elem(x) elem(y)^T`` over the basis, plus ``s^2`` on the diagonal when ``noise`` is True."""
+ value = torch.zeros(x.size()[0], y.size()[0], dtype=torch.float64)
- for index, elem in enumerate(self.set):
- value += torch.mm(elem(x), torch.t(elem(y))) * self.weights[index]
+ for index, elem in enumerate(self.set):
+ value += torch.mm(elem(x), torch.t(elem(y))) * self.weights[index]
- if noise == True:
- value = value + self.s * self.s * torch.eye(x.size()[0], y.size()[0], dtype=torch.float64)
+ if noise == True:
+ value = value + self.s * self.s * torch.eye(
+ x.size()[0], y.size()[0], dtype=torch.float64
+ )
- return value
+ return value
- def random_basis(self, size=1):
- for _ in range(size):
- f, param = self.sample_basis_function()
- self.add_to_basis(f, 1., param)
- self.uniformize_weights()
+ def random_basis(self, size=1):
+ """Add ``size`` randomly sampled basis functions and make the weights uniform."""
+ for _ in range(size):
+ f, param = self.sample_basis_function()
+ self.add_to_basis(f, 1.0, param)
+ self.uniformize_weights()
- def leverage_socre(self, fun):
- v = fun(self.x) / np.sqrt(self.x.size()[0])
- new_set = self.set
+ def leverage_socre(self, fun):
+ """Unfinished helper (name is misspelled): assigns two locals and returns None."""
+ v = fun(self.x) / np.sqrt(self.x.size()[0])
+ new_set = self.set
- def basis_map_set(self, x, set):
- value = torch.zeros(len(set), x.size()[0] * self.size, dtype=torch.float64)
- for index, elem in enumerate(set):
- value[index, :] = elem(x).view(-1) / np.sqrt(self.n) # * np.sqrt(weights[index])
- return value
+ def basis_map_set(self, x, set):
+ """Stack ``elem(x)`` for every function in ``set`` as flattened rows divided by sqrt(n)."""
+ value = torch.zeros(len(set), x.size()[0] * self.size, dtype=torch.float64)
+ for index, elem in enumerate(set):
+ value[index, :] = elem(x).view(-1) / np.sqrt(
+ self.n
+ ) # * np.sqrt(weights[index])
+ return value
- def outer_kernel(self, x):
- Phi = self.basis_map_set(x, self.set)
- value = torch.mm(Phi, torch.t(Phi))
- return value
+ def outer_kernel(self, x):
+ """Return ``Phi Phi^T`` for the feature rows of the current basis at ``x``."""
+ Phi = self.basis_map_set(x, self.set)
+ value = torch.mm(Phi, torch.t(Phi))
+ return value
- def leverage_score(self, fun):
+ def leverage_score(self, fun):
+ """Stub: always returns 1.0 regardless of ``fun``."""
- return 1.0
+ return 1.0
- def leverage_score_basis(self, size=1):
- count = 0
+ def leverage_score_basis(self, size=1):
+ """Add ``size`` basis functions accepted by a binomial draw on their leverage score, then normalise the weights."""
+ count = 0
- while count < size:
- fun, param = self.sample_basis_function()
- leverage_score = self.leverage_score(fun)
- q_bar = size
+ while count < size:
+ fun, param = self.sample_basis_function()
+ leverage_score = self.leverage_score(fun)
+ q_bar = size
- q = np.random.binomial(q_bar, float(leverage_score))
- if q > 0:
- w = (q / q_bar) / leverage_score
+ q = np.random.binomial(q_bar, float(leverage_score))
+ if q > 0:
+ w = (q / q_bar) / leverage_score
- self.add_to_basis(fun, w, param)
- count += 1
- else:
- pass
+ self.add_to_basis(fun, w, param)
+ count += 1
+ else:
+ pass
- self.normalize_weights()
+ self.normalize_weights()
- def normalize_weights(self):
+ def normalize_weights(self):
+ """Rescale ``self.weights`` so they sum to one (stored as a list)."""
- # self.weights = np.ones(len(self.set))/len(self.set)
- sum = np.sum(np.array(self.weights))
- self.weights = np.array(self.weights) / sum
- self.weights = self.weights.tolist()
+ # self.weights = np.ones(len(self.set))/len(self.set)
+ sum = np.sum(np.array(self.weights))
+ self.weights = np.array(self.weights) / sum
+ self.weights = self.weights.tolist()
+
+ # print (self.weights)
+
+ def uniformize_weights(self):
+ """Give every basis function the weight ``1 / len(self.set)``."""
+ self.weights = np.ones(len(self.set)) / len(self.set)
+ self.weights = self.weights.tolist()
- # print (self.weights)
- def uniformize_weights(self):
- self.weights = np.ones(len(self.set)) / len(self.set)
- self.weights = self.weights.tolist()
# print (self.weights)
if __name__ == "__main__":
- pass
+ pass
diff --git a/stpy/optim/cost_functions.py b/stpy/optim/cost_functions.py
index 9248763..a31ba01 100755
--- a/stpy/optim/cost_functions.py
+++ b/stpy/optim/cost_functions.py
@@ -3,51 +3,62 @@
class CostFunction:
- def __init__(self, cost, number_args=1):
- self.cost = cost
- self.number_args = number_args
-
- def joined_egrad(self, Xx):
- for X in Xx:
- X.requires_grad_(True)
- y = self.cost(Xx)
- y.backward(retain_graph=True)
- output = []
- for X in Xx:
- output.append(X.grad)
- return output
-
- def joined_hess(self, Xx, Uu):
- for X in zip(Xx):
- X.requires_grad_(True)
- y = self.joined_egrad(Xx)
- y.backward(retain_graph=True)
- output = []
- for X, U in zip(Xx, Uu):
- output.append(torch.mm(X.grad, Uu))
- return output
-
- def egrad(self, X):
- X.requires_grad_(True)
- y = self.cost(X)
- y.backward(retain_graph=True)
- return X.grad
-
- def ehess(self, X, U):
- X.requires_grad_(True)
- y = self.egrad(X)
- y.backward(retain_graph=True)
- return torch.mm(X.grad, U)
-
- def define(self):
- if self.number_args == 1:
- cost_numpy = lambda X: self.cost(torch.from_numpy(X)).data.numpy()
- grad_numpy = lambda X: self.egrad(torch.from_numpy(X)).data.numpy()
- hess_numpy = lambda X, U: self.ehess(torch.from_numpy(X), torch.from_numpy(U)).data.numpy()
- return [cost_numpy, grad_numpy, hess_numpy]
- else:
- cost_numpy = lambda Xx: self.cost([torch.from_numpy(X) for X in Xx]).data.numpy()
- grad_numpy = lambda Xx: [z.data.numpy() for z in self.joined_egrad([torch.from_numpy(X) for X in Xx])]
- hess_numpy = lambda Xx, Uu: [z.data.numpy() for z in self.joined_ehess([torch.from_numpy(X) for X in Xx],
- [torch.from_numpy(U) for U in Uu])]
- return [cost_numpy, grad_numpy, hess_numpy]
+ def __init__(self, cost, number_args=1):
+ self.cost = cost
+ self.number_args = number_args
+
+ def joined_egrad(self, Xx):
+ for X in Xx:
+ X.requires_grad_(True)
+ y = self.cost(Xx)
+ y.backward(retain_graph=True)
+ output = []
+ for X in Xx:
+ output.append(X.grad)
+ return output
+
+ def joined_hess(self, Xx, Uu):
+ for X in zip(Xx):
+ X.requires_grad_(True)
+ y = self.joined_egrad(Xx)
+ y.backward(retain_graph=True)
+ output = []
+ for X, U in zip(Xx, Uu):
+ output.append(torch.mm(X.grad, Uu))
+ return output
+
+ def egrad(self, X):
+ X.requires_grad_(True)
+ y = self.cost(X)
+ y.backward(retain_graph=True)
+ return X.grad
+
+ def ehess(self, X, U):
+ X.requires_grad_(True)
+ y = self.egrad(X)
+ y.backward(retain_graph=True)
+ return torch.mm(X.grad, U)
+
+ def define(self):
+ if self.number_args == 1:
+ cost_numpy = lambda X: self.cost(torch.from_numpy(X)).data.numpy()
+ grad_numpy = lambda X: self.egrad(torch.from_numpy(X)).data.numpy()
+ hess_numpy = lambda X, U: self.ehess(
+ torch.from_numpy(X), torch.from_numpy(U)
+ ).data.numpy()
+ return [cost_numpy, grad_numpy, hess_numpy]
+ else:
+ cost_numpy = lambda Xx: self.cost(
+ [torch.from_numpy(X) for X in Xx]
+ ).data.numpy()
+ grad_numpy = lambda Xx: [
+ z.data.numpy()
+ for z in self.joined_egrad([torch.from_numpy(X) for X in Xx])
+ ]
+ hess_numpy = lambda Xx, Uu: [
+ z.data.numpy()
+ for z in self.joined_ehess(
+ [torch.from_numpy(X) for X in Xx], [torch.from_numpy(U) for U in Uu]
+ )
+ ]
+ return [cost_numpy, grad_numpy, hess_numpy]
diff --git a/stpy/optim/custom_optimizers.py b/stpy/optim/custom_optimizers.py
index 568802e..2db66c2 100644
--- a/stpy/optim/custom_optimizers.py
+++ b/stpy/optim/custom_optimizers.py
@@ -4,327 +4,340 @@
import torch
-def bisection(g, a, b, N, version='stop'):
- '''Approximate solution of g(x)=0 on interval [a,b] by bisection method.
-
- Parameters
- ----------
- g : function
- The function for which we are trying to approximate a solution g(x)=0.
- a,b : numbers
- The interval in which to search for a solution. The function returns
- None if g(a)*g(b) >= 0 since a solution is not guaranteed.
- N : (positive) integer
- The number of iterations to implement.
-
- Returns
- -------
- x_N : number
- The midpoint of the Nth interval computed by the bisection method. The
- initial interval [a_0,b_0] is given by [a,b]. If f(m_n) == 0 for some
- midpoint m_n = (a_n + b_n)/2, then the function returns this solution.
- If all signs of values f(a_n), f(b_n) and f(m_n) are the same at any
- iteration, the bisection method fails and return None.
-
- Examples
- --------
- >>> f = lambda x: x**2 - x - 1
- >>> bisection(f,1,2,25)
- 1.618033990263939
- >>> f = lambda x: (2*x - 1)*(x - 3)
- >>> bisection(f,0,1,10)
- 0.5
- '''
- d = {}
-
- def f(x):
- if x in d:
- return d[x]
- else:
- d[x] = g(x)
- return d[x]
-
- if version == 'stop':
- if f(a) < 0.:
- return a
- if f(a) * f(b) > 0.:
- print("Bisection method fails.")
- return None
-
- a_n = a
- b_n = b
- dict = {}
- for n in range(1, N + 1):
- m_n = (a_n + b_n) / 2.
- f_m_n = f(m_n)
- if f(a_n) * f_m_n < 0:
- a_n = a_n
- b_n = m_n
- elif f(b_n) * f_m_n < 0:
- a_n = m_n
- b_n = b_n
- elif f_m_n == 0:
- print("Found exact solution.")
- return m_n
- else:
- return a_n
- print("Bisection method fails.")
- return None
- return (a_n + b_n) / 2.
+def bisection(g, a, b, N, version="stop"):
+ """Approximate solution of g(x)=0 on interval [a,b] by bisection method.
+
+ Parameters
+ ----------
+ g : function
+ The function for which we are trying to approximate a solution g(x)=0.
+ a,b : numbers
+ The interval in which to search for a solution. The function returns
+ None if g(a)*g(b) >= 0 since a solution is not guaranteed.
+ N : (positive) integer
+ The number of iterations to implement.
+
+ Returns
+ -------
+ x_N : number
+ The midpoint of the Nth interval computed by the bisection method. The
+ initial interval [a_0,b_0] is given by [a,b]. If f(m_n) == 0 for some
+ midpoint m_n = (a_n + b_n)/2, then the function returns this solution.
+ If all signs of values f(a_n), f(b_n) and f(m_n) are the same at any
+ iteration, the bisection method fails and returns None.
+
+ Examples
+ --------
+ >>> f = lambda x: x**2 - x - 1
+ >>> bisection(f,1,2,25)
+ 1.618033990263939
+ >>> f = lambda x: (2*x - 1)*(x - 3)
+ >>> bisection(f,0,1,10)
+ 0.5
+ """
+ d = {}
+
+ def f(x):
+ if x in d:
+ return d[x]
+ else:
+ d[x] = g(x)
+ return d[x]
+
+ if version == "stop":
+ if f(a) < 0.0:
+ return a
+ if f(a) * f(b) > 0.0:
+ print("Bisection method fails.")
+ return None
+
+ a_n = a
+ b_n = b
+ dict = {}
+ for n in range(1, N + 1):
+ m_n = (a_n + b_n) / 2.0
+ f_m_n = f(m_n)
+ if f(a_n) * f_m_n < 0:
+ a_n = a_n
+ b_n = m_n
+ elif f(b_n) * f_m_n < 0:
+ a_n = m_n
+ b_n = b_n
+ elif f_m_n == 0:
+ print("Found exact solution.")
+ return m_n
+ else:
+ return a_n
+ print("Bisection method fails.")
+ return None
+ return (a_n + b_n) / 2.0
def greedy_per_step(fun, add, ground_set, min=True):
- scores = []
- for elem in range(ground_set.size()[0]):
- new = add(ground_set[elem, :].view(1, -1))
- scores.append(fun(new))
- if min:
- j = np.argmin(scores)
- else:
- j = np.argmax(scores)
- return [j]
+ scores = []
+ for elem in range(ground_set.size()[0]):
+ new = add(ground_set[elem, :].view(1, -1))
+ scores.append(fun(new))
+ if min:
+ j = np.argmin(scores)
+ else:
+ j = np.argmax(scores)
+ return [j]
def QPQC_problem(A, a, s, Sigma=None):
- n = A.shape[0]
- if Sigma is None:
- I = np.eye(n)
- Sigma = I
+ n = A.shape[0]
+ if Sigma is None:
+ I = np.eye(n)
+ Sigma = I
- # SDP relaxation
- M = np.zeros(shape=(n + 1, n + 1))
+ # SDP relaxation
+ M = np.zeros(shape=(n + 1, n + 1))
- M[0, 1:] = -a.reshape(-1)
- M[1:, 0] = -a.T.reshape(-1)
- M[1:, 1:] = A
+ M[0, 1:] = -a.reshape(-1)
+ M[1:, 0] = -a.T.reshape(-1)
+ M[1:, 1:] = A
- # print (M)
+ # print (M)
- Meqconst = np.eye(n + 1)
- Meqconst[1:, 1:] = Sigma
- Meqconst[0][0] = 0
+ Meqconst = np.eye(n + 1)
+ Meqconst[1:, 1:] = Sigma
+ Meqconst[0][0] = 0
- # print (Meqconst)
+ # print (Meqconst)
- First = np.zeros(shape=(n + 1, n + 1))
- First[0, 0] = 1.
+ First = np.zeros(shape=(n + 1, n + 1))
+ First[0, 0] = 1.0
- X = cp.Variable((n + 1, n + 1))
+ X = cp.Variable((n + 1, n + 1))
- objective = cp.Maximize(cp.trace(M @ X))
+ objective = cp.Maximize(cp.trace(M @ X))
- constraints = [X >> 0]
- constraints += [cp.trace(Meqconst @ X) >= s]
- constraints += [cp.trace(First @ X) == 1]
+ constraints = [X >> 0]
+ constraints += [cp.trace(Meqconst @ X) >= s]
+ constraints += [cp.trace(First @ X) == 1]
- prob = cp.Problem(objective, constraints)
- prob.solve()
+ prob = cp.Problem(objective, constraints)
+ prob.solve()
- # print (X.value[1:,1:])
- eigvals, eigvecs = np.linalg.eig(X.value[1:, 1:])
+ # print (X.value[1:,1:])
+ eigvals, eigvecs = np.linalg.eig(X.value[1:, 1:])
- index = np.argmax(eigvals)
- val = np.max(eigvals)
- x = np.real(eigvecs[index] * np.sqrt(val))
- return val, x
+ index = np.argmax(eigvals)
+ val = np.max(eigvals)
+ x = np.real(eigvecs[index] * np.sqrt(val))
+ return val, x
def convex_QCQP(A, a, s, Sigma=None, threads=4, verbose=False):
- """
- Solving
-
- min xAx - 2ax
- s.t. xSigmax \leq s
- A, Sigma psd.
-
- :param A:
- :param a:
- :param s:
- :param Sigma:
- :return:
- """
- n = A.shape[0]
-
- if Sigma is None:
- I = np.eye(n)
- Sigma = I
-
- x = cp.Variable(n)
- objective = cp.Minimize(cp.quad_form(x, A) - 2 * x @ a)
- zero = np.zeros(n)
- # constraints = [ cp.SOC(zero@x + np.array([np.sqrt(s)]), Sigma @ x)]
- constraints = [cp.quad_form(x, Sigma) <= s]
- prob = cp.Problem(objective, constraints)
- prob.solve(solver=cp.MOSEK, mosek_params={mosek.iparam.num_threads: threads,
- mosek.iparam.intpnt_solve_form: mosek.solveform.dual,
- mosek.dparam.intpnt_co_tol_pfeas: 1e-8,
- mosek.dparam.intpnt_co_tol_dfeas: 1e-8,
- mosek.dparam.intpnt_co_tol_rel_gap: 1e-8},
- verbose=verbose)
-
- x_no_const = x.value.reshape(-1, 1)
- val = prob.value
- return val, x_no_const
+ r"""
+ Solving
+
+ min xAx - 2ax
+ s.t. xSigmax \leq s
+ A, Sigma psd.
+
+ :param A:
+ :param a:
+ :param s:
+ :param Sigma:
+ :return:
+ """
+ n = A.shape[0]
+
+ if Sigma is None:
+ I = np.eye(n)
+ Sigma = I
+
+ x = cp.Variable(n)
+ objective = cp.Minimize(cp.quad_form(x, A) - 2 * x @ a)
+ zero = np.zeros(n)
+ # constraints = [ cp.SOC(zero@x + np.array([np.sqrt(s)]), Sigma @ x)]
+ constraints = [cp.quad_form(x, Sigma) <= s]
+ prob = cp.Problem(objective, constraints)
+ prob.solve(
+ solver=cp.MOSEK,
+ mosek_params={
+ mosek.iparam.num_threads: threads,
+ mosek.iparam.intpnt_solve_form: mosek.solveform.dual,
+ mosek.dparam.intpnt_co_tol_pfeas: 1e-8,
+ mosek.dparam.intpnt_co_tol_dfeas: 1e-8,
+ mosek.dparam.intpnt_co_tol_rel_gap: 1e-8,
+ },
+ verbose=verbose,
+ )
+
+ x_no_const = x.value.reshape(-1, 1)
+ val = prob.value
+ return val, x_no_const
def QCQP_problem(A, a, s, Sigma=None, threads=4, verbose=False):
- """
- Solving
-
- min xAx - 2ax
- s.t. xSigmax == s
-
-
- :param A:
- :param a:
- :param s:
- :param Sigma:
- :return:
- """
- n = A.shape[0]
- lam = cp.Variable(1)
- if Sigma is None:
- I = np.eye(n)
- Sigma = I
-
- objective = cp.Maximize(lam * s - cp.matrix_frac(a, A - lam * Sigma))
- constraints = [A - lam * Sigma >> 0]
- prob = cp.Problem(objective, constraints)
- prob.solve(solver=cp.MOSEK, mosek_params={mosek.iparam.num_threads: threads,
- mosek.iparam.intpnt_solve_form: mosek.solveform.dual,
- mosek.dparam.intpnt_co_tol_pfeas: 1e-12,
- mosek.dparam.intpnt_co_tol_dfeas: 1e-12,
- mosek.dparam.intpnt_co_tol_rel_gap: 1e-12},
- verbose=verbose)
-
- x_no_const = np.linalg.solve(A - lam.value * Sigma, a)
- val = prob.value
- return val, x_no_const
+ """
+ Solving
+
+ min xAx - 2ax
+ s.t. xSigmax == s
+
+
+ :param A:
+ :param a:
+ :param s:
+ :param Sigma:
+ :return:
+ """
+ n = A.shape[0]
+ lam = cp.Variable(1)
+ if Sigma is None:
+ I = np.eye(n)
+ Sigma = I
+
+ objective = cp.Maximize(lam * s - cp.matrix_frac(a, A - lam * Sigma))
+ constraints = [A - lam * Sigma >> 0]
+ prob = cp.Problem(objective, constraints)
+ prob.solve(
+ solver=cp.MOSEK,
+ mosek_params={
+ mosek.iparam.num_threads: threads,
+ mosek.iparam.intpnt_solve_form: mosek.solveform.dual,
+ mosek.dparam.intpnt_co_tol_pfeas: 1e-12,
+ mosek.dparam.intpnt_co_tol_dfeas: 1e-12,
+ mosek.dparam.intpnt_co_tol_rel_gap: 1e-12,
+ },
+ verbose=verbose,
+ )
+
+ x_no_const = np.linalg.solve(A - lam.value * Sigma, a)
+ val = prob.value
+ return val, x_no_const
def solve_mpi(Q, c, tau, verbose=True, up=None, low=None, xwarm=None):
- """
- Solve MIP program
-
-
- """
- if verbose == True:
- print("Starting Acq. Fucn solver...")
- print("Resolution: ")
- # Grid
-
- # tau = torch.from_numpy(np.arange(-n, n + 1, 1).astype(np.double)) / n
- N = tau.size()[0]
- d = Q.size()[0]
- s = torch.ones(N)
- Tau = torch.zeros(size=(d, d * N), dtype=torch.float64)
- S = torch.zeros(size=(d, d * N), dtype=torch.float64)
-
- for j in range(d):
- Tau[j, j * N:(j + 1) * N] = tau
- S[j, j * N:(j + 1) * N] = s
-
- B = Q @ Tau
-
- if (up is not None) or (low is not None):
- G = torch.cat((B, -B, S, -S, torch.t(c), -torch.t(c)))
- h = torch.ones(4 * d + 2)
- h[0:2 * d] = 1
- h[3 * d:4 * d] = -1
- h[4 * d] = up
- h[4 * d + 1] = -low
- else:
- G = torch.cat((B, -B, S, -S))
- h = torch.ones(4 * d)
- h[0:2 * d] = 1
- h[3 * d:4 * d] = -1
- # Indicator variables
-
- x = cp.Variable(d * N, boolean=True)
- if xwarm is not None:
- x.value = xwarm.detach().numpy()
- c = c.view(-1).detach().numpy()
-
- objective = cp.Minimize(-c * x)
- constraints = [0 <= x, x <= 1, G.detach().numpy() * x <= h.view(-1).detach().numpy()]
- prob = cp.Problem(objective, constraints)
- prob.solve(solver=cp.MOSEK, verbose=verbose, warm_start=True)
-
- # print (x.value)
-
- return (torch.from_numpy(Tau.numpy() @ x.value), np.dot(c, x.value))
+ """
+ Solve MIP program
+
+
+ """
+ if verbose == True:
+ print("Starting Acq. Fucn solver...")
+ print("Resolution: ")
+ # Grid
+
+ # tau = torch.from_numpy(np.arange(-n, n + 1, 1).astype(np.double)) / n
+ N = tau.size()[0]
+ d = Q.size()[0]
+ s = torch.ones(N)
+ Tau = torch.zeros(size=(d, d * N), dtype=torch.float64)
+ S = torch.zeros(size=(d, d * N), dtype=torch.float64)
+
+ for j in range(d):
+ Tau[j, j * N : (j + 1) * N] = tau
+ S[j, j * N : (j + 1) * N] = s
+
+ B = Q @ Tau
+
+ if (up is not None) or (low is not None):
+ G = torch.cat((B, -B, S, -S, torch.t(c), -torch.t(c)))
+ h = torch.ones(4 * d + 2)
+ h[0 : 2 * d] = 1
+ h[3 * d : 4 * d] = -1
+ h[4 * d] = up
+ h[4 * d + 1] = -low
+ else:
+ G = torch.cat((B, -B, S, -S))
+ h = torch.ones(4 * d)
+ h[0 : 2 * d] = 1
+ h[3 * d : 4 * d] = -1
+ # Indicator variables
+
+ x = cp.Variable(d * N, boolean=True)
+ if xwarm is not None:
+ x.value = xwarm.detach().numpy()
+ c = c.view(-1).detach().numpy()
+
+ objective = cp.Minimize(-c * x)
+ constraints = [
+ 0 <= x,
+ x <= 1,
+ G.detach().numpy() * x <= h.view(-1).detach().numpy(),
+ ]
+ prob = cp.Problem(objective, constraints)
+ prob.solve(solver=cp.MOSEK, verbose=verbose, warm_start=True)
+
+ # print (x.value)
+
+ return (torch.from_numpy(Tau.numpy() @ x.value), np.dot(c, x.value))
def newton_solve(f, x0, eps=1e-3, maxiter=100, verbose=False, grad=None):
- """
- >>> newton_solve(lambda x: x**2,torch.Tensor([2.0,1.0]).double().view(-1))
- tensor([0., 0.], dtype=torch.float64)
- """
- lam = 1.
- d = int(x0.size()[0])
- x0.requires_grad_(True)
- x = torch.zeros(size=(d, 1), requires_grad=True).view(-1).double()
- x.data = x0.data
- res = f(x) ** 2
- i = 0
- s = 1.
-
- while torch.max(res) > eps and i < maxiter:
- i = i + 1
-
- if grad is None:
- nabla_f = torch.autograd.functional.jacobian(f, x, strict=True)
- else:
- nabla_f = grad(x)
-
- if verbose:
- print(i, "err:", torch.max(res), s)
- print(nabla_f.size())
- print("-----------------------")
-
- xn = x - torch.linalg.solve(nabla_f + torch.eye(d).double() * s, f(x).view(-1, 1)).view(-1)
- resn = f(xn) ** 2
-
- if torch.max(resn) < torch.max(res):
- x = xn.requires_grad_(True)
- # lam = np.minimum(lam * 2,1)
- s = s / 2
- res = resn
-
- else:
- s = s * 2
- # lam = lam /2.
- return x
+ """
+ >>> newton_solve(lambda x: x**2,torch.tensor([2.0,1.0]).double().view(-1))
+ tensor([0., 0.], dtype=torch.float64)
+ """
+ lam = 1.0
+ d = int(x0.size()[0])
+ x0.requires_grad_(True)
+ x = torch.zeros(size=(d, 1), requires_grad=True).view(-1).double()
+ x.data = x0.data
+ res = f(x) ** 2
+ i = 0
+ s = 1.0
+
+ while torch.max(res) > eps and i < maxiter:
+ i = i + 1
+
+ if grad is None:
+ nabla_f = torch.autograd.functional.jacobian(f, x, strict=True)
+ else:
+ nabla_f = grad(x)
+
+ if verbose:
+ print(i, "err:", torch.max(res), s)
+ print(nabla_f.size())
+ print("-----------------------")
+
+ xn = x - torch.linalg.solve(
+ nabla_f + torch.eye(d).double() * s, f(x).view(-1, 1)
+ ).view(-1)
+ resn = f(xn) ** 2
+
+ if torch.max(resn) < torch.max(res):
+ x = xn.requires_grad_(True)
+ # lam = np.minimum(lam * 2,1)
+ s = s / 2
+ res = resn
+
+ else:
+ s = s * 2
+ # lam = lam /2.
+ return x
def matrix_recovery_hermitian_trace_regression(X, b, eps=1e-5):
- """
+ """
- :param X: list of matrices
- :param b: vector of resposnes
- :param eps: constraint tolerance
- :return: reocvered matrix
- """
+ :param X: list of matrices
+ :param b: vector of resposnes
+ :param eps: constraint tolerance
+ :return: reocvered matrix
+ """
- d = X[0].shape[0]
- N = len(X)
- Z = cp.Variable((d, d), symmetric=True)
+ d = X[0].shape[0]
+ N = len(X)
+ Z = cp.Variable((d, d), symmetric=True)
- constraints = [Z >> 0]
- constraints += [
- cp.trace(X[i] @ Z) >= b[i] - eps for i in range(N)
- ]
- constraints += [
- cp.trace(X[i] @ Z) <= b[i] + eps for i in range(N)
- ]
+ constraints = [Z >> 0]
+ constraints += [cp.trace(X[i] @ Z) >= b[i] - eps for i in range(N)]
+ constraints += [cp.trace(X[i] @ Z) <= b[i] + eps for i in range(N)]
- prob = cp.Problem(cp.Minimize(cp.norm(Z, "nuc")),
- constraints)
+ prob = cp.Problem(cp.Minimize(cp.norm(Z, "nuc")), constraints)
- prob.solve()
+ prob.solve()
- return Z.value
+ return Z.value
if __name__ == "__main__":
- newton_solve(lambda x: x ** 2, torch.Tensor([2.0, 1.0]).double().view(-1), verbose=True)
+ newton_solve(
+ lambda x: x**2, torch.tensor([2.0, 1.0]).double().view(-1), verbose=True
+ )
diff --git a/stpy/optim/frank_wolfe.py b/stpy/optim/frank_wolfe.py
index ebee977..0e303e4 100644
--- a/stpy/optim/frank_wolfe.py
+++ b/stpy/optim/frank_wolfe.py
@@ -3,57 +3,57 @@
from scipy.optimize import minimize_scalar
-def step_frank_wolfe_simplex(F, nablaF, x, step_size = 'opt'):
- d = x.shape[0]
- nabla = nablaF(x)
- index = np.argmax(nabla)
- e = np.zeros(d)
- e[index] = 1.
- if step_size == 'opt':
- fn = lambda h: -F(x * h + (1 - h) * e)
- res = minimize_scalar(fn, bounds=(10e-8, 1 - 10e-8), method='bounded')
- gamma = res.x
- else:
- gamma = 1.
- x = x * gamma + (1 - gamma) * e
- return x
-
-
-def step_exponential_gradient_descent(nablaF, x, eta=1.):
- """
-
- :param nablaF:
- :param x:
- :param eta:
- :return:
- """
- x = x * torch.exp(eta * nablaF(x))
- x = x / torch.sum(x)
- return x
+def step_frank_wolfe_simplex(F, nablaF, x, step_size="opt"):
+ d = x.shape[0]
+ nabla = nablaF(x)
+ index = np.argmax(nabla)
+ e = np.zeros(d)
+ e[index] = 1.0
+ if step_size == "opt":
+ fn = lambda h: -F(x * h + (1 - h) * e)
+ res = minimize_scalar(fn, bounds=(10e-8, 1 - 10e-8), method="bounded")
+ gamma = res.x
+ else:
+ gamma = 1.0
+ x = x * gamma + (1 - gamma) * e
+ return x
+
+
+def step_exponential_gradient_descent(nablaF, x, eta=1.0):
+ """
+
+ :param nablaF:
+ :param x:
+ :param eta:
+ :return:
+ """
+ x = x * torch.exp(eta * nablaF(x))
+ x = x / torch.sum(x)
+ return x
def step_wa_simlex(F, nablaF, x, optimality):
- d = x.shape[0]
- nabla = nablaF(x)
- e_plus = np.max(nabla)
- e_minus = np.min(nabla)
- i_minus = np.argmin(nabla)
- i_plus = np.argmax(nabla)
- e = np.zeros(d)
-
- if (e_plus - optimality) / optimality > (optimality - e_minus) / optimality:
- index = i_plus
- e[index] = 1.
- fn = lambda h: -F(x * h + (1 - h) * e)
- res = minimize_scalar(fn, bounds=(10e-8, 1 - 10e-8), method='bounded')
- gamma = res.x
- x = x * gamma + (1. - gamma) * e
- else:
- index = i_minus
- e[index] = 1.
- fn = lambda h: -F((x + h * e) / (1 + h))
- # res = minimize_scalar(fn,bounds=(0.,1/(1-x[index])),method='bounded')
- res = minimize_scalar(fn, bounds=(-x[index], 1 - x[index]), method='bounded')
- gamma = res.x
- x = (x + gamma * e) / (1 + gamma)
- return x
+ d = x.shape[0]
+ nabla = nablaF(x)
+ e_plus = np.max(nabla)
+ e_minus = np.min(nabla)
+ i_minus = np.argmin(nabla)
+ i_plus = np.argmax(nabla)
+ e = np.zeros(d)
+
+ if (e_plus - optimality) / optimality > (optimality - e_minus) / optimality:
+ index = i_plus
+ e[index] = 1.0
+ fn = lambda h: -F(x * h + (1 - h) * e)
+ res = minimize_scalar(fn, bounds=(10e-8, 1 - 10e-8), method="bounded")
+ gamma = res.x
+ x = x * gamma + (1.0 - gamma) * e
+ else:
+ index = i_minus
+ e[index] = 1.0
+ fn = lambda h: -F((x + h * e) / (1 + h))
+ # res = minimize_scalar(fn,bounds=(0.,1/(1-x[index])),method='bounded')
+ res = minimize_scalar(fn, bounds=(-x[index], 1 - x[index]), method="bounded")
+ gamma = res.x
+ x = (x + gamma * e) / (1 + gamma)
+ return x
diff --git a/stpy/optim/hyper_parameter_opt.py b/stpy/optim/hyper_parameter_opt.py
index aa25b28..33c1eed 100755
--- a/stpy/optim/hyper_parameter_opt.py
+++ b/stpy/optim/hyper_parameter_opt.py
@@ -6,127 +6,147 @@
class HyperParameterOpt:
- def __init__(self, obj, x, y, fun, params):
-
- self.mode = obj
- self.x = x
- self.y = y
- self.fun = fun
- self.params = params
-
- def optimize(self, type, optimizer, restarts):
-
- ## Bandwidth optimization
- def bandwidth_opt(X):
- gamma = X
- Rot = torch.eye(self.x.size()[1], dtype=torch.float64)
- return self.log_marginal_likelihood(gamma, Rot, 1.0, kernel=" ")
-
- def bandwidth_opt_handler():
- manifold = Euclidean(self.kernel_object.gamma.size()[0])
- C = CostFunction(bandwidth_opt, number_args=1)
- xinit = lambda: np.random.randn() ** 2 + np.abs(
- torch.zeros(self.kernel_object.gamma.size()[0], dtype=torch.float64).numpy())
- return optimize(manifold, C, 1, xinit)
-
- def bandwidth_kappa_opt(X):
- gamma = X[0]
- kappa = X[1]
- Rot = torch.eye(self.x.size()[1], dtype=torch.float64)
- return self.log_marginal_likelihood(gamma, Rot, kappa, kernel=" ")
-
- def bandwidth_kappa_opt_handler():
- manifold1 = Euclidean(self.kernel_object.gamma.size()[0])
- manifold2 = Euclidean(1)
- manifold = Product((manifold1, manifold2))
- C = CostFunction(bandwidth_kappa_opt, number_args=2)
- xinit = lambda x: [torch.randn(self.kernel_object.gamma.size()[0], dtype=torch.float64).numpy(),
- np.abs(torch.randn(1, dtype=torch.float64).numpy())]
- return optimize(manifold, C, 2, xinit)
-
- ## Rotations optimization
- def rotations_opt(X):
- Rot = X
- return self.log_marginal_likelihood(self.kernel_object.gamma, Rot, self.kernel_object.kappa, kernel=" ")
-
- def rotations_opt_handler():
- rots = Rotations(self.kernel_object.gamma.size()[0])
- manifold = rots
- xinit = lambda: torch.qr(torch.randn(self.x.size()[1], self.x.size()[1], dtype=torch.float64))[0].numpy()
- C = CostFunction(rotations_opt, number_args=1)
- return optimize(manifold, C, 1, xinit)
-
- ## Bandwidth and Rotations optimization
- def bandwith_rotations_opt(X):
- gamma = X[0]
- Rot = X[1]
- return self.log_marginal_likelihood(gamma, Rot, 0.1, kernel=" ")
-
- def bandwidth_rotations_opt_handler():
- eucl = Euclidean(self.kernel_object.gamma.size()[0])
- rots = Rotations(self.kernel_object.gamma.size()[0])
- manifold = Product((eucl, rots))
- xinit = lambda: [torch.randn(self.kernel_object.gamma.size()[0], dtype=torch.float64).numpy(),
- torch.qr(torch.randn(self.x.size()[1], self.x.size()[1], dtype=torch.float64))[0].numpy()]
- C = CostFunction(bandwith_rotations_opt, number_args=2)
- return optimize(manifold, C, 2, xinit)
-
- ## Bandwidth and Rotations optimization
- def bandwith_kappa_rotations_opt(X):
- gamma = X[0]
- kappa = X[1]
- Rot = X[2]
- return self.log_marginal_likelihood(gamma, Rot, kappa, kernel=" ")
-
- def bandwidth_kappa_rotations_opt_handler():
- eucl = Euclidean(self.kernel_object.gamma.size()[0])
- eucl2 = Euclidean(1)
- rots = Rotations(self.kernel_object.gamma.size()[0])
- manifold = Product((eucl, eucl2, rots))
- xinit = [self.kernel_object.gamma.numpy(), torch.eye(self.x.size()[1], dtype=torch.float64).numpy()]
- C = CostFunction(bandwith_kappa_rotations_opt, number_args=2)
- return optimize(manifold, C, 2, xinit)
-
- # Finalize
- if type == "bandwidth":
- best_params = bandwidth_opt_handler()
- self.kernel_object.gamma = torch.abs(best_params[0]).detach()
-
- elif type == "rots":
- best_params = rotations_opt_handler()
- Rot = best_params[0].detach()
- print("Rotation:", Rot)
- self.Rot = Rot
- self.x = torch.mm(self.x, Rot).detach()
-
- elif type == "bandwidth+kappa":
- best_params = bandwidth_kappa_opt_handler()
- self.kernel_object.gamma = torch.abs(best_params[0]).detach()
- self.s = torch.abs(best_params[1]).detach()
-
- elif type == "bandwidth+rots":
- best_params = bandwidth_rotations_opt_handler()
- self.kernel_object.gamma = torch.abs(best_params[0]).detach()
- Rot = best_params[1].detach()
- print("Rotation:", Rot)
- self.Rot = Rot
- self.x = torch.mm(self.x, Rot).detach()
-
- elif type == "bandwidth+kappa+rots":
- best_params = bandwidth_kappa_rotations_opt_handler()
- self.kernel_object.gamma = torch.abs(best_params[0]).detach()
- self.s = torch.abs(best_params[1]).detach()
- Rot = best_params[2].detach()
- print("Rotation:", Rot)
- self.Rot = Rot
- self.x = torch.mm(self.x, Rot).detach()
-
- else:
- raise AttributeError("Optimization scheme not implemented")
-
- self.back_prop = False
- self.fit = False
- self.fit_gp(self.x, self.y)
- print(self.description())
-
- return True
+ def __init__(self, obj, x, y, fun, params):
+
+ self.mode = obj
+ self.x = x
+ self.y = y
+ self.fun = fun
+ self.params = params
+
+ def optimize(self, type, optimizer, restarts):
+
+ ## Bandwidth optimization
+ def bandwidth_opt(X):
+ gamma = X
+ Rot = torch.eye(self.x.size()[1], dtype=torch.float64)
+ return self.log_marginal_likelihood(gamma, Rot, 1.0, kernel=" ")
+
+ def bandwidth_opt_handler():
+ manifold = Euclidean(self.kernel_object.gamma.size()[0])
+ C = CostFunction(bandwidth_opt, number_args=1)
+ xinit = lambda: np.random.randn() ** 2 + np.abs(
+ torch.zeros(
+ self.kernel_object.gamma.size()[0], dtype=torch.float64
+ ).numpy()
+ )
+ return optimize(manifold, C, 1, xinit)
+
+ def bandwidth_kappa_opt(X):
+ gamma = X[0]
+ kappa = X[1]
+ Rot = torch.eye(self.x.size()[1], dtype=torch.float64)
+ return self.log_marginal_likelihood(gamma, Rot, kappa, kernel=" ")
+
+ def bandwidth_kappa_opt_handler():
+ manifold1 = Euclidean(self.kernel_object.gamma.size()[0])
+ manifold2 = Euclidean(1)
+ manifold = Product((manifold1, manifold2))
+ C = CostFunction(bandwidth_kappa_opt, number_args=2)
+ xinit = lambda x: [
+ torch.randn(
+ self.kernel_object.gamma.size()[0], dtype=torch.float64
+ ).numpy(),
+ np.abs(torch.randn(1, dtype=torch.float64).numpy()),
+ ]
+ return optimize(manifold, C, 2, xinit)
+
+ ## Rotations optimization
+ def rotations_opt(X):
+ Rot = X
+ return self.log_marginal_likelihood(
+ self.kernel_object.gamma, Rot, self.kernel_object.kappa, kernel=" "
+ )
+
+ def rotations_opt_handler():
+ rots = Rotations(self.kernel_object.gamma.size()[0])
+ manifold = rots
+ xinit = lambda: torch.qr(
+ torch.randn(self.x.size()[1], self.x.size()[1], dtype=torch.float64)
+ )[0].numpy()
+ C = CostFunction(rotations_opt, number_args=1)
+ return optimize(manifold, C, 1, xinit)
+
+ ## Bandwidth and Rotations optimization
+ def bandwith_rotations_opt(X):
+ gamma = X[0]
+ Rot = X[1]
+ return self.log_marginal_likelihood(gamma, Rot, 0.1, kernel=" ")
+
+ def bandwidth_rotations_opt_handler():
+ eucl = Euclidean(self.kernel_object.gamma.size()[0])
+ rots = Rotations(self.kernel_object.gamma.size()[0])
+ manifold = Product((eucl, rots))
+ xinit = lambda: [
+ torch.randn(
+ self.kernel_object.gamma.size()[0], dtype=torch.float64
+ ).numpy(),
+ torch.qr(
+ torch.randn(self.x.size()[1], self.x.size()[1], dtype=torch.float64)
+ )[0].numpy(),
+ ]
+ C = CostFunction(bandwith_rotations_opt, number_args=2)
+ return optimize(manifold, C, 2, xinit)
+
+ ## Bandwidth, Kappa and Rotations optimization
+ def bandwith_kappa_rotations_opt(X):
+ gamma = X[0]
+ kappa = X[1]
+ Rot = X[2]
+ return self.log_marginal_likelihood(gamma, Rot, kappa, kernel=" ")
+
+ def bandwidth_kappa_rotations_opt_handler():
+ eucl = Euclidean(self.kernel_object.gamma.size()[0])
+ eucl2 = Euclidean(1)
+ rots = Rotations(self.kernel_object.gamma.size()[0])
+ manifold = Product((eucl, eucl2, rots))
+ xinit = [
+ self.kernel_object.gamma.numpy(),
+ torch.eye(self.x.size()[1], dtype=torch.float64).numpy(),
+ ]
+ C = CostFunction(bandwith_kappa_rotations_opt, number_args=2)
+ return optimize(manifold, C, 2, xinit)
+
+ # Finalize
+ if type == "bandwidth":
+ best_params = bandwidth_opt_handler()
+ self.kernel_object.gamma = torch.abs(best_params[0]).detach()
+
+ elif type == "rots":
+ best_params = rotations_opt_handler()
+ Rot = best_params[0].detach()
+ print("Rotation:", Rot)
+ self.Rot = Rot
+ self.x = torch.mm(self.x, Rot).detach()
+
+ elif type == "bandwidth+kappa":
+ best_params = bandwidth_kappa_opt_handler()
+ self.kernel_object.gamma = torch.abs(best_params[0]).detach()
+ self.s = torch.abs(best_params[1]).detach()
+
+ elif type == "bandwidth+rots":
+ best_params = bandwidth_rotations_opt_handler()
+ self.kernel_object.gamma = torch.abs(best_params[0]).detach()
+ Rot = best_params[1].detach()
+ print("Rotation:", Rot)
+ self.Rot = Rot
+ self.x = torch.mm(self.x, Rot).detach()
+
+ elif type == "bandwidth+kappa+rots":
+ best_params = bandwidth_kappa_rotations_opt_handler()
+ self.kernel_object.gamma = torch.abs(best_params[0]).detach()
+ self.s = torch.abs(best_params[1]).detach()
+ Rot = best_params[2].detach()
+ print("Rotation:", Rot)
+ self.Rot = Rot
+ self.x = torch.mm(self.x, Rot).detach()
+
+ else:
+ raise AttributeError("Optimization scheme not implemented")
+
+ self.back_prop = False
+ self.fit = False
+ self.fit_gp(self.x, self.y)
+ print(self.description())
+
+ return True
diff --git a/stpy/optim/manifold_optimization.py b/stpy/optim/manifold_optimization.py
index acc25c9..3ade0eb 100644
--- a/stpy/optim/manifold_optimization.py
+++ b/stpy/optim/manifold_optimization.py
@@ -5,41 +5,50 @@
def optimize(manifold, cost_function, number_args, sampling_func, optimizer, restarts):
- [cost_numpy, egrad_numpy, ehess_numpy] = cost_function.define()
-
- if optimizer == "pymanopt":
- problem = Problem(manifold=manifold, cost=cost_numpy, egrad=egrad_numpy, ehess=ehess_numpy, verbosity=1)
- solver = SteepestDescent(maxiter=100, mingradnorm=1e-8, minstepsize=1e-10)
-
- def solve(problem, x=None):
- return solver.solve(problem, x=x)
-
- elif optimizer == "scipy":
- problem = None
-
- def solve(problem, x=None):
- res = minimize(cost_numpy, xinit, method="L-BFGS-B", jac=egrad_numpy, tol=0.0001)
- return res.x
- else:
- raise NotImplementedError
-
- # optimization
- repeats = restarts
- best = 10e10
- best_params = [i for i in range(number_args)]
-
- for _ in range(repeats):
- xinit = sampling_func()
- # try:
- Xopt = solve(problem, x=xinit)
- print(xinit)
- cost = cost_numpy(Xopt)
- print("Run:", _, " cost: ", cost)
- if cost < best:
- best = cost
- if len(best_params) > 1:
- for j in range(number_args):
- best_params[j] = torch.from_numpy(Xopt[j])
- else:
- best_params[0] = torch.from_numpy(Xopt)
- return best_params
+ [cost_numpy, egrad_numpy, ehess_numpy] = cost_function.define()
+
+ if optimizer == "pymanopt":
+ problem = Problem(
+ manifold=manifold,
+ cost=cost_numpy,
+ egrad=egrad_numpy,
+ ehess=ehess_numpy,
+ verbosity=1,
+ )
+ solver = SteepestDescent(maxiter=100, mingradnorm=1e-8, minstepsize=1e-10)
+
+ def solve(problem, x=None):
+ return solver.solve(problem, x=x)
+
+ elif optimizer == "scipy":
+ problem = None
+
+ def solve(problem, x=None):
+ res = minimize(
+ cost_numpy, xinit, method="L-BFGS-B", jac=egrad_numpy, tol=0.0001
+ )
+ return res.x
+
+ else:
+ raise NotImplementedError
+
+ # optimization
+ repeats = restarts
+ best = 10e10
+ best_params = [i for i in range(number_args)]
+
+ for _ in range(repeats):
+ xinit = sampling_func()
+ # try:
+ Xopt = solve(problem, x=xinit)
+ print(xinit)
+ cost = cost_numpy(Xopt)
+ print("Run:", _, " cost: ", cost)
+ if cost < best:
+ best = cost
+ if len(best_params) > 1:
+ for j in range(number_args):
+ best_params[j] = torch.from_numpy(Xopt[j])
+ else:
+ best_params[0] = torch.from_numpy(Xopt)
+ return best_params
diff --git a/stpy/point_processes/binomial/binomial_process.py b/stpy/point_processes/binomial/binomial_process.py
index ddbf89e..f3ce7f2 100644
--- a/stpy/point_processes/binomial/binomial_process.py
+++ b/stpy/point_processes/binomial/binomial_process.py
@@ -1,38 +1,38 @@
import torch
-class BernoulliPointProcess():
+class BernoulliPointProcess:
- def __init__(self, basic_sets, d=1, rate=None):
- self.basic_sets = basic_sets
- self.rate = rate
- self.d = d
+ def __init__(self, basic_sets, d=1, rate=None):
+ self.basic_sets = basic_sets
+ self.rate = rate
+ self.d = d
- def is_basic(self, S):
- """
- :return:
- """
- for set in self.basic_sets:
- if hash(set) == hash(S):
- return True
- return False
+ def is_basic(self, S):
+ """Check whether S is one of the basic sets, comparing sets by their hash.
+ :return: True when hash(S) matches the hash of a set in self.basic_sets, False otherwise
+ """
+ for set in self.basic_sets:
+ if hash(set) == hash(S):
+ return True
+ return False
- def sample(self, S, t=None, dt=None):
- if self.is_basic(S):
- rv = torch.bernoulli(self.rate(S))
- if rv > 0.5:
- return (S, 1., 1., dt, t)
- else:
- return (S, 0., 1., dt, t)
- else:
- # iterate over all sets that contain it
- outcome = 0.
- for set in self.basic_sets:
- if S.inside(set):
- rv = float(torch.bernoulli(self.rate(S)))
- outcome = max(rv, 0.)
- if outcome > 0.5:
- return (S, 1., 1., dt, t)
- else:
- return (S, 0., 1., dt, t)
- pass
+ def sample(self, S, t=None, dt=None):
+ if self.is_basic(S):
+ rv = torch.bernoulli(self.rate(S))
+ if rv > 0.5:
+ return (S, 1.0, 1.0, dt, t)
+ else:
+ return (S, 0.0, 1.0, dt, t)
+ else:
+ # iterate over all sets that contain it
+ outcome = 0.0
+ for set in self.basic_sets:
+ if S.inside(set):
+ rv = float(torch.bernoulli(self.rate(S)))
+ outcome = max(rv, 0.0)
+ if outcome > 0.5:
+ return (S, 1.0, 1.0, dt, t)
+ else:
+ return (S, 0.0, 1.0, dt, t)
+ pass
diff --git a/stpy/point_processes/binomial/binomial_process_estimator.py b/stpy/point_processes/binomial/binomial_process_estimator.py
index bf88e90..2096af3 100644
--- a/stpy/point_processes/binomial/binomial_process_estimator.py
+++ b/stpy/point_processes/binomial/binomial_process_estimator.py
@@ -12,473 +12,653 @@
class BernoulliRateEstimator(RateEstimator):
- """
- without link function, but with inequality constraints
- """
-
- def __init__(self, hierarchy, d=1, m=100, kernel_object=None, B=1., s=1., jitter=10e-8, b=0., basis='triangle',
- offset=0.1, uncertainty='laplace'):
-
- self.d = d
- self.s = s
- self.b = b
- self.B = B
- self.uncertainty = uncertainty
- self.hierarchy = hierarchy
- self.kernel_object = kernel_object
- self.packing = TriangleEmbedding(d, m, kernel_object=kernel_object, B=1., b=0., offset=offset,
- s=np.sqrt(jitter))
- self.feedback = "histogram"
- self.data = None
-
- self.basic_sets = self.hierarchy.get_sets_level(self.hierarchy.levels)
- self.varphis = torch.zeros(size=(len(self.basic_sets), self.get_m())).double()
-
- for index_set, set in enumerate(self.basic_sets):
- self.varphis[index_set, :] = self.embed_set(set)
-
- def embed_set(self, S):
- return self.packing.integral(S).view(1, -1)
-
- def load_data(self, data):
- """
-
- :param data: (S, no_events, out_of, duration, time)
- :return:
- """
- self.data = []
- self.phis = None
- for datapoint in data:
- self.add_data_point(datapoint)
-
- def add_data_point(self, datapoint):
-
- if self.data is None:
- self.load_data([datapoint])
- else:
-
- # add
- self.data.append(datapoint)
-
- S, count, pool, duration, time = datapoint
- phi = self.embed_set(S)
-
- if self.phis is not None:
- self.counts = torch.cat((self.counts, torch.Tensor([count])))
- self.pool = torch.cat((self.pool, torch.Tensor([pool])))
- self.phis = torch.cat((self.phis, phi), dim=0)
- else:
- self.counts = torch.Tensor([count]).double()
- self.pool = torch.Tensor([pool]).double()
- self.phis = phi
-
- def nabla(self, theta):
- # defining objective
- if self.data is not None:
- return - torch.einsum('i,ij,i->j', self.counts, self.phis, 1. / (self.phis @ theta).view(-1)).view(-1, 1) + \
- torch.einsum('i,ij,i->j', self.pool - self.counts, self.phis,
- 1. / (1. - self.phis @ theta).view(-1)).view(-1, 1) \
- + self.s * theta.view(-1, 1)
- else:
- return self.s * theta.view(-1, 1)
-
- def sample(self, steps=10, verbose=False):
- """
- Langevin dynamics to sample from constrained GP prior
-
- :param steps: Number of iterations
- :return:
- """
- l = np.zeros(shape=(len(self.basic_sets)))
- u = np.zeros(shape=(len(self.basic_sets))) + 1.
-
- # prox operator
- def prox(x):
- res = solve_qp(np.eye(self.get_m()), x.numpy().reshape(-1),
- C=np.vstack((-self.varphis.numpy(), self.varphis.numpy())).T,
- b=np.hstack((-u, l)), factorized=True)
- return torch.from_numpy(res[0]).view(-1, 1)
-
- # initialization
- if self.rate is not None:
- theta = self.rate.view(-1, 1)
- else:
- theta = self.b + 0.05 * torch.rand(size=(self.get_m(), 1), dtype=torch.float64, requires_grad=False).view(
- -1, 1) ** 2
-
- # loop
- for k in range(steps):
- w = torch.randn(size=(self.get_m(), 1)).double()
-
- # calculate proper step-size
- W = self.construct_covariance(theta=theta)
- L = float(scipy.sparse.linalg.eigsh(W.numpy(), k=1, which='LM', return_eigenvectors=False, tol=1e-3))
- eta = 0.5 / L
-
- theta = 0.5 * theta - eta * self.nabla(theta) + 0.5 * prox(theta) + np.sqrt(2 * eta) * w
- if verbose == True:
- print("Iter:", k, theta.T)
-
- self.sampled_theta = prox(theta)
-
- def construct_covariance(self, theta):
- D1 = torch.diag(self.counts / (self.phis @ theta).view(-1) ** 2)
- D2 = torch.diag((self.pool - self.counts) / (1 - self.phis @ theta).view(-1) ** 2)
-
- W = self.phis.T @ (D1 + D2) @ self.phis + self.s * torch.eye(self.get_m()).double()
- return W
-
- def construct_confidence(self):
- self.W = self.construct_covariance(self.rate)
- self.invW = torch.pinverse(self.W)
-
- def construct_likelihood_ratio(self, method='full'):
- # for data
- phis = self.phis.numpy()
- counts = self.counts.numpy()
-
- # for constraints
- varphis = self.varphis.numpy()
-
- # current fit
- mean_theta = self.rate.numpy()
-
- if method == 'split':
- pass
- elif method == 'full':
- self.likelihood = - counts @ np.log(phis @ mean_theta) - (1 - counts) @ np.log(1 - phis @ mean_theta) \
- + self.s * 0.5 * np.sum(mean_theta - 0.5) ** 2
- elif method == 'cv':
- pass
-
- def ucb(self, S, beta=8., delta=0.1):
- if self.uncertainty == 'laplace':
- ucb = self.embed_set(S) @ self.rate + beta * self.embed_set(S) @ self.invW @ self.embed_set(S).T
- return torch.minimum(torch.Tensor([[1.]]).double(), ucb)
-
- elif self.uncertainty == "ratio":
- phi = self.embed_set(S)
- phis = self.phis.numpy()
- varphis = self.varphis.numpy()
-
- counts = self.counts.numpy()
- theta = cp.Variable(self.get_m())
-
- objective = cp.Maximize(phi @ theta)
-
- v = np.log(1. / delta) + self.likelihood
- constraints = [- counts @ cp.log(phis @ theta) - (1 - counts) @ cp.log(1 - phis @ theta)
- + self.s * 0.5 * cp.sum_squares(theta - 0.5) <= v]
-
- # every set has probability between 0-1.
- constraints.append(varphis @ theta >= np.zeros(varphis.shape[0]))
- constraints.append(varphis @ theta <= np.ones(varphis.shape[0]))
-
- prob = cp.Problem(objective, constraints)
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False,
- mosek_params={mosek.iparam.intpnt_solve_form: mosek.solveform.primal,
- mosek.dparam.intpnt_co_tol_pfeas: 1e-6,
- mosek.dparam.intpnt_co_tol_dfeas: 1e-6,
- mosek.dparam.intpnt_co_tol_rel_gap: 1e-6})
- return torch.minimum(torch.Tensor([[1.]]).double(), torch.from_numpy(np.array(prob.value)))
-
- def lcb(self, S, beta=8., delta=0.1):
- if self.uncertainty == 'laplace':
- lcb = self.embed_set(S) @ self.rate - beta * self.embed_set(S) @ self.invW @ self.embed_set(S).T
- return torch.maximum(torch.Tensor([[0.]]).double(), lcb)
-
- elif self.uncertainty == "ratio":
- phi = self.embed_set(S)
- phis = self.phis.numpy()
- varphis = self.varphis.numpy()
-
- counts = self.counts.numpy()
- theta = cp.Variable(self.get_m())
-
- objective = cp.Minimize(phi @ theta)
- v = np.log(1. / delta) + self.likelihood
- constraints = [- counts @ cp.log(phis @ theta) - (1 - counts) @ cp.log(1 - phis @ theta)
- + self.s * 0.5 * cp.sum_squares(theta - 0.5) <= v]
-
- # every set has probability between 0-1.
- constraints.append(varphis @ theta >= np.zeros(varphis.shape[0]))
- constraints.append(varphis @ theta <= np.ones(varphis.shape[0]))
-
- prob = cp.Problem(objective, constraints)
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False,
- mosek_params={mosek.iparam.intpnt_solve_form: mosek.solveform.primal,
- mosek.dparam.intpnt_co_tol_pfeas: 1e-6,
- mosek.dparam.intpnt_co_tol_dfeas: 1e-6,
- mosek.dparam.intpnt_co_tol_rel_gap: 1e-6})
-
- return torch.maximum(torch.Tensor([[0.]]).double(), torch.from_numpy(np.array(prob.value)))
-
- def fit_gp(self, threads=4):
-
- phis = self.phis.numpy()
- varphis = self.varphis.numpy()
-
- counts = self.counts.numpy()
- theta = cp.Variable(self.get_m())
- objective = cp.Minimize(- counts @ cp.log(phis @ theta) - (1 - counts) @ cp.log(1 - phis @ theta)
- + self.s * 0.5 * cp.sum_squares(theta - 0.5))
-
- # probability constraints
- constraints = []
-
- # every set has probability between 0-1.
- constraints.append(varphis @ theta >= np.zeros(varphis.shape[0]))
- constraints.append(varphis @ theta <= np.ones(varphis.shape[0]))
-
- prob = cp.Problem(objective, constraints)
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False,
- mosek_params={mosek.iparam.num_threads: threads,
- mosek.iparam.intpnt_solve_form: mosek.solveform.primal,
- mosek.dparam.intpnt_co_tol_pfeas: 1e-6,
- mosek.dparam.intpnt_co_tol_dfeas: 1e-6,
- mosek.dparam.intpnt_co_tol_rel_gap: 1e-6})
- self.rate = torch.from_numpy(theta.value)
- return self.rate
+ """
+ without link function, but with inequality constraints
+ """
+
+ def __init__(
+ self,
+ hierarchy,
+ d=1,
+ m=100,
+ kernel_object=None,
+ B=1.0,
+ s=1.0,
+ jitter=10e-8,
+ b=0.0,
+ basis="triangle",
+ offset=0.1,
+ uncertainty="laplace",
+ ):
+
+ self.d = d
+ self.s = s
+ self.b = b
+ self.B = B
+ self.uncertainty = uncertainty
+ self.hierarchy = hierarchy
+ self.kernel_object = kernel_object
+ self.packing = TriangleEmbedding(
+ d,
+ m,
+ kernel_object=kernel_object,
+ B=1.0,
+ b=0.0,
+ offset=offset,
+ s=np.sqrt(jitter),
+ )
+ self.feedback = "histogram"
+ self.data = None
+
+ self.basic_sets = self.hierarchy.get_sets_level(self.hierarchy.levels)
+ self.varphis = torch.zeros(size=(len(self.basic_sets), self.get_m())).double()
+
+ for index_set, set in enumerate(self.basic_sets):
+ self.varphis[index_set, :] = self.embed_set(set)
+
+ def embed_set(self, S):
+ return self.packing.integral(S).view(1, -1)
+
+ def load_data(self, data):
+ """
+ Reset the stored observations (self.data, self.phis) and add each datapoint via add_data_point.
+ :param data: iterable of tuples (S, no_events, out_of, duration, time)
+ :return: None
+ """
+ self.data = []
+ self.phis = None
+ for datapoint in data:
+ self.add_data_point(datapoint)
+
+ def add_data_point(self, datapoint):
+
+ if self.data is None:
+ self.load_data([datapoint])
+ else:
+
+ # add
+ self.data.append(datapoint)
+
+ S, count, pool, duration, time = datapoint
+ phi = self.embed_set(S)
+
+ if self.phis is not None:
+ self.counts = torch.cat((self.counts, torch.tensor([count])))
+ self.pool = torch.cat((self.pool, torch.tensor([pool])))
+ self.phis = torch.cat((self.phis, phi), dim=0)
+ else:
+ self.counts = torch.tensor([count]).double()
+ self.pool = torch.tensor([pool]).double()
+ self.phis = phi
+
+ def nabla(self, theta):
+ # defining objective
+ if self.data is not None:
+ return (
+ -torch.einsum(
+ "i,ij,i->j",
+ self.counts,
+ self.phis,
+ 1.0 / (self.phis @ theta).view(-1),
+ ).view(-1, 1)
+ + torch.einsum(
+ "i,ij,i->j",
+ self.pool - self.counts,
+ self.phis,
+ 1.0 / (1.0 - self.phis @ theta).view(-1),
+ ).view(-1, 1)
+ + self.s * theta.view(-1, 1)
+ )
+ else:
+ return self.s * theta.view(-1, 1)
+
+ def sample(self, steps=10, verbose=False):
+ """
+ Langevin dynamics to sample from constrained GP prior
+
+ :param steps: Number of iterations
+ :return:
+ """
+ l = np.zeros(shape=(len(self.basic_sets)))
+ u = np.zeros(shape=(len(self.basic_sets))) + 1.0
+
+ # prox operator
+ def prox(x):
+ res = solve_qp(
+ np.eye(self.get_m()),
+ x.numpy().reshape(-1),
+ C=np.vstack((-self.varphis.numpy(), self.varphis.numpy())).T,
+ b=np.hstack((-u, l)),
+ factorized=True,
+ )
+ return torch.from_numpy(res[0]).view(-1, 1)
+
+ # initialization
+ if self.rate is not None:
+ theta = self.rate.view(-1, 1)
+ else:
+ theta = (
+ self.b
+ + 0.05
+ * torch.rand(
+ size=(self.get_m(), 1), dtype=torch.float64, requires_grad=False
+ ).view(-1, 1)
+ ** 2
+ )
+
+ # loop
+ for k in range(steps):
+ w = torch.randn(size=(self.get_m(), 1)).double()
+
+ # calculate proper step-size
+ W = self.construct_covariance(theta=theta)
+ L = float(
+ scipy.sparse.linalg.eigsh(
+ W.numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-3
+ )
+ )
+ eta = 0.5 / L
+
+ theta = (
+ 0.5 * theta
+ - eta * self.nabla(theta)
+ + 0.5 * prox(theta)
+ + np.sqrt(2 * eta) * w
+ )
+ if verbose == True:
+ print("Iter:", k, theta.T)
+
+ self.sampled_theta = prox(theta)
+
+ def construct_covariance(self, theta):
+ D1 = torch.diag(self.counts / (self.phis @ theta).view(-1) ** 2)
+ D2 = torch.diag(
+ (self.pool - self.counts) / (1 - self.phis @ theta).view(-1) ** 2
+ )
+
+ W = (
+ self.phis.T @ (D1 + D2) @ self.phis
+ + self.s * torch.eye(self.get_m()).double()
+ )
+ return W
+
+ def construct_confidence(self):
+ self.W = self.construct_covariance(self.rate)
+ self.invW = torch.pinverse(self.W)
+
+    def construct_likelihood_ratio(self, method="full"):
+        """Store in self.likelihood the regularized negative log-likelihood evaluated at self.rate.
+
+        ucb/lcb with uncertainty="ratio" add log(1 / delta) to this value to bound their feasible set.
+        :param method: only "full" computes a value; "split" and "cv" are placeholders that do nothing
+        """
+        phis = self.phis.numpy()
+        counts = self.counts.numpy()
+        mean_theta = self.rate.numpy()  # current fit
+
+        if method == "split":
+            pass
+        elif method == "full":
+            # squared Euclidean norm, i.e. the same regularizer as cp.sum_squares(theta - 0.5) in fit_gp/ucb/lcb
+            self.likelihood = (
+                -counts @ np.log(phis @ mean_theta)
+                - (1 - counts) @ np.log(1 - phis @ mean_theta)
+                + self.s * 0.5 * np.sum((mean_theta - 0.5) ** 2)
+            )
+        elif method == "cv":
+            pass
+
+ def ucb(self, S, beta=8.0, delta=0.1):
+ if self.uncertainty == "laplace":
+ ucb = (
+ self.embed_set(S) @ self.rate
+ + beta * self.embed_set(S) @ self.invW @ self.embed_set(S).T
+ )
+ return torch.minimum(torch.tensor([[1.0]]).double(), ucb)
+
+ elif self.uncertainty == "ratio":
+ phi = self.embed_set(S)
+ phis = self.phis.numpy()
+ varphis = self.varphis.numpy()
+
+ counts = self.counts.numpy()
+ theta = cp.Variable(self.get_m())
+
+ objective = cp.Maximize(phi @ theta)
+
+ v = np.log(1.0 / delta) + self.likelihood
+ constraints = [
+ -counts @ cp.log(phis @ theta)
+ - (1 - counts) @ cp.log(1 - phis @ theta)
+ + self.s * 0.5 * cp.sum_squares(theta - 0.5)
+ <= v
+ ]
+
+ # every set has probability between 0-1.
+ constraints.append(varphis @ theta >= np.zeros(varphis.shape[0]))
+ constraints.append(varphis @ theta <= np.ones(varphis.shape[0]))
+
+ prob = cp.Problem(objective, constraints)
+ prob.solve(
+ solver=cp.MOSEK,
+ warm_start=False,
+ verbose=False,
+ mosek_params={
+ mosek.iparam.intpnt_solve_form: mosek.solveform.primal,
+ mosek.dparam.intpnt_co_tol_pfeas: 1e-6,
+ mosek.dparam.intpnt_co_tol_dfeas: 1e-6,
+ mosek.dparam.intpnt_co_tol_rel_gap: 1e-6,
+ },
+ )
+ return torch.minimum(
+ torch.tensor([[1.0]]).double(), torch.from_numpy(np.array(prob.value))
+ )
+
+ def lcb(self, S, beta=8.0, delta=0.1):
+ if self.uncertainty == "laplace":
+ lcb = (
+ self.embed_set(S) @ self.rate
+ - beta * self.embed_set(S) @ self.invW @ self.embed_set(S).T
+ )
+ return torch.maximum(torch.tensor([[0.0]]).double(), lcb)
+
+ elif self.uncertainty == "ratio":
+ phi = self.embed_set(S)
+ phis = self.phis.numpy()
+ varphis = self.varphis.numpy()
+
+ counts = self.counts.numpy()
+ theta = cp.Variable(self.get_m())
+
+ objective = cp.Minimize(phi @ theta)
+ v = np.log(1.0 / delta) + self.likelihood
+ constraints = [
+ -counts @ cp.log(phis @ theta)
+ - (1 - counts) @ cp.log(1 - phis @ theta)
+ + self.s * 0.5 * cp.sum_squares(theta - 0.5)
+ <= v
+ ]
+
+ # every set has probability between 0-1.
+ constraints.append(varphis @ theta >= np.zeros(varphis.shape[0]))
+ constraints.append(varphis @ theta <= np.ones(varphis.shape[0]))
+
+ prob = cp.Problem(objective, constraints)
+ prob.solve(
+ solver=cp.MOSEK,
+ warm_start=False,
+ verbose=False,
+ mosek_params={
+ mosek.iparam.intpnt_solve_form: mosek.solveform.primal,
+ mosek.dparam.intpnt_co_tol_pfeas: 1e-6,
+ mosek.dparam.intpnt_co_tol_dfeas: 1e-6,
+ mosek.dparam.intpnt_co_tol_rel_gap: 1e-6,
+ },
+ )
+
+ return torch.maximum(
+ torch.tensor([[0.0]]).double(), torch.from_numpy(np.array(prob.value))
+ )
+
+ def fit_gp(self, threads=4):
+
+ phis = self.phis.numpy()
+ varphis = self.varphis.numpy()
+
+ counts = self.counts.numpy()
+ theta = cp.Variable(self.get_m())
+ objective = cp.Minimize(
+ -counts @ cp.log(phis @ theta)
+ - (1 - counts) @ cp.log(1 - phis @ theta)
+ + self.s * 0.5 * cp.sum_squares(theta - 0.5)
+ )
+
+ # probability constraints
+ constraints = []
+
+ # every set has probability between 0-1.
+ constraints.append(varphis @ theta >= np.zeros(varphis.shape[0]))
+ constraints.append(varphis @ theta <= np.ones(varphis.shape[0]))
+
+ prob = cp.Problem(objective, constraints)
+ prob.solve(
+ solver=cp.MOSEK,
+ warm_start=False,
+ verbose=False,
+ mosek_params={
+ mosek.iparam.num_threads: threads,
+ mosek.iparam.intpnt_solve_form: mosek.solveform.primal,
+ mosek.dparam.intpnt_co_tol_pfeas: 1e-6,
+ mosek.dparam.intpnt_co_tol_dfeas: 1e-6,
+ mosek.dparam.intpnt_co_tol_rel_gap: 1e-6,
+ },
+ )
+ self.rate = torch.from_numpy(theta.value)
+ return self.rate
class LinkBernoulliRateEstimator(BernoulliRateEstimator):
- def construct_covariance(self, theta):
- D1 = torch.diag(self.counts / (self.phis @ theta).view(-1) ** 2)
- D2 = torch.diag((self.pool - self.counts) / (1 - self.phis @ theta).view(-1) ** 2)
-
- W = self.phis.T @ (D1 + D2) @ self.phis + self.s * torch.eye(self.get_m()).double()
- return W
-
- def log_marginal(self, kernel, X):
- func = kernel.get_kernel()
- K = func(self.x, self.x, **X) + torch.eye(self.n, dtype=torch.float64) * self.s * self.s
-
- L = torch.linalg.cholesky(K)
- logdet = -0.5 * 2 * torch.sum(torch.log(torch.diag(L)))
- alpha = torch.solve(self.y, K)[0]
- logprob = -0.5 * torch.mm(torch.t(self.y), alpha) + logdet
- logprob = -logprob
- return logprob
-
- def construct_likelihood_ratio(self, method='full'):
- # for data
- phis = self.phis.numpy()
- counts = self.counts.numpy()
-
- # for constraints
- varphis = self.varphis.numpy()
-
- # current fit
- mean_theta = self.rate.numpy()
-
- if method == 'split':
- pass
- elif method == 'full':
- self.likelihood = - counts @ phis @ mean_theta + np.log(1 + np.exp(phis @ mean_theta)) \
- + self.s * 0.5 * np.sum(mean_theta) ** 2
- elif method == 'cv':
- pass
-
- def fit_gp(self, threads=4):
- phis = self.phis.numpy()
-
- counts = self.counts.numpy()
- theta = cp.Variable(self.get_m())
- objective = cp.Minimize(-cp.sum(cp.multiply(counts, phis @ theta)) + cp.sum(cp.logistic(phis @ theta))
- + self.s * 0.5 * cp.sum_squares(theta))
-
- # probability constraints
- constraints = []
-
- prob = cp.Problem(objective, constraints)
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False,
- mosek_params={mosek.iparam.num_threads: threads,
- mosek.iparam.intpnt_solve_form: mosek.solveform.primal,
- mosek.dparam.intpnt_co_tol_pfeas: 1e-6,
- mosek.dparam.intpnt_co_tol_dfeas: 1e-6,
- mosek.dparam.intpnt_co_tol_rel_gap: 1e-6})
- self.rate = torch.from_numpy(theta.value)
- return self.rate
-
- def link(self, x):
- return 1. / (1. + torch.exp(-x))
-
- def mean_set(self, S):
- return self.link(self.embed_set(S) @ self.rate)
-
- def ucb(self, S, beta=8., delta=0.1):
- if self.uncertainty == "laplace":
- ucb = self.embed_set(S) @ self.rate + beta * self.embed_set(S) @ self.invW @ self.embed_set(S).T
- return self.link(ucb)
- elif self.uncertainty == "martingale":
- phi = self.embed_set(S)
- hat_theta = self.rate.numpy()
-
- def constraint_value_gradient(theta, beta):
- y = cp.Variable(self.get_m())
- v = (theta - hat_theta)
- objective2 = cp.Maximize(y @ v - cp.sum(cp.abs(self.phis @ y)) - beta)
-
- prob = cp.Problem(objective2)
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False,
- mosek_params={mosek.iparam.intpnt_solve_form: mosek.solveform.primal,
- mosek.dparam.intpnt_co_tol_pfeas: 1e-4,
- mosek.dparam.intpnt_co_tol_dfeas: 1e-4,
- mosek.dparam.intpnt_co_tol_rel_gap: 1e-4})
- print(prob.status)
- return prob.value, y.value
-
- beta = 2.
- iters = 10
- gamma = 0.00000001
- theta = hat_theta
- print(theta)
-
- for k in range(iters):
- print("Iter:", k)
- d = cp.Variable(self.get_m())
- objective = cp.Minimize(phi @ d.T)
- val, nabla = constraint_value_gradient(theta, beta)
- constraints = [val + nabla.reshape(1, -1) @ d <= 0., cp.sum_squares(d) <= gamma]
- prob = cp.Problem(objective, constraints)
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False)
- theta = theta + d.value
- print(theta)
-
- return phi @ theta
-
- elif self.uncertainty == "ratio":
- phi = self.embed_set(S)
- phis = self.phis.numpy()
-
- counts = self.counts.numpy()
- theta = cp.Variable(self.get_m())
-
- objective = cp.Maximize(phi @ theta)
- v = np.log(1. / delta) + self.likelihood
- constraints = [-cp.sum(cp.multiply(counts, phis @ theta)) + cp.sum(cp.logistic(phis @ theta))
- + self.s * 0.5 * cp.sum_squares(theta) <= v]
-
- prob = cp.Problem(objective, constraints)
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False,
- mosek_params={mosek.iparam.intpnt_solve_form: mosek.solveform.primal,
- mosek.dparam.intpnt_co_tol_pfeas: 1e-6,
- mosek.dparam.intpnt_co_tol_dfeas: 1e-6,
- mosek.dparam.intpnt_co_tol_rel_gap: 1e-6})
- return self.link(phi @ theta.value)
-
- def lcb(self, S, beta=8., delta=0.1):
- if self.uncertainty == "laplace":
- lcb = self.embed_set(S) @ self.rate - beta * self.embed_set(S) @ self.invW @ self.embed_set(S).T
- return self.link(lcb)
- elif self.uncertainty == "ratio":
- phi = self.embed_set(S)
- phis = self.phis.numpy()
-
- counts = self.counts.numpy()
- theta = cp.Variable(self.get_m())
-
- objective = cp.Minimize(phi @ theta)
- v = np.log(1. / delta) + self.likelihood
- constraints = [-cp.sum(cp.multiply(counts, phis @ theta)) + cp.sum(cp.logistic(phis @ theta))
- + self.s * 0.5 * cp.sum_squares(theta) <= v]
-
- prob = cp.Problem(objective, constraints)
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False,
- mosek_params={mosek.iparam.intpnt_solve_form: mosek.solveform.primal,
- mosek.dparam.intpnt_co_tol_pfeas: 1e-6,
- mosek.dparam.intpnt_co_tol_dfeas: 1e-6,
- mosek.dparam.intpnt_co_tol_rel_gap: 1e-6})
- return self.link(phi @ theta.value)
-
- def nabla(self, theta):
- if self.data is not None:
- return -torch.einsum('i,ij->j', self.counts, self.phis).view(-1, 1) + \
- torch.einsum('i,ij,i->j', self.pool, self.phis,
- 1. / (1. + torch.exp(self.phis @ theta).view(-1))).view(-1, 1) \
- + self.s * theta.view(-1, 1)
- else:
- return self.s * theta.view(-1, 1)
-
- def construct_covariance(self, theta):
- W = torch.eye(self.get_m()).double() * self.s + torch.einsum('i,ij,ik->jk',
- torch.exp(self.phis @ theta).view(-1) / (
- 1 + torch.exp(self.phis @ theta)).view(
- -1) ** 2, self.phis, self.phis)
- return W
+ def construct_covariance(self, theta):
+ D1 = torch.diag(self.counts / (self.phis @ theta).view(-1) ** 2)
+ D2 = torch.diag(
+ (self.pool - self.counts) / (1 - self.phis @ theta).view(-1) ** 2
+ )
+
+ W = (
+ self.phis.T @ (D1 + D2) @ self.phis
+ + self.s * torch.eye(self.get_m()).double()
+ )
+ return W
+
+    def log_marginal(self, kernel, X):
+        """Return the negative Gaussian log marginal likelihood of self.y (additive constant dropped)."""
+        func = kernel.get_kernel()
+        K = (
+            func(self.x, self.x, **X)
+            + torch.eye(self.n, dtype=torch.float64) * self.s * self.s
+        )
+        # torch.solve no longer exists in current PyTorch; reuse the Cholesky factor for alpha = K^{-1} y
+        L = torch.linalg.cholesky(K)
+        logdet = -0.5 * 2 * torch.sum(torch.log(torch.diag(L)))
+        alpha = torch.cholesky_solve(self.y, L)
+        logprob = -0.5 * torch.mm(torch.t(self.y), alpha) + logdet
+        return -logprob
+
+    def construct_likelihood_ratio(self, method="full"):
+        """Store in self.likelihood the regularized logistic negative log-likelihood at self.rate.
+
+        :param method: only "full" computes a value; "split" and "cv" are placeholders that do nothing
+        """
+        phis = self.phis.numpy()
+        counts = self.counts.numpy()
+        mean_theta = self.rate.numpy()  # current fit
+
+        if method == "split":
+            pass
+        elif method == "full":
+            # scalar objective as in fit_gp: log-partition summed over datapoints, squared-norm regularizer
+            z = phis @ mean_theta
+            self.likelihood = (
+                -counts @ z
+                + np.sum(np.logaddexp(0.0, z))  # numerically stable sum of log(1 + exp(z))
+                + self.s * 0.5 * np.sum(mean_theta ** 2)
+            )
+        elif method == "cv":
+            pass
+
+ def fit_gp(self, threads=4):
+ phis = self.phis.numpy()
+
+ counts = self.counts.numpy()
+ theta = cp.Variable(self.get_m())
+ objective = cp.Minimize(
+ -cp.sum(cp.multiply(counts, phis @ theta))
+ + cp.sum(cp.logistic(phis @ theta))
+ + self.s * 0.5 * cp.sum_squares(theta)
+ )
+
+ # probability constraints
+ constraints = []
+
+ prob = cp.Problem(objective, constraints)
+ prob.solve(
+ solver=cp.MOSEK,
+ warm_start=False,
+ verbose=False,
+ mosek_params={
+ mosek.iparam.num_threads: threads,
+ mosek.iparam.intpnt_solve_form: mosek.solveform.primal,
+ mosek.dparam.intpnt_co_tol_pfeas: 1e-6,
+ mosek.dparam.intpnt_co_tol_dfeas: 1e-6,
+ mosek.dparam.intpnt_co_tol_rel_gap: 1e-6,
+ },
+ )
+ self.rate = torch.from_numpy(theta.value)
+ return self.rate
+
+ def link(self, x):
+ return 1.0 / (1.0 + torch.exp(-x))
+
+ def mean_set(self, S):
+ return self.link(self.embed_set(S) @ self.rate)
+
+ def ucb(self, S, beta=8.0, delta=0.1):
+ if self.uncertainty == "laplace":
+ ucb = (
+ self.embed_set(S) @ self.rate
+ + beta * self.embed_set(S) @ self.invW @ self.embed_set(S).T
+ )
+ return self.link(ucb)
+ elif self.uncertainty == "martingale":
+ phi = self.embed_set(S)
+ hat_theta = self.rate.numpy()
+
+ def constraint_value_gradient(theta, beta):
+ y = cp.Variable(self.get_m())
+ v = theta - hat_theta
+ objective2 = cp.Maximize(y @ v - cp.sum(cp.abs(self.phis @ y)) - beta)
+
+ prob = cp.Problem(objective2)
+ prob.solve(
+ solver=cp.MOSEK,
+ warm_start=False,
+ verbose=False,
+ mosek_params={
+ mosek.iparam.intpnt_solve_form: mosek.solveform.primal,
+ mosek.dparam.intpnt_co_tol_pfeas: 1e-4,
+ mosek.dparam.intpnt_co_tol_dfeas: 1e-4,
+ mosek.dparam.intpnt_co_tol_rel_gap: 1e-4,
+ },
+ )
+ print(prob.status)
+ return prob.value, y.value
+
+ beta = 2.0
+ iters = 10
+ gamma = 0.00000001
+ theta = hat_theta
+ print(theta)
+
+ for k in range(iters):
+ print("Iter:", k)
+ d = cp.Variable(self.get_m())
+ objective = cp.Minimize(phi @ d.T)
+ val, nabla = constraint_value_gradient(theta, beta)
+ constraints = [
+ val + nabla.reshape(1, -1) @ d <= 0.0,
+ cp.sum_squares(d) <= gamma,
+ ]
+ prob = cp.Problem(objective, constraints)
+ prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False)
+ theta = theta + d.value
+ print(theta)
+
+ return phi @ theta
+
+ elif self.uncertainty == "ratio":
+ phi = self.embed_set(S)
+ phis = self.phis.numpy()
+
+ counts = self.counts.numpy()
+ theta = cp.Variable(self.get_m())
+
+ objective = cp.Maximize(phi @ theta)
+ v = np.log(1.0 / delta) + self.likelihood
+ constraints = [
+ -cp.sum(cp.multiply(counts, phis @ theta))
+ + cp.sum(cp.logistic(phis @ theta))
+ + self.s * 0.5 * cp.sum_squares(theta)
+ <= v
+ ]
+
+ prob = cp.Problem(objective, constraints)
+ prob.solve(
+ solver=cp.MOSEK,
+ warm_start=False,
+ verbose=False,
+ mosek_params={
+ mosek.iparam.intpnt_solve_form: mosek.solveform.primal,
+ mosek.dparam.intpnt_co_tol_pfeas: 1e-6,
+ mosek.dparam.intpnt_co_tol_dfeas: 1e-6,
+ mosek.dparam.intpnt_co_tol_rel_gap: 1e-6,
+ },
+ )
+ return self.link(phi @ theta.value)
+
+ def lcb(self, S, beta=8.0, delta=0.1):
+ if self.uncertainty == "laplace":
+ lcb = (
+ self.embed_set(S) @ self.rate
+ - beta * self.embed_set(S) @ self.invW @ self.embed_set(S).T
+ )
+ return self.link(lcb)
+ elif self.uncertainty == "ratio":
+ phi = self.embed_set(S)
+ phis = self.phis.numpy()
+
+ counts = self.counts.numpy()
+ theta = cp.Variable(self.get_m())
+
+ objective = cp.Minimize(phi @ theta)
+ v = np.log(1.0 / delta) + self.likelihood
+ constraints = [
+ -cp.sum(cp.multiply(counts, phis @ theta))
+ + cp.sum(cp.logistic(phis @ theta))
+ + self.s * 0.5 * cp.sum_squares(theta)
+ <= v
+ ]
+
+ prob = cp.Problem(objective, constraints)
+ prob.solve(
+ solver=cp.MOSEK,
+ warm_start=False,
+ verbose=False,
+ mosek_params={
+ mosek.iparam.intpnt_solve_form: mosek.solveform.primal,
+ mosek.dparam.intpnt_co_tol_pfeas: 1e-6,
+ mosek.dparam.intpnt_co_tol_dfeas: 1e-6,
+ mosek.dparam.intpnt_co_tol_rel_gap: 1e-6,
+ },
+ )
+ return self.link(phi @ theta.value)
+
+ def nabla(self, theta):
+ if self.data is not None:
+ return (
+ -torch.einsum("i,ij->j", self.counts, self.phis).view(-1, 1)
+ + torch.einsum(
+ "i,ij,i->j",
+ self.pool,
+ self.phis,
+ 1.0 / (1.0 + torch.exp(self.phis @ theta).view(-1)),
+ ).view(-1, 1)
+ + self.s * theta.view(-1, 1)
+ )
+ else:
+ return self.s * theta.view(-1, 1)
+
+ def construct_covariance(self, theta):
+ W = torch.eye(self.get_m()).double() * self.s + torch.einsum(
+ "i,ij,ik->jk",
+ torch.exp(self.phis @ theta).view(-1)
+ / (1 + torch.exp(self.phis @ theta)).view(-1) ** 2,
+ self.phis,
+ self.phis,
+ )
+ return W
if __name__ == "__main__":
- import matplotlib.pyplot as plt
- from stpy.point_processes.binomial.binomial_process import BernoulliPointProcess
-
- d = 1
- gamma = 0.1
- n = 64
- m = 128
- levels = 7
- k = KernelFunction(gamma=gamma, kappa=1.)
-
- hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels)
- actions = hierarchical_structure.get_sets_level(levels)
- dummy = torch.zeros(size=(1, d)).double()
-
- estimator = BernoulliRateEstimator(hierarchical_structure, m=64, kernel_object=k, s=0.001, uncertainty='ratio')
- estimator_link = LinkBernoulliRateEstimator(hierarchical_structure, m=64, kernel_object=k, s=0.001,
- uncertainty="ratio")
-
- rate = lambda S: np.sin(np.pi * S.return_discretization(n=1) ** 2) * 0.5
- process = BernoulliPointProcess(hierarchical_structure.get_sets_level(levels), rate=rate)
-
- N = 100
-
- data = []
- for i in range(N):
- data.append(process.sample(actions[torch.randint(0, len(actions), size=(1, 1))]))
-
- estimator.load_data(data)
- estimator_link.load_data(data)
-
- estimator.fit_gp()
- estimator_link.fit_gp()
-
- # plot observations
- for datapoint in data:
- S, v, _, _, _ = datapoint
- x = S.return_discretization(n)
- if v == 1:
- plt.plot(x, x * 0, 'ko')
- else:
- plt.plot(x, x * 0, 'ro')
-
- xtest = hierarchical_structure.top_node.return_discretization(64)
- plt.plot(xtest, estimator.mean_rate(hierarchical_structure.top_node, 64) * actions[0].volume(), 'tab:blue')
-
- samples = 0
- for i in range(samples):
- estimator.sample(steps=100, verbose=False)
- plt.plot(xtest, estimator.sample_path(hierarchical_structure.top_node, 64) * actions[0].volume(), 'g--')
-
- estimator.construct_confidence()
- estimator.construct_likelihood_ratio()
-
- estimator_link.construct_confidence()
- estimator_link.construct_likelihood_ratio()
- # plot function
- for action in actions:
- val = estimator.mean_set(action)
- val_link = estimator_link.mean_set(action)
-
- ucb, lcb = float(estimator.ucb(action)), float(estimator.lcb(action))
- ucb_link, lcb_link = float(estimator_link.ucb(action, delta=0.5)), float(estimator_link.lcb(action, delta=0.5))
- x = action.return_discretization(64)
- plt.plot(x, x * 0 + rate(action), color='tab:red')
- x = x.view(-1)
-
- plt.plot(x, x * 0 + val, color='tab:blue', linestyle='--')
- plt.plot(x, x * 0 + val_link, color='tab:pink', linestyle='--')
- plt.fill_between(x, x * 0 + lcb, x * 0 + ucb, color='tab:blue', alpha=0.2)
- plt.fill_between(x, x * 0 + lcb_link, x * 0 + ucb_link, color='tab:pink', alpha=0.2)
-
- plt.show()
+ import matplotlib.pyplot as plt
+ from stpy.point_processes.binomial.binomial_process import BernoulliPointProcess
+
+ d = 1
+ gamma = 0.1
+ n = 64
+ m = 128
+ levels = 7
+ k = KernelFunction(gamma=gamma, kappa=1.0)
+
+ hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels)
+ actions = hierarchical_structure.get_sets_level(levels)
+ dummy = torch.zeros(size=(1, d)).double()
+
+ estimator = BernoulliRateEstimator(
+ hierarchical_structure, m=64, kernel_object=k, s=0.001, uncertainty="ratio"
+ )
+ estimator_link = LinkBernoulliRateEstimator(
+ hierarchical_structure, m=64, kernel_object=k, s=0.001, uncertainty="ratio"
+ )
+
+ rate = lambda S: np.sin(np.pi * S.return_discretization(n=1) ** 2) * 0.5
+ process = BernoulliPointProcess(
+ hierarchical_structure.get_sets_level(levels), rate=rate
+ )
+
+ N = 100
+
+ data = []
+ for i in range(N):
+ data.append(
+ process.sample(actions[torch.randint(0, len(actions), size=(1, 1))])
+ )
+
+ estimator.load_data(data)
+ estimator_link.load_data(data)
+
+ estimator.fit_gp()
+ estimator_link.fit_gp()
+
+ # plot observations
+ for datapoint in data:
+ S, v, _, _, _ = datapoint
+ x = S.return_discretization(n)
+ if v == 1:
+ plt.plot(x, x * 0, "ko")
+ else:
+ plt.plot(x, x * 0, "ro")
+
+ xtest = hierarchical_structure.top_node.return_discretization(64)
+ plt.plot(
+ xtest,
+ estimator.mean_rate(hierarchical_structure.top_node, 64) * actions[0].volume(),
+ "tab:blue",
+ )
+
+ samples = 0
+ for i in range(samples):
+ estimator.sample(steps=100, verbose=False)
+ plt.plot(
+ xtest,
+ estimator.sample_path(hierarchical_structure.top_node, 64)
+ * actions[0].volume(),
+ "g--",
+ )
+
+ estimator.construct_confidence()
+ estimator.construct_likelihood_ratio()
+
+ estimator_link.construct_confidence()
+ estimator_link.construct_likelihood_ratio()
+ # plot function
+ for action in actions:
+ val = estimator.mean_set(action)
+ val_link = estimator_link.mean_set(action)
+
+ ucb, lcb = float(estimator.ucb(action)), float(estimator.lcb(action))
+ ucb_link, lcb_link = float(estimator_link.ucb(action, delta=0.5)), float(
+ estimator_link.lcb(action, delta=0.5)
+ )
+ x = action.return_discretization(64)
+ plt.plot(x, x * 0 + rate(action), color="tab:red")
+ x = x.view(-1)
+
+ plt.plot(x, x * 0 + val, color="tab:blue", linestyle="--")
+ plt.plot(x, x * 0 + val_link, color="tab:pink", linestyle="--")
+ plt.fill_between(x, x * 0 + lcb, x * 0 + ucb, color="tab:blue", alpha=0.2)
+ plt.fill_between(
+ x, x * 0 + lcb_link, x * 0 + ucb_link, color="tab:pink", alpha=0.2
+ )
+
+ plt.show()
diff --git a/stpy/point_processes/link_fun_rate_estimator.py b/stpy/point_processes/link_fun_rate_estimator.py
index 20cf463..ee51e4e 100644
--- a/stpy/point_processes/link_fun_rate_estimator.py
+++ b/stpy/point_processes/link_fun_rate_estimator.py
@@ -1,3 +1,4 @@
+from typing import List
import numpy as np
import torch
import scipy
@@ -8,460 +9,614 @@
import matplotlib.pyplot as plt
from stpy.embeddings.embedding import HermiteEmbedding
import scipy.integrate as integrate
-from stpy.helpers.ellipsoid_algorithms import maximize_quadratic_on_ellipse, minimize_quadratic_on_ellipse
-from stpy.helpers.ellipsoid_algorithms import maximize_matrix_quadratic_on_ellipse, minimize_matrix_quadratic_on_ellipse
+from stpy.helpers.ellipsoid_algorithms import (
+ maximize_quadratic_on_ellipse,
+ minimize_quadratic_on_ellipse,
+)
+from stpy.helpers.ellipsoid_algorithms import (
+ maximize_matrix_quadratic_on_ellipse,
+ minimize_matrix_quadratic_on_ellipse,
+)
from stpy.point_processes.poisson import PoissonPointProcess
-from stpy.point_processes.poisson_rate_estimator import PositiveRateEstimator
+from stpy.point_processes.poisson_rate_estimator import PoissonRateEstimator
from stpy.borel_set import BorelSet, HierarchicalBorelSets
from stpy.kernels import KernelFunction
## implement loading data
-class PermanentalProcessRateEstimator(PositiveRateEstimator):
- def __init__(self, *args, **kwargs):
- super().__init__(*args,**kwargs)
-
- self.integration = "fixed_quad"
- self.product_integrals = {}
- self.varLambdas = torch.zeros(size=(len(self.basic_sets), self.get_m(),self.get_m())).double()
- self.opt = 'cvxpy'
- if self.feedback == "count-record" and self.estimator=="least-sq":
- print ("precomputing-integrals:")
- for index_set, set in enumerate(self.basic_sets):
- print (index_set,"/",len(self.basic_sets))
- self.varLambdas[index_set, :] = self.product_integral(set)
- self.variances[index_set] = set.volume() * self.B
-
-
- def product_integral(self,S):
-
- if S in self.product_integrals.keys():
- return self.product_integrals[S]
- else:
-
- if "product_integral" in dir(self.packing):
- Psi = self.packing.product_integral(S)
- self.product_integrals[S] = Psi
- return Psi
-
- elif self.integration == "vec_quad":
-
- if S.d == 2:
- #Psi = torch.zeros(size=(self.get_m(), self.get_m())).double()
- F = lambda x: (self.packing.embed(x).view(-1, 1) @\
- self.packing.embed(x).view(1, -1)).view(-1)
- integrand = lambda x, y: F(torch.Tensor([x, y]).view(1, 2).double()).numpy()
-
- val = quadvec2(integrand,float(S.bounds[0, 0]), float(S.bounds[0, 1]),
- float(S.bounds[1, 0]), float(S.bounds[1, 1]),limit = 10,epsrel = 10e-3, epsabs = 10e-3, quadrature = 'gk15')
- Psi = torch.from_numpy(val).view((self.get_m(), self.get_m()))
-
- elif self.integration == "fixed_quad":
-
- if S.d ==1:
- weights, nodes = S.return_legendre_discretization(n=128)
- Z = self.packing.embed(nodes)
- M = torch.einsum('ij,ik->ijk', Z, Z)
- Psi = torch.einsum('i,ijk->jk', weights, M)
-
- if S.d ==2:
- weights, nodes = S.return_legendre_discretization(n = 50)
- Z = self.packing.embed(nodes)
- M = torch.einsum('ij,ik->ijk',Z,Z)
- Psi = torch.einsum('i,ijk->jk',weights,M)
-
- else:
- Psi = torch.zeros(size = (self.get_m(),self.get_m())).double()
- for i in range(self.get_m()):
- for j in range(self.get_m()):
-
- if S.d == 1:
- F_ij = lambda x: (self.packing.embed(torch.from_numpy(np.array(x)).view(1, -1)).view(-1)[i] *
- self.packing.embed(torch.from_numpy(np.array(x)).view(1, -1)).view(-1)[
- j]).numpy()
- val, status = integrate.quad(F_ij,float(S.bounds[0,0]), float(S.bounds[0,1]))
-
-
- elif S.d == 2:
- F_ij = lambda x: self.packing.embed(x).view(-1)[i] *self.packing.embed(x).view(-1)[j]
- integrand = lambda x, y: F_ij(torch.Tensor([x, y]).view(1, 2).double()).numpy()
- val,status = integrate.dblquad(integrand, float(S.bounds[0, 0]), float(S.bounds[0, 1]),
- lambda x: float(S.bounds[1, 0]),
- lambda x: float(S.bounds[1, 1]),epsabs=1.49e-03, epsrel=1.49e-03)
- else:
- raise NotImplementedError("Integration above d>2 not implemented.")
-
- Psi[i,j] = val
- print(i, j, val)
-
- self.product_integrals[S] = Psi
- return Psi
-
- def get_constraints(self):
- s = self.get_m()
- l = np.full(s, self.b)
- u = np.full(s, self.B)
- Lambda = np.identity(s)
- return (l,Lambda,u)
-
- def cov(self, inverse=False):
- s = self.get_m()
-
- if inverse==False:
- return torch.zeros(size = (s,s)).double()
- else:
- return torch.zeros(size=(s, s)).double(),torch.zeros(size=(s, s)).double()
-
-
- def sample(self, verbose = False, steps = 10, stepsize = None):
-
- if self.data is None:
- self.sampled_theta = torch.zeros(self.get_m()).double().view(-1,1)
- return None
-
- if self.observations is not None:
- observations = self.observations.double()
- sumLambda = self.sumLambda.double()
- nabla = lambda theta: -torch.sum(torch.diag(1. /(observations@theta).view(-1)) @ observations) \
- + (sumLambda.T + sumLambda) @ theta + self.s*theta.view(-1,1)
- else:
- sumLambda = self.sumLambda.double()
- nabla = lambda theta: (sumLambda.T + sumLambda) @ theta + self.s*theta.view(-1,1)
-
- theta = self.rate.view(-1, 1)
-
- W = self.construct_covariance_matrix_laplace()
- L = float(scipy.sparse.linalg.eigsh(W.numpy(), k=1, which='LM', return_eigenvectors=False, tol=1e-3))
- eta = 0.5 / (L + 1)
-
- for k in range(steps):
- W = torch.randn(size=(self.get_m(), 1)).double()
- theta = theta - eta * nabla(theta) + np.sqrt(2 * eta) * W
- if verbose == True:
- print("Iter:", k, theta.T)
-
- self.sampled_theta = theta
- return None
-
- def sample_value(self, S):
- """
- Given a pre-sampled value evaluate certain portions of the domain S
- :param S:
- :return:
- """
- Z = self.product_integral(S)
- map = self.sampled_theta.T@ Z @self.sampled_theta
- return map
-
-
- def sample_path(self, S, n=128):
- xtest = S.return_discretization(n)
- return (self.packing.embed(xtest) @ self.sampled_theta)**2
-
-
-
-
- def load_data(self,data):
- super().load_data(data, times = False)
- self.sumLambda = torch.zeros(size = (self.get_m(),self.get_m()))
- if len(data) > 1:
- for sample in data:
- (S,obs,dt) = sample
- self.sumLambda += self.product_integral(S) * dt
-
- def add_data_point(self, new_data):
- super().add_data_point(new_data, times = False)
- (S, obs, dt) = new_data
- self.sumLambda += self.product_integral(S) * dt
-
- def penalized_likelihood(self, threads = 4):
- sumLambda = self.sumLambda.numpy()
- if self.observations is not None:
- observations = self.observations.numpy()
- loss = lambda theta: float(-np.sum(np.log( (observations@theta)**2 )) + np.dot(theta, sumLambda@theta) + 0.5*self.s*np.sum(theta**2))
- else:
- loss = lambda theta: float(np.dot(theta, sumLambda @ theta) + 0.5*self.s * np.sum(theta ** 2))
-
- theta = np.random.randn(self.get_m())
- res = minimize(loss, theta, jac=None, method='L-BFGS-B')
- self.rate = torch.from_numpy(res.x)
- return self.rate
-
- def construct_covariance_matrix_laplace(self):
- W = torch.zeros(size=(self.get_m(), self.get_m())).double()
-
- if self.feedback == "count-record":
- if self.observations is not None:
- for i in range(self.observations.size()[0]):
- A = self.observations[i, :].view(-1, 1) @ self.observations[i, :].view(1, -1)
- k = np.maximum(torch.dot(self.observations[i, :],self.rate.view(-1)) ** 2,self.b)
- W = W + A / k
- W += 2*self.sumLambda
- else:
- raise AssertionError("Not implemented.")
- return W + torch.eye(self.get_m()).double()*self.s
-
-
- def map_lcb_ucb_approx_action(self, S, dt=1., beta=2.):
-
- phi = self.packing.integral(S)
- map = (phi @ self.rate)
-
- ucb = np.maximum((map + beta*np.sqrt(phi@self.W_inv_approx@phi.T))**2,(map - beta*np.sqrt(phi@self.W_inv_approx@phi.T))**2)
- ucb = np.minimum(ucb,self.B*S.volume()*dt)
- lcb = 0.
-
- return dt*map**2, dt*lcb, dt*ucb
-
- def mean_std_per_action(self,S,W, dt , beta):
- Z = self.product_integral(S)
-
- ucb, _ = maximize_matrix_quadratic_on_ellipse(Z.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta)
- lcb, _ = minimize_matrix_quadratic_on_ellipse(Z.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta)
-
- map = self.rate.T @ Z @ self.rate
-
- return dt * map, dt * ucb, -lcb * dt
-
-
- def mean_rate(self, S, n=128):
- xtest = S.return_discretization(n)
- return (self.packing.embed(xtest) @ self.rate)**2
-
- def mean_rate_latent(self,S,n = 128):
- xtest = S.return_discretization(n)
- return self.packing.embed(xtest) @ self.rate
-
-
- def map_lcb_ucb_approx(self,S,n,beta = 2.0, delta = 0.01):
- xtest = S.return_discretization(n)
- if self.data is None:
- return 0 * xtest[:, 0].view(-1, 1),self.b + 0 * xtest[:, 0].view(-1, 1), self.B + 0 * xtest[:,0].view(-1,xtest.size()[0])
- self.fit_ellipsoid_approx()
-
- Phi = self.packing.embed(xtest).double()
- map = Phi @ self.rate
- N = Phi.size()[0]
-
- ucb = torch.zeros(size=(N, 1)).double()
- lcb = torch.zeros(size=(N, 1)).double()
-
- for i in range(N):
- x = Phi[i, :].view(-1,1)
- maximum = np.maximum((map[i] - beta * np.sqrt(x.T @ self.W_inv_approx @ x))**2, (map[i] + beta * np.sqrt(x.T @ self.W_inv_approx @ x))**2)
- ucb[i,0] = np.minimum( maximum ,self.B)
- lcb[i,0] = 0.
- #lcb[i, 0] = map[i] - np.sqrt(beta) * np.sqrt(x.T @ self.W_inv_approx @ x) ** 2
- return map**2, lcb, ucb
-
- def map_lcb_ucb(self, S, n, beta = 2.0, delta = 0.01):
- """
- Calculate exact confidence using laplace approximation on a whole set domain
- :param S: set
- :param n: discretization
- :param beta: beta
- :return:
- """
-
- xtest = S.return_discretization(n)
- if self.data is None:
- return self.b+0*xtest[:,0].view(-1,1),self.b+0*xtest[:,0].view(-1,1),self.B+0*xtest[:,0].view(-1,1)
-
- N = xtest.size()[0]
- Phi = self.packing.embed(xtest)
- map = (Phi @ self.rate)**2
-
- if self.uncertainty == "laplace":
- W = self.construct_covariance_matrix_laplace()
- ucb = torch.zeros(size=(N, 1)).double()
- lcb = torch.zeros(size=(N, 1)).double()
-
- for i in range(N):
- x = Phi[i, :]
- ucbi, _ = maximize_quadratic_on_ellipse(x.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta)
- lcbi, _ = minimize_quadratic_on_ellipse(x.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta)
- ucb[i, 0] = ucbi
- lcb[i, 0] = lcbi
-
- return map, lcb, ucb
+class PermanentalProcessRateEstimator(PoissonRateEstimator):
+
+ def __init__(self, *args, **kwargs):
+ """
+ Forward all arguments to the parent estimator, then set up the
+ integration mode, the product-integral cache and the per-set buffers.
+ """
+ super().__init__(*args, **kwargs)
+
+ # integration mode consulted by product_integral
+ self.integration = "fixed_quad"
+ # cache of product-integral matrices, keyed by the set they were computed for
+ self.product_integrals = {}
+ # one (get_m() x get_m()) slot per entry of self.basic_sets
+ self.varLambdas = torch.zeros(
+ size=(len(self.basic_sets), self.get_m(), self.get_m())
+ ).double()
+ self.opt = "cvxpy"
+ # precompute the per-set integrals only for count-record feedback with the least-squares estimator
+ if self.feedback == "count-record" and self.estimator == "least-sq":
+ print("precomputing-integrals:")
+ # NOTE(review): the loop variable `set` shadows the builtin of the same name
+ for index_set, set in enumerate(self.basic_sets):
+ print(index_set, "/", len(self.basic_sets))
+ self.varLambdas[index_set, :] = self.product_integral(set)
+ self.variances[index_set] = set.volume() * self.B
+
+ def product_integral(self, S):
+ """
+ Return the matrix of integrals over S of embed(x) embed(x)^T, caching
+ the result in self.product_integrals.
+
+ If the packing object offers its own product_integral it is used
+ directly; otherwise the integral is approximated according to
+ self.integration.
+
+ :param S: set to integrate over (reads S.d, S.bounds and
+ S.return_legendre_discretization)
+ :return: matrix Psi for S
+ :raises NotImplementedError: in the entry-wise fallback when S.d is not 1 or 2
+ """
+
+ if S in self.product_integrals.keys():
+ return self.product_integrals[S]
+ else:
+
+ if "product_integral" in dir(self.packing):
+ Psi = self.packing.product_integral(S)
+ self.product_integrals[S] = Psi
+ return Psi
+
+ elif self.integration == "vec_quad":
+
+ # NOTE(review): only S.d == 2 assigns Psi in this mode — confirm other dimensions never reach here
+ if S.d == 2:
+ # Psi = torch.zeros(size=(self.get_m(), self.get_m())).double()
+ # flattened outer product of the embedding with itself at a single point
+ F = lambda x: (
+ self.packing.embed(x).view(-1, 1)
+ @ self.packing.embed(x).view(1, -1)
+ ).view(-1)
+ integrand = lambda x, y: F(
+ torch.tensor([x, y]).view(1, 2).double()
+ ).numpy()
+
+ # NOTE(review): quadvec2 is not among the imports visible in this hunk — confirm it is in scope
+ val = quadvec2(
+ integrand,
+ float(S.bounds[0, 0]),
+ float(S.bounds[0, 1]),
+ float(S.bounds[1, 0]),
+ float(S.bounds[1, 1]),
+ limit=10,
+ epsrel=10e-3,
+ epsabs=10e-3,
+ quadrature="gk15",
+ )
+ Psi = torch.from_numpy(val).view((self.get_m(), self.get_m()))
+
+ elif self.integration == "fixed_quad":
+
+ # weighted sum of outer products of the embedding at the Legendre nodes
+ if S.d == 1:
+ weights, nodes = S.return_legendre_discretization(n=128)
+ Z = self.packing.embed(nodes)
+ M = torch.einsum("ij,ik->ijk", Z, Z)
+ Psi = torch.einsum("i,ijk->jk", weights, M)
+
+ if S.d == 2:
+ weights, nodes = S.return_legendre_discretization(n=50)
+ Z = self.packing.embed(nodes)
+ M = torch.einsum("ij,ik->ijk", Z, Z)
+ Psi = torch.einsum("i,ijk->jk", weights, M)
+
+ else:
+ # entry-by-entry adaptive quadrature of embed(x)[i] * embed(x)[j]
+ Psi = torch.zeros(size=(self.get_m(), self.get_m())).double()
+ for i in range(self.get_m()):
+ for j in range(self.get_m()):
+
+ if S.d == 1:
+ F_ij = lambda x: (
+ self.packing.embed(
+ torch.from_numpy(np.array(x)).view(1, -1)
+ ).view(-1)[i]
+ * self.packing.embed(
+ torch.from_numpy(np.array(x)).view(1, -1)
+ ).view(-1)[j]
+ ).numpy()
+ val, status = integrate.quad(
+ F_ij, float(S.bounds[0, 0]), float(S.bounds[0, 1])
+ )
+
+ elif S.d == 2:
+ F_ij = (
+ lambda x: self.packing.embed(x).view(-1)[i]
+ * self.packing.embed(x).view(-1)[j]
+ )
+ integrand = lambda x, y: F_ij(
+ torch.tensor([x, y]).view(1, 2).double()
+ ).numpy()
+ val, status = integrate.dblquad(
+ integrand,
+ float(S.bounds[0, 0]),
+ float(S.bounds[0, 1]),
+ lambda x: float(S.bounds[1, 0]),
+ lambda x: float(S.bounds[1, 1]),
+ epsabs=1.49e-03,
+ epsrel=1.49e-03,
+ )
+ else:
+ raise NotImplementedError(
+ "Integration above d>2 not implemented."
+ )
+
+ Psi[i, j] = val
+ print(i, j, val)
+
+ # remember the result so repeated queries for S hit the cache
+ self.product_integrals[S] = Psi
+ return Psi
+
+ def get_constraints(self):
+ """
+ Return the triple (l, Lambda, u): l is filled with self.b, u with
+ self.B, and Lambda is the identity, all of size self.get_m().
+ """
+ s = self.get_m()
+ l = np.full(s, self.b)
+ u = np.full(s, self.B)
+ Lambda = np.identity(s)
+ return (l, Lambda, u)
+
+ def cov(self, inverse=False):
+ """
+ Return an all-zero double matrix of size get_m() x get_m(); when
+ `inverse` is not False, return a pair of such matrices instead.
+ """
+ s = self.get_m()
+
+ if inverse == False:
+ return torch.zeros(size=(s, s)).double()
+ else:
+ return torch.zeros(size=(s, s)).double(), torch.zeros(size=(s, s)).double()
+
+ def sample(self, verbose=False, steps=10, stepsize=None):
+ """
+ Draw a parameter sample by `steps` noisy gradient iterations started
+ from self.rate and store it in self.sampled_theta.
+
+ :param verbose: print the iterate after every step
+ :param steps: number of iterations
+ :param stepsize: not used in this body
+ :return: None; the sample is stored in self.sampled_theta
+ """
+
+ # without data the sample is the zero vector
+ if self.data is None:
+ self.sampled_theta = torch.zeros(self.get_m()).double().view(-1, 1)
+ return None
+
+ if self.observations is not None:
+ observations = self.observations.double()
+ sumLambda = self.sumLambda.double()
+ # NOTE(review): torch.sum here reduces over all entries to a scalar — confirm that is the intended gradient term
+ nabla = (
+ lambda theta: -torch.sum(
+ torch.diag(1.0 / (observations @ theta).view(-1)) @ observations
+ )
+ + (sumLambda.T + sumLambda) @ theta
+ + self.s * theta.view(-1, 1)
+ )
+ else:
+ sumLambda = self.sumLambda.double()
+ nabla = lambda theta: (
+ sumLambda.T + sumLambda
+ ) @ theta + self.s * theta.view(-1, 1)
+
+ theta = self.rate.view(-1, 1)
+
+ # step size derived from the largest-magnitude eigenvalue of the Laplace matrix
+ W = self.construct_covariance_matrix_laplace()
+ L = float(
+ scipy.sparse.linalg.eigsh(
+ W.numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-3
+ )
+ )
+ eta = 0.5 / (L + 1)
+
+ for k in range(steps):
+ # W is rebound here to the Gaussian noise of this step
+ W = torch.randn(size=(self.get_m(), 1)).double()
+ theta = theta - eta * nabla(theta) + np.sqrt(2 * eta) * W
+ if verbose == True:
+ print("Iter:", k, theta.T)
+
+ self.sampled_theta = theta
+ return None
+
+ def sample_value(self, S):
+ """
+ Evaluate the previously sampled parameter on the set S.
+
+ Requires self.sampled_theta, which sample() sets.
+ :param S: set passed to product_integral
+ :return: sampled_theta^T @ product_integral(S) @ sampled_theta
+ """
+ Z = self.product_integral(S)
+ map = self.sampled_theta.T @ Z @ self.sampled_theta
+ return map
+
+ def sample_path(self, S, n=128):
+ """
+ Return the squared linear response embed(x) @ sampled_theta on the
+ discretization S.return_discretization(n).
+ """
+ xtest = S.return_discretization(n)
+ return (self.packing.embed(xtest) @ self.sampled_theta) ** 2
+
+ def load_data(self, data):
+ """
+ Load a list of (S, obs, dt) samples through the parent class and
+ rebuild self.sumLambda.
+
+ With more than one sample, sumLambda accumulates
+ product_integral(S) * dt over all samples. Otherwise the set and
+ duration of the first sample are stored as self.S and self.dt and
+ sumLambda stays zero.
+
+ :param data: list of (S, obs, dt) tuples
+ """
+ super().load_data(data, times=False)
+ self.sumLambda = torch.zeros(size=(self.get_m(), self.get_m()))
+ if len(data) > 1:
+ # NOTE(review): self.S / self.dt are not set on this path — confirm no caller needs them after a multi-sample load
+ for sample in data:
+ (S, obs, dt) = sample
+ self.sumLambda += self.product_integral(S) * dt
+ else:
+ # NOTE(review): an empty `data` list raises IndexError on the next line — confirm callers never pass one
+ self.S = data[0][0]
+ self.dt = data[0][2]
+ assert isinstance(self.S, BorelSet)
+
+ def add_data_point(self, new_data):
+ """
+ Append one (S, obs, dt) sample through the parent class and add
+ product_integral(S) * dt to self.sumLambda.
+ """
+ super().add_data_point(new_data, times=False)
+ (S, obs, dt) = new_data
+ self.sumLambda += self.product_integral(S) * dt
+
+ def penalized_likelihood(self, threads=4):
+ """
+ Fit the parameter vector by minimizing the penalized negative
+ log-likelihood with L-BFGS-B and store it in self.rate.
+
+ :param threads: not used in this body
+ :return: fitted parameter vector as a torch tensor
+ """
+ sumLambda = self.sumLambda.numpy()
+ if self.observations is not None:
+ observations = self.observations.numpy()
+ # -sum log((obs @ theta)^2) + theta^T sumLambda theta + 0.5 * s * |theta|^2
+ loss = lambda theta: float(
+ -np.sum(np.log((observations @ theta) ** 2))
+ + np.dot(theta, sumLambda @ theta)
+ + 0.5 * self.s * np.sum(theta**2)
+ )
+ else:
+ loss = lambda theta: float(
+ np.dot(theta, sumLambda @ theta) + 0.5 * self.s * np.sum(theta**2)
+ )
+
+ # random starting point; no analytic gradient is supplied to the optimizer
+ theta = np.random.randn(self.get_m())
+ res = minimize(loss, theta, jac=None, method="L-BFGS-B")
+ self.rate = torch.from_numpy(res.x)
+ return self.rate
+
+ def construct_covariance_matrix_laplace(self):
+ """
+ Build the matrix used by the Laplace approximation around self.rate.
+
+ :return: accumulated matrix plus self.s times the identity
+ :raises AssertionError: when self.feedback is not "count-record"
+ """
+ W = torch.zeros(size=(self.get_m(), self.get_m())).double()
+
+ if self.feedback == "count-record":
+ if self.observations is not None:
+ for i in range(self.observations.size()[0]):
+ # outer product of the i-th observation row with itself
+ A = self.observations[i, :].view(-1, 1) @ self.observations[
+ i, :
+ ].view(1, -1)
+ # squared response at the current estimate, floored at self.b
+ k = np.maximum(
+ torch.dot(self.observations[i, :], self.rate.view(-1)) ** 2,
+ self.b,
+ )
+ W = W + A / k
+ W += 2 * self.sumLambda
+ else:
+ raise AssertionError("Not implemented.")
+ return W + torch.eye(self.get_m()).double() * self.s
+
+ def map_lcb_ucb_approx_action(self, S, dt=1.0, beta=2.0):
+ """
+ Approximate estimate and bounds for the set S from self.W_inv_approx.
+
+ :param S: set passed to packing.integral; its volume() is also read
+ :param dt: duration multiplier
+ :param beta: width multiplier of the bound
+ :return: (dt * map**2, dt * lcb, dt * ucb), with lcb fixed at 0
+ """
+
+ phi = self.packing.integral(S)
+ map = phi @ self.rate
+
+ # the response is squared, so take the larger square of the two interval ends
+ ucb = np.maximum(
+ (map + beta * np.sqrt(phi @ self.W_inv_approx @ phi.T)) ** 2,
+ (map - beta * np.sqrt(phi @ self.W_inv_approx @ phi.T)) ** 2,
+ )
+ ucb = np.minimum(ucb, self.B * S.volume() * dt)
+ lcb = 0.0
+
+ # NOTE(review): ucb is multiplied by dt again although the cap above already includes dt — confirm intended
+ return dt * map**2, dt * lcb, dt * ucb
+
+ def mean_std_per_action(self, S, W, dt, beta):
+ """
+ Estimate and bounds of the quadratic form with matrix
+ product_integral(S), using the ellipsoid helpers with W, self.rate
+ and beta.
+
+ :param S: set passed to product_integral
+ :param W: torch matrix handed to the ellipsoid helpers
+ :param dt: duration multiplier
+ :param beta: handed to the ellipsoid helpers
+ :return: (dt * map, dt * ucb, -lcb * dt)
+ """
+ Z = self.product_integral(S)
+
+ ucb, _ = maximize_matrix_quadratic_on_ellipse(
+ Z.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta
+ )
+ lcb, _ = minimize_matrix_quadratic_on_ellipse(
+ Z.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta
+ )
+
+ map = self.rate.T @ Z @ self.rate
+
+ # the value from the minimizing helper is returned with its sign flipped
+ return dt * map, dt * ucb, -lcb * dt
+
+ def mean_rate(self, S, n=128):
+ """
+ Return the squared linear response embed(x) @ self.rate on the
+ discretization S.return_discretization(n).
+ """
+ xtest = S.return_discretization(n)
+ return (self.packing.embed(xtest) @ self.rate) ** 2
+
+ def mean_rate_latent(self, S, n=128):
+ """
+ Return the linear response embed(x) @ self.rate, without squaring,
+ on the discretization S.return_discretization(n).
+ """
+ xtest = S.return_discretization(n)
+ return self.packing.embed(xtest) @ self.rate
+
+ def map_lcb_ucb_approx(self, S, n, beta=2.0, delta=0.01):
+ """
+ Pointwise estimate and approximate bounds on a discretization of S,
+ using self.W_inv_approx after calling fit_ellipsoid_approx().
+
+ :param S: set to discretize
+ :param n: discretization size handed to S.return_discretization
+ :param beta: width multiplier of the bound
+ :param delta: not used in this body
+ :return: (map**2, lcb, ucb); lcb is all zeros and ucb is capped at self.B
+ """
+ xtest = S.return_discretization(n)
+ if self.data is None:
+ # NOTE(review): the third element is reshaped with view(-1, xtest.size()[0]) while the other two use view(-1, 1) — confirm
+ return (
+ 0 * xtest[:, 0].view(-1, 1),
+ self.b + 0 * xtest[:, 0].view(-1, 1),
+ self.B + 0 * xtest[:, 0].view(-1, xtest.size()[0]),
+ )
+ self.fit_ellipsoid_approx()
+
+ Phi = self.packing.embed(xtest).double()
+ map = Phi @ self.rate
+ N = Phi.size()[0]
+
+ ucb = torch.zeros(size=(N, 1)).double()
+ lcb = torch.zeros(size=(N, 1)).double()
+
+ for i in range(N):
+ x = Phi[i, :].view(-1, 1)
+ # the response is squared, so take the larger square of the two interval ends
+ maximum = np.maximum(
+ (map[i] - beta * np.sqrt(x.T @ self.W_inv_approx @ x)) ** 2,
+ (map[i] + beta * np.sqrt(x.T @ self.W_inv_approx @ x)) ** 2,
+ )
+ ucb[i, 0] = np.minimum(maximum, self.B)
+ lcb[i, 0] = 0.0
+ # lcb[i, 0] = map[i] - np.sqrt(beta) * np.sqrt(x.T @ self.W_inv_approx @ x) ** 2
+ return map**2, lcb, ucb
+
+ def map_lcb_ucb(self, S, n, beta=2.0, delta=0.01):
+ """
+ Calculate pointwise confidence bounds on a discretization of S using
+ the Laplace approximation.
+
+ Without data the result is constant: self.b for the estimate and the
+ lower bound, self.B for the upper bound.
+
+ :param S: set to discretize
+ :param n: discretization size handed to S.return_discretization
+ :param beta: handed to the ellipsoid helpers
+ :param delta: not used in this body
+ :return: (map, lcb, ucb), map being the squared linear response
+ """
+
+ xtest = S.return_discretization(n)
+ if self.data is None:
+ return (
+ self.b + 0 * xtest[:, 0].view(-1, 1),
+ self.b + 0 * xtest[:, 0].view(-1, 1),
+ self.B + 0 * xtest[:, 0].view(-1, 1),
+ )
+
+ N = xtest.size()[0]
+ Phi = self.packing.embed(xtest)
+ map = (Phi @ self.rate) ** 2
+
+ # NOTE(review): lcb / ucb are only assigned when self.uncertainty == "laplace" — confirm other settings never reach the return
+ if self.uncertainty == "laplace":
+ W = self.construct_covariance_matrix_laplace()
+ ucb = torch.zeros(size=(N, 1)).double()
+ lcb = torch.zeros(size=(N, 1)).double()
+
+ # one pair of ellipsoid problems per discretization point
+ for i in range(N):
+ x = Phi[i, :]
+ ucbi, _ = maximize_quadratic_on_ellipse(
+ x.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta
+ )
+ lcbi, _ = minimize_quadratic_on_ellipse(
+ x.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta
+ )
+ ucb[i, 0] = ucbi
+ lcb[i, 0] = lcbi
+
+ return map, lcb, ucb
class LogisticGaussProcessRateEstimator(PermanentalProcessRateEstimator):
- def penalized_likelihood(self, threads=4):
- logistic = lambda x: np.log(1 + np.exp(x))
- weights = self.weights.numpy()
- nodes = self.nodes.numpy()
-
- if self.observations is not None:
- observations = self.observations.numpy()
- loss = lambda theta: float(-np.sum(np.log(logistic(observations @ theta))) + np.sum(
- weights * logistic(theta @ nodes.T)) + self.s * np.sum(theta ** 2))
- else:
- loss = lambda theta: float(np.sum(weights * logistic(theta @ nodes.T)) + self.s * np.sum(theta ** 2))
-
- theta = np.random.randn(self.get_m())
- res = minimize(loss, theta, jac= None, method='L-BFGS-B',options={'maxcor': 20,'iprint':-1,'maxfun':150000,'maxls': 50})
- self.rate = torch.from_numpy(res.x)
-
- return self.rate
-
- def logistic(self, x):
- return torch.log(1 + torch.exp(x))
-
- def mean_rate(self, S, n=128):
- xtest = S.return_discretization(n)
- return self.logistic(self.packing.embed(xtest) @ self.rate)
+ def penalized_likelihood(self, threads=4):
+ """
+ Fit the parameter vector for the link log(1 + exp(.)) by minimizing
+ the penalized negative log-likelihood with L-BFGS-B; the integral
+ term is a weighted sum over self.nodes with self.weights.
+
+ :param threads: not used in this body
+ :return: fitted parameter vector, also stored in self.rate
+ """
+ logistic = lambda x: np.log(1 + np.exp(x))
+ weights = self.weights.numpy()
+ nodes = self.nodes.numpy()
+
+ if self.observations is not None:
+ observations = self.observations.numpy()
+ loss = lambda theta: float(
+ -np.sum(np.log(logistic(observations @ theta)))
+ + np.sum(weights * logistic(theta @ nodes.T))
+ + self.s * np.sum(theta**2)
+ )
+ else:
+ loss = lambda theta: float(
+ np.sum(weights * logistic(theta @ nodes.T)) + self.s * np.sum(theta**2)
+ )
+
+ # random starting point; no analytic gradient is supplied to the optimizer
+ theta = np.random.randn(self.get_m())
+ res = minimize(
+ loss,
+ theta,
+ jac=None,
+ method="L-BFGS-B",
+ options={"maxcor": 20, "iprint": -1, "maxfun": 150000, "maxls": 50},
+ )
+ self.rate = torch.from_numpy(res.x)
+
+ return self.rate
+
+ def logistic(self, x):
+ """Return log(1 + exp(x)) elementwise for a torch tensor x."""
+ return torch.log(1 + torch.exp(x))
+
+ def mean_rate(self, S, n=128):
+ """
+ Return self.logistic applied to embed(x) @ self.rate on the
+ discretization S.return_discretization(n).
+ """
+ xtest = S.return_discretization(n)
+ return self.logistic(self.packing.embed(xtest) @ self.rate)
class ExpGaussProcessRateEstimator(PermanentalProcessRateEstimator):
+    def penalized_likelihood(self, threads=4):
+        """
+        Fit the log-linear rate exp(embed(x) @ theta) by penalized maximum
+        likelihood and store the result in self.rate.
+
+        ONLY WORKS WITH ONE DATASET given by load_data: self.S and self.dt
+        are read here and load_data sets them only on its single-sample path.
+        The integral of the rate over self.S is approximated with a 50-node
+        Gauss-Legendre rule evaluated on the embedded nodes.
+
+        :param threads: unused; kept for interface compatibility
+        :return: fitted parameter vector as a torch tensor
+        """
+        # Get node function values and weights for Gauss-Legendre quadrature
+        weights, nodes = self.S.return_legendre_discretization(n=50)
+        nodes = nodes.to(torch.get_default_device())
+        weights = weights.cpu().numpy()
+        # embedding evaluated at the quadrature nodes, one row per node
+        vals = self.packing.embed(nodes).cpu().numpy()
+
+        if self.observations is not None:
+            observations = self.observations.cpu().numpy()
+            loss = lambda theta: float(
+                -np.sum(observations @ theta)
+                + self.dt * np.sum(weights * np.exp(theta @ vals.T))
+                + self.s * 0.5 * np.sum(theta**2)
+            )
+        else:
+            # Same objective without the data term. This branch previously
+            # multiplied theta with the raw (un-embedded) torch nodes and
+            # dropped both dt and the 0.5 factor of the ridge penalty.
+            loss = lambda theta: float(
+                self.dt * np.sum(weights * np.exp(theta @ vals.T))
+                + self.s * 0.5 * np.sum(theta**2)
+            )
+
+        # deterministic start at zero; gradients come from finite differences
+        theta = np.zeros(self.get_m())
+        res = minimize(
+            loss,
+            theta,
+            jac=None,
+            method="L-BFGS-B",
+            options={
+                "maxcor": 20,
+                "iprint": -1,
+                "maxfun": 150000,
+                "maxls": 100,
+                "ftol": 1e-12,
+                "eps": 1e-12,
+                "gtol": 1e-8,
+            },
+        )
+        self.rate = torch.tensor(res.x)
+
+        return self.rate
+
+ def mean_rate(self, S, n=128):
+ """
+ Return exp(embed(x) @ self.rate) on the discretization
+ S.return_discretization(n).
+ """
+ xtest = S.return_discretization(n)
+ return torch.exp(self.packing.embed(xtest) @ self.rate)
+
+ def rate_value(self, x, dt=1):
+ """
+ Evaluate the fitted rate at the points x.
+
+ :param x: points handed to packing.embed
+ :param dt: multiplier applied to the embedding before exponentiating
+ :return: exp(phi @ rate) as a column; when self.rate is None a
+ message is printed and a column filled with self.b is returned
+ """
+ # NOTE(review): dt scales the embedding, i.e. the exponent, not the exponentiated rate — confirm intended
+ phi = self.packing.embed(x) * dt
+
+ if self.rate is not None:
+ map = torch.exp(phi @ self.rate.view(-1, 1))
+ else:
+ print("Rate function not fitted!")
+ map = 0 * phi[:, 0].view(-1, 1) + self.b
+
+ return map
- def penalized_likelihood(self, threads=4):
- weights = self.weights.numpy()
- nodes = self.nodes.numpy()
-
- if self.observations is not None:
- observations = self.observations.numpy()
- loss = lambda theta: float(np.sum(observations @ theta) + np.sum(
- weights * np.exp(-theta @ nodes.T)) + self.s * np.sum(theta ** 2))
- else:
- loss = lambda theta: float(np.sum(weights * np.exp(-theta @ nodes.T)) + self.s * np.sum(theta ** 2))
-
- theta = np.zeros(self.get_m())
- res = minimize(loss, theta, jac= None, method='L-BFGS-B',options={'maxcor': 20,'iprint':-1,
- 'maxfun':150000,'maxls': 100,
- 'ftol':1e-12,'eps':1e-12,'gtol':1e-8})
- self.rate = torch.from_numpy(res.x)
-
- return self.rate
-
- def mean_rate(self, S, n=128):
- xtest = S.return_discretization(n)
- return torch.exp(-self.packing.embed(xtest) @ self.rate)
if __name__ == "__main__":
- torch.manual_seed(2)
- np.random.seed(2)
- d = 1
- gamma = 0.1
- n = 64
- B = 4.
- b = 0.1
-
- process = PoissonPointProcess(d=1, B=B, b=b)
- Sets = []
- levels = 4
- hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels)
- Sets = hierarchical_structure.get_all_sets()
-
- D = BorelSet(1, bounds=torch.Tensor([[-1., 1.]]).double())
-
- m = 64
- embedding = HermiteEmbedding(m = m, d = 1, gamma = gamma)
- k = KernelFunction(gamma = gamma)
-
- estimator5 = PositiveRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d = d)
-
- estimator4 = PermanentalProcessRateEstimator(process, hierarchical_structure,kernel_object=k, B=B, m=m, d = d)
- #estimator = PermanentalProcessRateEstimator(process, hierarchical_structure,
- # kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom", approx="ellipsoid")
- #estimator = LogGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom")
- estimator = LogGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B+1, m=m, d=d, embedding=embedding)
-
- #estimator = LogisticGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom")
- estimator2 = LogisticGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding)
- #estimator = ExpGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom")
- estimator3 = ExpGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding)
-
- estimators = [estimator,estimator2,estimator3,estimator4,estimator5]
- names = ['sigmoid','logistic','exp','square','no-link']
- bands = [True,False,False,False,True]
-
-
- estimators = [estimator,estimator5,estimator4]
- names = ['sigmoid','no-link','square']
- bands = [False,False,False]
-
- min_vol, max_vol = estimator.get_min_max()
- dt = 10. / (b * min_vol)
- dt = dt * 2
-
- print("Suggested dt:", dt)
- c = ['k', 'r', 'b', 'y', 'g', 'orange', 'brown', 'purple'] + ['k' for i in range(500)]
-
- no_sets = len(Sets)
-
-
- # no_samples = 3
- # data = []
- # samples = []
- # repeats = 2
- #
- # for i in range(no_samples):
- # j = np.random.randint(0, no_sets, 1)
- # S = Sets[j[0]]
- # for _ in range(repeats):
- # sample = process.sample_discretized(S, dt)
- # samples.append(sample)
- # data.append((S, sample, dt))
- #
- # sample_D = process.sample_discretized(D, dt)
- # samples.append(sample_D)
- # no_samples = repeats * no_samples + 1
- # data.append((D, sample_D, dt))
-
-
- data_single = []
- basic_sets = hierarchical_structure.get_sets_level(levels)
- samples = []
-
- for set in basic_sets:
- sample = process.sample_discretized(set,dt)
- data_single.append((set,sample,dt))
- samples.append(sample)
- data = data_single
-
- # sample_D = torch.cat(samples)
- # data = [(D,sample_D,dt)]
-
- # data2 = []
- # samples = []
- # for set in basic_sets:
- # sample = process.sample_discretized(set,dt*2)
- # data2.append((set,sample,dt*2))
- # samples.append(sample)
- #
- # sample_D_2 = torch.cat(samples)
- # data = [(D, sample_D_2, dt*2)]
- #
- # data = data + data2
-
- for estimator,name,band in zip(estimators,names,bands):
- estimator.load_data(data)
-
- xtest = D.return_discretization(n=n)
-
- # likelihood based
- estimator.fit_gp()
- rate_mean = estimator.mean_rate(D,n = n)
- p = plt.plot(xtest, rate_mean, label='likelihood: '+name)
-
- if band == True:
- _, lcb, ucb = estimator.map_lcb_ucb(D, n, beta=2.)
- plt.fill_between(xtest.numpy().flatten(), lcb.numpy().flatten(), ucb.numpy().flatten(), alpha=0.4,
- color=p[0].get_color(), label=name)
-
-
-
- for j in range(len(samples)):
- if samples[j] is not None:
- plt.plot(samples[j], samples[j] * 0, 'o', color=c[j])
-
- # for action in Sets:
- # map, lcb, ucb = estimator.map_lcb_ucb_approx_action(action,beta=2.)
- # x = np.linspace(action.bounds[0,0],action.bounds[0,1],2)
- # plt.plot(x,x*0+float(ucb/action.volume()),'-o', color = "green")
- process.visualize(D, samples=0, n=n, dt=1.)
- plt.show()
+    # Demo: sample a Poisson point process on the finest level of a hierarchical
+    # partition of [-1, 1], fit several link-function estimators on the same
+    # data and plot their estimated rates.
+    torch.manual_seed(2)
+    np.random.seed(2)
+    d = 1
+    gamma = 0.1
+    n = 64
+    B = 4.0
+    b = 0.1
+
+    process = PoissonPointProcess(d=1, B=B, b=b)
+    levels = 4
+    hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels)
+    Sets = hierarchical_structure.get_all_sets()
+
+    D = BorelSet(1, bounds=torch.tensor([[-1.0, 1.0]]).double())
+
+    m = 64
+    embedding = HermiteEmbedding(m=m, d=1, gamma=gamma)
+    k = KernelFunction(gamma=gamma)
+
+    # The module imports PoissonRateEstimator (the PositiveRateEstimator import
+    # was removed), so the "no-link" baseline has to use that name.
+    estimator5 = PoissonRateEstimator(
+        process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d
+    )
+
+    estimator4 = PermanentalProcessRateEstimator(
+        process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d
+    )
+    # estimator = PermanentalProcessRateEstimator(process, hierarchical_structure,
+    # 									kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom", approx="ellipsoid")
+    # estimator = LogGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom")
+    # NOTE(review): LogGaussProcessRateEstimator is neither defined nor imported
+    # in the visible part of this module — confirm it is in scope.
+    estimator = LogGaussProcessRateEstimator(
+        process,
+        hierarchical_structure,
+        kernel_object=k,
+        B=B + 1,
+        m=m,
+        d=d,
+        embedding=embedding,
+    )
+
+    # estimator = LogisticGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom")
+    estimator2 = LogisticGaussProcessRateEstimator(
+        process,
+        hierarchical_structure,
+        kernel_object=k,
+        B=B,
+        m=m,
+        d=d,
+        embedding=embedding,
+    )
+    # estimator = ExpGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom")
+    estimator3 = ExpGaussProcessRateEstimator(
+        process,
+        hierarchical_structure,
+        kernel_object=k,
+        B=B,
+        m=m,
+        d=d,
+        embedding=embedding,
+    )
+
+    # Only three of the five estimators are fitted and plotted; estimator2
+    # ("logistic") and estimator3 ("exp") are constructed but left out.
+    estimators = [estimator, estimator5, estimator4]
+    names = ["sigmoid", "no-link", "square"]
+    bands = [False, False, False]
+
+    min_vol, max_vol = estimator.get_min_max()
+    dt = 10.0 / (b * min_vol)
+    dt = dt * 2
+
+    print("Suggested dt:", dt)
+    c = ["k", "r", "b", "y", "g", "orange", "brown", "purple"] + ["k"] * 500
+
+    no_sets = len(Sets)
+
+    # no_samples = 3
+    # data = []
+    # samples = []
+    # repeats = 2
+    #
+    # for i in range(no_samples):
+    # 	j = np.random.randint(0, no_sets, 1)
+    # 	S = Sets[j[0]]
+    # 	for _ in range(repeats):
+    # 		sample = process.sample_discretized(S, dt)
+    # 		samples.append(sample)
+    # 		data.append((S, sample, dt))
+    #
+    # sample_D = process.sample_discretized(D, dt)
+    # samples.append(sample_D)
+    # no_samples = repeats * no_samples + 1
+    # data.append((D, sample_D, dt))
+
+    # one sample per set of the finest level, all with the same duration dt
+    data_single = []
+    basic_sets = hierarchical_structure.get_sets_level(levels)
+    samples = []
+
+    for basic_set in basic_sets:
+        sample = process.sample_discretized(basic_set, dt)
+        data_single.append((basic_set, sample, dt))
+        samples.append(sample)
+    data = data_single
+
+    # sample_D = torch.cat(samples)
+    # data = [(D,sample_D,dt)]
+
+    # data2 = []
+    # samples = []
+    # for set in basic_sets:
+    # 	sample = process.sample_discretized(set,dt*2)
+    # 	data2.append((set,sample,dt*2))
+    # 	samples.append(sample)
+    #
+    # sample_D_2 = torch.cat(samples)
+    # data = [(D, sample_D_2, dt*2)]
+    #
+    # data = data + data2
+
+    for estimator, name, band in zip(estimators, names, bands):
+        estimator.load_data(data)
+
+        xtest = D.return_discretization(n=n)
+
+        # likelihood based
+        estimator.fit_gp()
+        rate_mean = estimator.mean_rate(D, n=n)
+        p = plt.plot(xtest, rate_mean, label="likelihood: " + name)
+
+        if band:
+            _, lcb, ucb = estimator.map_lcb_ucb(D, n, beta=2.0)
+            plt.fill_between(
+                xtest.numpy().flatten(),
+                lcb.numpy().flatten(),
+                ucb.numpy().flatten(),
+                alpha=0.4,
+                color=p[0].get_color(),
+                label=name,
+            )
+
+    # mark the sampled points of every set on the x-axis, one colour per set
+    for j, sample in enumerate(samples):
+        if sample is not None:
+            plt.plot(sample, sample * 0, "o", color=c[j])
+
+    # for action in Sets:
+    # 	map, lcb, ucb = estimator.map_lcb_ucb_approx_action(action,beta=2.)
+    # 	x = np.linspace(action.bounds[0,0],action.bounds[0,1],2)
+    # 	plt.plot(x,x*0+float(ucb/action.volume()),'-o', color = "green")
+    process.visualize(D, samples=0, n=n, dt=1.0)
+    plt.show()
diff --git a/stpy/point_processes/log_cox_process.py b/stpy/point_processes/log_cox_process.py
new file mode 100644
index 0000000..03e1780
--- /dev/null
+++ b/stpy/point_processes/log_cox_process.py
@@ -0,0 +1,247 @@
+from functools import partial
+from typing import List
+import numpy as np
+import scipy
+from stpy.borel_set import BorelSet
+from stpy.kernels import KernelFunction
+from tqdm import tqdm
+from autograd_minimize import minimize
+import torch
+
+device = torch.get_default_device()
+
+
+ def sqrt(matrix: torch.Tensor) -> torch.Tensor:  # matrix square root via scipy.linalg.sqrtm, real part only, moved to `device`
+ return torch.from_numpy(
+ np.real(scipy.linalg.sqrtm(matrix.cpu().numpy() + 1e-5))  # NOTE(review): 1e-5 is added to every entry, not only the diagonal -- confirm this is the intended jitter
+ ).to(device)
+
+
+class LogCoxProcess:
+ def __init__(self, kernel_object: KernelFunction, integral_discretization: int):  # store the kernel object and the quadrature size
+ self.kernel_object = kernel_object  # its .integral(...) is used in fit()
+ self.kernel = kernel_object.kernel  # called as kernel(a=..., b=...) in fit() and kernel(x, x) in the samplers
+ self.integral_discretization = integral_discretization  # passed to return_legendre_discretization() in fit()
+
+ def load_data(self, data: List):  # data: iterable of (A, x, dt) triples, unpacked in the loop below
+ # only works with 2d data!
+ observations = []
+ self.areas = []
+ dts = []
+ a_xs = []
+ a_ys = []
+ b_xs = []
+ b_ys = []
+ # collect observations, the four bound entries and dt of every area
+ for A, x, dt in data:
+ observations.append(x)
+ a_xs.append(A.bounds[0][0])  # first entry of the first bounds row
+ b_xs.append(A.bounds[0][1])  # second entry of the first bounds row
+ a_ys.append(A.bounds[1][0])  # first entry of the second bounds row
+ b_ys.append(A.bounds[1][1])  # second entry of the second bounds row
+ dts.append(dt)
+ self.areas.append((A, dt))
+ # turn the collected lists into the tensors read by fit()
+ self.observations = torch.cat(observations, dim=0)
+ self.dt = torch.tensor(dts, dtype=torch.float64)
+ self.a_x = torch.tensor(a_xs)
+ self.a_y = torch.tensor(a_ys)
+ self.b_x = torch.tensor(b_xs)
+ self.b_y = torch.tensor(b_ys)
+
+ def fit(self):  # optimises the weight vector alpha with L-BFGS-B, stores alpha_opt / rate_value, returns the intensity callable
+ # Get the map by representer theorem
+ k_func = partial(self.kernel, b=self.observations)
+ k_int = self.kernel_object.integral(self.a_x, self.a_y, self.b_x, self.b_y)
+ k_obs = torch.cat(
+ (
+ k_func(a=self.observations),
+ self.dt.unsqueeze(1) * k_int(self.observations),
+ )
+ )
+ # per-area quadrature weights, kernel features at the quadrature nodes, and dt factors
+ k_weights = []
+ k_nodes = []
+ k_factors = []
+ for A, dt in self.areas:
+ weights, nodes = A.return_legendre_discretization(
+ self.integral_discretization
+ )
+ nodes = nodes.to(device)
+ weights = weights.to(device)
+ k_n = torch.cat((k_func(a=nodes), self.dt.unsqueeze(1) * k_int(nodes)))
+ k_weights.append(weights)
+ k_nodes.append(k_n)
+ k_factors.append(dt)
+ # quadrature of the dt-scaled integrated kernel over each area (one stacked row per area)
+ k_int_int = []
+ for A, dt in self.areas:
+ weights, nodes = A.return_legendre_discretization(
+ self.integral_discretization
+ )
+ nodes = nodes.to(device)
+ weights = weights.to(device)
+ integral = dt * torch.sum(
+ weights * self.dt.unsqueeze(1) * k_int(nodes), dim=1
+ ) # sum over nodes
+ k_int_int.append(integral)
+
+ k_int_int = torch.stack(k_int_int)
+ k_obs_obs = k_func(a=self.observations)
+ k_int_obs = self.dt.unsqueeze(1) * k_int(
+ self.observations
+ ) # number of observations is columns
+ k_obs_int = k_int_obs.T
+
+ # Create one big kernel matrix out of the above four matrices
+ k_top = torch.cat((k_obs_obs, k_obs_int), dim=1)
+ k_bottom = torch.cat((k_int_obs, k_int_int), dim=1)
+ k_big = torch.cat((k_top, k_bottom), dim=0)
+
+ # Check if k_big is above zero everywhere
+ assert torch.all(k_big >= 0), "Kernel matrix should be strictly positive"  # NOTE(review): the check is >= 0 (non-negative) although the message says strictly positive
+
+ # Check if k_big is approximately symmetric
+ assert torch.allclose(
+ k_big, k_big.T, atol=1e-4
+ ), "Kernel matrix should be approximately symmetric"
+ # objective = -(sum of alpha @ k_obs) + sum over areas of dt * quadrature of exp(alpha @ k_n) + 0.5 * alpha^T k_big alpha
+ def objective(alpha):
+ lkl_term_1 = (alpha @ k_obs).sum() # Should be a single number now
+ lkl_term_2 = torch.sum(
+ torch.stack(
+ [
+ dt * torch.sum(w * torch.exp(alpha @ kn))
+ for w, kn, dt in zip(k_weights, k_nodes, k_factors)
+ ]
+ )
+ )
+
+ regularizer = alpha.T @ k_big @ alpha
+ return -lkl_term_1 + lkl_term_2 + regularizer * 0.5
+ # one weight per observation row plus one per area, started at zero
+ alpha_0 = torch.zeros([len(self.observations) + len(self.a_x)])
+ res = minimize(
+ objective,
+ alpha_0.cpu().numpy(),
+ backend="torch",
+ method="L-BFGS-B",
+ precision="float64",
+ tol=1e-8,
+ torch_device=str(device),
+ options={
+ "ftol": 1e-08,
+ "gtol": 1e-08,
+ "eps": 1e-08,
+ "maxfun": 15000,
+ "maxiter": 15000,
+ "maxls": 20,
+ },
+ )
+ print(f"optimum found")
+
+ self.alpha_opt = torch.tensor(res.x)
+ # closure evaluating dt * exp(alpha @ features(x)) with the optimised weights
+ def intensity(x: torch.tensor, dt=1):
+ k_obs = torch.cat((k_func(x), self.dt.unsqueeze(1) * k_int(x)))
+ return dt * torch.exp(torch.tensor(res.x) @ k_obs).unsqueeze(1)  # NOTE(review): re-creates the tensor stored in self.alpha_opt on every call
+
+ self.rate_value = intensity
+
+ return intensity
+
+    def get_gamma_MAP(self, n, x, a, dt, lr=0.01, max_it=10000, eps=1e-6):
+        """Find the MAP of the whitened latent vector gamma by gradient descent.
+
+        The log-rate is y = gamma @ Q with Q = sqrt(kernel(x, x)); the objective
+        is -0.5*|gamma|^2 + sum(y*n - exp(y)*a*dt). Stops after `max_it` SGD
+        steps or once the gradient norm drops below `eps`. Stores Q on self.Q.
+        """
+        mean = 0
+        Q = sqrt(self.kernel(x, x))
+        self.Q = Q
+
+        def f(arg):
+            y = arg @ Q + mean
+            return (-0.5) * arg.pow(2).sum() + (y * n - torch.exp(y) * a * dt).sum()
+
+        gamma = torch.zeros(len(x), dtype=torch.float64, requires_grad=True)
+        optimizer = torch.optim.SGD([gamma], lr=lr)
+
+        for _ in tqdm(range(max_it), desc="Optimizing gamma"):
+            optimizer.zero_grad()
+            loss = -f(gamma)  # we minimize -f because we want to maximize f
+            loss.backward()
+            if torch.norm(gamma.grad) < eps:
+                print("Solved to eps")
+                break
+            optimizer.step()
+
+        # Sanity check against a random far-away point. The covariance must
+        # share gamma's dtype: a default-dtype identity mixes float32 with the
+        # float64 mean inside MultivariateNormal when the default is float32.
+        cov = torch.eye(len(gamma), dtype=gamma.dtype) * 50
+        far = torch.distributions.MultivariateNormal(loc=gamma, covariance_matrix=cov)
+        assert f(gamma) > f(far.sample())
+
+        return gamma.detach()
+
+    def sample_mala(self, n, x, a, dt, h, num_steps, burn_in_steps, initial_gamma=None):
+        """Yield intensity samples from a MALA chain over the whitened latent gamma.
+
+        :param n: 1d tensor with the counts of points in the cells
+        :param x: discretization of the area of interest
+        :param a: tensor with the cell areas, or a float shared by all cells
+        :param dt: factor scaling the expected count exp(y) * a
+        :param h: step size of the Langevin proposal
+        :param num_steps: total number of chain steps
+        :param burn_in_steps: steps with index <= burn_in_steps are not yielded
+        :param initial_gamma: chain start; defaults to the MAP from get_gamma_MAP
+        :return: generator of exp(gamma @ Q) tensors, one per retained step
+        """
+        # The original called a non-existent self.get_MAP() here.
+        if initial_gamma is None:
+            gamma = self.get_gamma_MAP(n, x, a, dt)
+        else:
+            gamma = initial_gamma
+        mean = 0  # prior mean
+        Q = sqrt(self.kernel(x, x))
+        self.Q = Q
+        accept_prob_sum = 0
+
+        # The log posterior over gamma given the data
+        def log_f(arg):
+            y = arg @ Q + mean
+            return (-0.5) * arg.pow(2).sum() + (y * n - torch.exp(y) * a * dt).sum()
+
+        # Gradient of log_f
+        def grad(arg):
+            y = arg @ Q + mean
+            return -arg + (n - torch.exp(y) * a * dt) @ Q.T
+
+        # mean of the proposal distribution, named \xi in paper
+        def r_mean_given_arg(arg):
+            return arg + (h / 2.0) * grad(arg)
+
+        for i in range(num_steps):
+            proposal = torch.distributions.MultivariateNormal(
+                loc=r_mean_given_arg(gamma),
+                covariance_matrix=h * torch.eye(len(gamma), dtype=torch.float64),
+            ).sample()
+
+            # Metropolis-Hastings ratio in log space: exponentiating the two
+            # densities separately overflows/underflows to inf/inf or 0/0.
+            forward = (proposal - r_mean_given_arg(gamma)).pow(2).sum() / (2 * h)
+            backward = (gamma - r_mean_given_arg(proposal)).pow(2).sum() / (2 * h)
+            log_ratio = log_f(proposal) - backward - log_f(gamma) + forward
+            accept_prob = torch.exp(torch.clamp(log_ratio, max=0.0))
+
+            if np.random.rand() < accept_prob:
+                gamma = proposal
+            accept_prob_sum += accept_prob.item()
+
+            if i > burn_in_steps:
+                yield torch.exp(gamma @ Q + mean)
+
+        # Runs once the generator is exhausted; guard against num_steps == 0.
+        print(accept_prob_sum / max(num_steps, 1))
diff --git a/stpy/point_processes/log_link_rate_estimator.py b/stpy/point_processes/log_link_rate_estimator.py
index 006470c..9ee1086 100644
--- a/stpy/point_processes/log_link_rate_estimator.py
+++ b/stpy/point_processes/log_link_rate_estimator.py
@@ -3,215 +3,267 @@
import torch
from scipy.optimize import minimize
-from stpy.point_processes.poisson.link_fun_rate_estimator import PermanentalProcessRateEstimator
+from stpy.point_processes.poisson.link_fun_rate_estimator import (
+ PermanentalProcessRateEstimator,
+)
class LogGaussProcessRateEstimator(PermanentalProcessRateEstimator):
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
- self.discretization = 64
-
- self.nodes = None
- self.weights = None
-
- def load_data(self, data):
- super().load_data(data)
-
- if len(data) > 1:
- weights_arr = []
- nodes_arr = []
- for sample in data:
- (S, obs, dt) = sample
- weights, nodes = S.return_legendre_discretization(self.discretization)
- nodes_arr.append(nodes)
- weights_arr.append(weights * dt)
-
- self.nodes = self.packing.embed(torch.cat(nodes_arr))
- self.weights = torch.cat(weights_arr)
-
- def add_data_point(self, new_data):
- super().add_data_point(new_data)
-
- S, obs, dt = new_data
- weights, nodes = S.return_legendre_discretization(self.discretization)
-
- if self.nodes is None:
- self.nodes = self.packing.embed(nodes)
- self.weights = weights * dt
- else:
- self.nodes = torch.cat((self.nodes, self.packing.embed(nodes)))
- self.weights = torch.cat((self.weights, weights * dt))
-
- def sample(self, verbose=False, steps=100, stepsize=None):
-
- sigmoid_der_1 = lambda x: torch.exp(-x) / (torch.exp(-x) + 1) ** 2
-
- if self.data is None:
- self.sampled_theta = torch.zeros(self.get_m()).double().view(-1, 1)
- return None
-
- if self.observations is not None:
- weights = self.weights
- nodes = self.nodes
-
- nabla = lambda theta: -torch.sum(
- torch.diag(
- sigmoid_der_1(self.observations @ theta).view(-1) / self.sigmoid(self.observations @ theta).view(
- -1)) @ self.observations, dim=0).view(-1, 1) \
- + self.B * torch.sum(
- torch.diag(weights.view(-1) * sigmoid_der_1(nodes @ theta).view(-1)) @ nodes, dim=0).view(-1,
- 1) + self.s * theta.view(
- -1, 1)
- else:
- weights = self.weights
- nodes = self.nodes
- nabla = lambda theta: self.B * torch.sum(
- torch.diag(weights.view(-1) * sigmoid_der_1(nodes @ theta).view(-1)) @ nodes, dim=0).view(-1,
- 1) + self.s * theta.view(
- -1, 1)
-
- # theta = self.rate.view(-1, 1)*np.nan
-
- # while torch.sum(torch.isnan(theta))>0:
-
- theta = self.rate.view(-1, 1)
- for k in range(steps):
-
- W = self.construct_covariance_matrix_laplace(theta.view(-1))
- L = float(scipy.sparse.linalg.eigsh(W.numpy(), k=1, which='LM', return_eigenvectors=False, tol=1e-4))
- eta = 0.5 / (L + 1)
- print(eta)
- s = torch.randn(size=(self.get_m(), 1)).double()
- theta = theta - eta * nabla(theta) + np.sqrt(2 * eta) * s
-
- if verbose == True:
- print("Iter:", k, theta.T)
-
- self.sampled_theta = theta
- return None
-
- def sample_value(self, S):
- """
- Given a pre-sampled value evaluate certain portions of the domain S
- :param S:
- :return:
- """
- weights, nodes = S.return_legendre_discretization(64)
- Phi = self.packing.embed(nodes)
- map_vals = torch.sum(weights * self.B * self.sigmoid(Phi @ self.sampled_theta))
- return map_vals
-
- def sample_path(self, S, n=128):
- xtest = S.return_discretization(n)
- return self.sigmoid(self.packing.embed(xtest) @ self.sampled_theta) * self.B
-
- def penalized_likelihood(self, threads=4):
- sigmoid = lambda x: 1. / (1. + np.exp(-x))
- weights = self.weights.numpy()
- nodes = self.nodes.numpy()
- # times = self.times.numpy()
- # times = self.times.numpy()
-
- if self.observations is not None:
- observations = self.observations.numpy()
- # loss = lambda theta: float(-np.sum(np.log(self.B * sigmoid(observations @ theta))) \
- # + self.B * np.einsum('i,i',(weights ,sigmoid(nodes @ theta))) + self.s * np.sum(theta ** 2))
- loss = lambda theta: float(-np.sum(np.log(self.B * sigmoid(observations @ theta))) \
- + self.B * np.sum(
- weights * sigmoid(nodes @ theta).reshape(-1)) + 0.5 * self.s * np.sum(theta ** 2))
-
- else:
- loss = lambda theta: float(
- +self.B * np.sum(weights * sigmoid(theta @ nodes.T)) + self.s * np.sum(theta ** 2))
-
- theta = np.zeros(self.get_m())
- res = minimize(loss, theta, jac=None, method='L-BFGS-B', options={'maxcor': 20, 'iprint': -1,
- 'maxfun': 150000, 'maxls': 50, 'ftol': 1e-12,
- 'eps': 1e-12, 'gtol': 1e-8})
-
- self.rate = torch.from_numpy(res.x)
-
- return self.rate
-
- def construct_covariance_matrix_laplace(self, theta=None):
- sigmoid_der_1 = lambda x: np.exp(-x) / (np.exp(-x) + 1) ** 2
- sigmoid_der_2 = lambda x: 2 * np.exp(-2 * x) / (np.exp(-x) + 1) ** 3 - np.exp(-x) / (np.exp(-x) + 1) ** 2
- sigmoid = lambda x: 1. / (1. + np.exp(-x))
-
- W = torch.zeros(size=(self.get_m(), self.get_m())).double()
-
- if self.feedback == "count-record":
- if self.observations is not None:
- if theta is None:
- input = (self.observations @ self.rate).view(-1)
- else:
- input = (self.observations @ theta).view(-1)
- scales = (sigmoid_der_1(input) ** 2 + sigmoid_der_2(input) * sigmoid(input)) / (sigmoid(input) ** 2)
- W = torch.einsum('ij,i,ik->jk', self.observations, scales, self.observations)
-
- if self.nodes is not None:
- if theta is None:
- scales = self.B * sigmoid_der_2(self.nodes @ self.rate) * self.weights
- else:
- scales = self.B * sigmoid_der_2(self.nodes @ theta) * self.weights
- Z = torch.einsum('ij,i,ik->jk', self.nodes, scales, self.nodes)
- W = W + Z
-
- else:
- raise AssertionError("Not implemented.")
- return W + torch.eye(self.get_m()).double() * self.s
-
- def mean_var_laplace_set(self, S, dt, beta=2.):
- if self.approx_fit == False:
- self.W = self.construct_covariance_matrix_laplace()
- self.approx_fit = True
- self.W_inv_approx = torch.pinverse(self.W)
- return self.mean_std_per_action(S, self.W, dt, beta)
-
- def mean_std_per_action(self, S, W, dt, beta):
- weights, nodes = S.return_legendre_discretization(64)
- Phi = self.packing.embed(nodes)
- vars = torch.einsum('ij,jk,ki->i', Phi, self.W_inv_approx, Phi.T)
-
- vars = (vars + np.abs(vars)) / 2
- map_vals = weights * self.B * self.sigmoid(Phi @ self.rate)
- lcb_vals = weights * self.B * self.sigmoid(Phi @ self.rate - beta * np.sqrt(vars))
- ucb_vals = weights * self.B * self.sigmoid(Phi @ self.rate + beta * np.sqrt(vars))
-
- return dt * torch.sum(map_vals), dt * torch.sum(ucb_vals), torch, sum(lcb_vals) * dt
-
- def map_lcb_ucb(self, S, n, beta=2.0, delta=0.01):
- """
- Calculate exact confidence using laplace approximation on a whole set domain
- :param S: set
- :param n: discretization
- :param beta: beta
- :return:
- """
-
- xtest = S.return_discretization(n)
- if self.data is None:
- return self.b + 0 * xtest[:, 0].view(-1, 1), self.b + 0 * xtest[:, 0].view(-1, 1), self.B + 0 * xtest[:,
- 0].view(-1,
- 1)
-
- Phi = self.packing.embed(xtest)
- map = self.B * self.sigmoid(Phi @ self.rate)
-
- if self.uncertainty == "laplace":
- W = self.construct_covariance_matrix_laplace()
- W_inv = torch.pinverse(W)
-
- vars = torch.einsum('ij,jk,ki->i', Phi, W_inv, Phi.T)
- lcb = self.B * self.sigmoid(Phi @ self.rate - beta * np.sqrt(vars))
- ucb = self.B * self.sigmoid(Phi @ self.rate + beta * np.sqrt(vars))
-
- return map, lcb, ucb
-
- def sigmoid(self, x):
- return 1. / (1. + torch.exp(-x))
-
- def mean_rate(self, S, n=128):
- xtest = S.return_discretization(n)
- return self.sigmoid(self.packing.embed(xtest) @ self.rate) * self.B
+ def __init__(self, *args, **kwargs):  # forwards all arguments to the parent estimator
+ super().__init__(*args, **kwargs)
+ self.discretization = 64  # node count passed to return_legendre_discretization()
+ # quadrature nodes/weights stay None until load_data()/add_data_point() fill them
+ self.nodes = None
+ self.weights = None
+
+ def load_data(self, data):  # loads data via the parent, then builds embedded quadrature nodes and weights
+ super().load_data(data)
+ # NOTE(review): nodes/weights are only built when more than one sample is passed -- confirm the single-sample case is handled elsewhere
+ if len(data) > 1:
+ weights_arr = []
+ nodes_arr = []
+ for sample in data:
+ (S, obs, dt) = sample
+ weights, nodes = S.return_legendre_discretization(self.discretization)
+ nodes_arr.append(nodes)
+ weights_arr.append(weights * dt)  # fold the sample's dt into its quadrature weights
+ # embed all nodes at once and keep the matching dt-scaled weights
+ self.nodes = self.packing.embed(torch.cat(nodes_arr))
+ self.weights = torch.cat(weights_arr)
+
+ def add_data_point(self, new_data):  # registers one (S, obs, dt) sample and appends its quadrature nodes/weights
+ super().add_data_point(new_data)
+ # quadrature of the new set
+ S, obs, dt = new_data
+ weights, nodes = S.return_legendre_discretization(self.discretization)
+ # the first sample initialises the buffers; later ones are concatenated
+ if self.nodes is None:
+ self.nodes = self.packing.embed(nodes)
+ self.weights = weights * dt
+ else:
+ self.nodes = torch.cat((self.nodes, self.packing.embed(nodes)))
+ self.weights = torch.cat((self.weights, weights * dt))
+
+ def sample(self, verbose=False, steps=100, stepsize=None):  # writes self.sampled_theta after `steps` noisy gradient updates; `stepsize` is not read
+ # derivative of the logistic function
+ sigmoid_der_1 = lambda x: torch.exp(-x) / (torch.exp(-x) + 1) ** 2
+ # without data the sampled parameter is the zero vector
+ if self.data is None:
+ self.sampled_theta = torch.zeros(self.get_m()).double().view(-1, 1)
+ return None
+ # nabla: gradient used in the update below (observation term only when observations exist)
+ if self.observations is not None:
+ weights = self.weights
+ nodes = self.nodes
+
+ nabla = (
+ lambda theta: -torch.sum(
+ torch.diag(
+ sigmoid_der_1(self.observations @ theta).view(-1)
+ / self.sigmoid(self.observations @ theta).view(-1)
+ )
+ @ self.observations,
+ dim=0,
+ ).view(-1, 1)
+ + self.B
+ * torch.sum(
+ torch.diag(weights.view(-1) * sigmoid_der_1(nodes @ theta).view(-1))
+ @ nodes,
+ dim=0,
+ ).view(-1, 1)
+ + self.s * theta.view(-1, 1)
+ )
+ else:
+ weights = self.weights
+ nodes = self.nodes
+ nabla = lambda theta: self.B * torch.sum(
+ torch.diag(weights.view(-1) * sigmoid_der_1(nodes @ theta).view(-1))
+ @ nodes,
+ dim=0,
+ ).view(-1, 1) + self.s * theta.view(-1, 1)
+
+ # theta = self.rate.view(-1, 1)*np.nan
+
+ # while torch.sum(torch.isnan(theta))>0:
+ # the chain starts at the fitted parameter self.rate
+ theta = self.rate.view(-1, 1)
+ for k in range(steps):
+ # step size from the largest-magnitude eigenvalue of the Laplace matrix at the current theta
+ W = self.construct_covariance_matrix_laplace(theta.view(-1))
+ L = float(
+ scipy.sparse.linalg.eigsh(
+ W.numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-4
+ )
+ )
+ eta = 0.5 / (L + 1)
+ print(eta)  # NOTE(review): printed on every step regardless of `verbose`
+ s = torch.randn(size=(self.get_m(), 1)).double()
+ theta = theta - eta * nabla(theta) + np.sqrt(2 * eta) * s  # gradient step plus Gaussian noise scaled by sqrt(2 * eta)
+
+ if verbose == True:
+ print("Iter:", k, theta.T)
+
+ self.sampled_theta = theta
+ return None
+
+ def sample_value(self, S):
+ """
+ Given a pre-sampled value (self.sampled_theta) evaluate certain portions of the domain S
+ :param S: set providing return_legendre_discretization
+ :return: quadrature sum of B * sigmoid(Phi @ sampled_theta) over S
+ """
+ weights, nodes = S.return_legendre_discretization(64)  # fixed 64 nodes, independent of self.discretization
+ Phi = self.packing.embed(nodes)
+ map_vals = torch.sum(weights * self.B * self.sigmoid(Phi @ self.sampled_theta))
+ return map_vals
+
+ def sample_path(self, S, n=128):  # evaluates B * sigmoid(embedding @ sampled_theta) on S.return_discretization(n)
+ xtest = S.return_discretization(n)
+ return self.sigmoid(self.packing.embed(xtest) @ self.sampled_theta) * self.B  # requires sample() to have set self.sampled_theta
+
+    def penalized_likelihood(self, threads=4):
+        """Fit self.rate by minimising the penalised negative log-likelihood.
+
+        The loss is -sum(log(B * sigmoid(obs @ theta))), plus the quadrature
+        integral B * sum(weights * sigmoid(nodes @ theta)), plus the ridge term
+        0.5 * s * |theta|^2. `threads` is accepted but not used here.
+
+        :return: the fitted parameter vector (also stored in self.rate)
+        """
+        sigmoid = lambda x: 1.0 / (1.0 + np.exp(-x))
+        weights = self.weights.numpy()
+        nodes = self.nodes.numpy()
+
+        def integral_and_ridge(theta):
+            # 0.5 * s matches the gradient s * theta used in sample() and the
+            # s * I term of construct_covariance_matrix_laplace(); the branch
+            # without observations previously used s (twice the penalty).
+            integral = self.B * np.sum(weights * sigmoid(nodes @ theta).reshape(-1))
+            return integral + 0.5 * self.s * np.sum(theta**2)
+
+        if self.observations is not None:
+            observations = self.observations.numpy()
+            loss = lambda theta: float(
+                -np.sum(np.log(self.B * sigmoid(observations @ theta)))
+                + integral_and_ridge(theta)
+            )
+        else:
+            loss = lambda theta: float(integral_and_ridge(theta))
+
+        theta = np.zeros(self.get_m())
+        res = minimize(
+            loss,
+            theta,
+            jac=None,
+            method="L-BFGS-B",
+            options={
+                "maxcor": 20, "iprint": -1, "maxfun": 150000, "maxls": 50,
+                "ftol": 1e-12, "eps": 1e-12, "gtol": 1e-8,
+            },
+        )
+
+        self.rate = torch.from_numpy(res.x)
+        return self.rate
+
+ def construct_covariance_matrix_laplace(self, theta=None):  # builds the m x m matrix W at `theta` (self.rate when theta is None), plus s * I
+ sigmoid_der_1 = lambda x: np.exp(-x) / (np.exp(-x) + 1) ** 2  # first derivative of the logistic function
+ sigmoid_der_2 = (
+ lambda x: 2 * np.exp(-2 * x) / (np.exp(-x) + 1) ** 3
+ - np.exp(-x) / (np.exp(-x) + 1) ** 2
+ )
+ sigmoid = lambda x: 1.0 / (1.0 + np.exp(-x))
+
+ W = torch.zeros(size=(self.get_m(), self.get_m())).double()
+ # only the "count-record" feedback mode is handled; anything else raises below
+ if self.feedback == "count-record":
+ if self.observations is not None:
+ if theta is None:
+ input = (self.observations @ self.rate).view(-1)
+ else:
+ input = (self.observations @ theta).view(-1)
+ scales = (
+ sigmoid_der_1(input) ** 2 + sigmoid_der_2(input) * sigmoid(input)  # NOTE(review): the second derivative of -log(sigmoid) is (s'^2 - s''*s) / s^2 -- double-check the '+' here
+ ) / (sigmoid(input) ** 2)
+ W = torch.einsum(
+ "ij,i,ik->jk", self.observations, scales, self.observations
+ )
+ # add the quadrature (integral) contribution when nodes are available
+ if self.nodes is not None:
+ if theta is None:
+ scales = (
+ self.B * sigmoid_der_2(self.nodes @ self.rate) * self.weights
+ )
+ else:
+ scales = self.B * sigmoid_der_2(self.nodes @ theta) * self.weights
+ Z = torch.einsum("ij,i,ik->jk", self.nodes, scales, self.nodes)
+ W = W + Z
+
+ else:
+ raise AssertionError("Not implemented.")
+ return W + torch.eye(self.get_m()).double() * self.s
+
+ def mean_var_laplace_set(self, S, dt, beta=2.0):  # caches W and its pseudo-inverse, then delegates to mean_std_per_action
+ if self.approx_fit == False:
+ self.W = self.construct_covariance_matrix_laplace()
+ self.approx_fit = True  # marks the cached W as current
+ self.W_inv_approx = torch.pinverse(self.W)  # read by mean_std_per_action()
+ return self.mean_std_per_action(S, self.W, dt, beta)
+
+    def mean_std_per_action(self, S, W, dt, beta):
+        """Integrate the MAP rate and its confidence bounds over the set S.
+
+        Uses a 64-node Legendre quadrature on S and the cached
+        self.W_inv_approx; `W` is accepted but not read.
+
+        :return: (map, ucb, lcb) integrals over S, each scaled by dt
+        """
+        weights, nodes = S.return_legendre_discretization(64)
+        Phi = self.packing.embed(nodes)
+        variances = torch.einsum("ij,jk,ki->i", Phi, self.W_inv_approx, Phi.T)
+        variances = (variances + np.abs(variances)) / 2  # clip negatives to 0
+        width = beta * np.sqrt(variances)
+
+        map_vals = weights * self.B * self.sigmoid(Phi @ self.rate)
+        lcb_vals = weights * self.B * self.sigmoid(Phi @ self.rate - width)
+        ucb_vals = weights * self.B * self.sigmoid(Phi @ self.rate + width)
+        # The original returned the `torch` module and a builtin `sum(...)` as
+        # separate tuple items (a `torch, sum` typo for `torch.sum`).
+        return dt * torch.sum(map_vals), dt * torch.sum(ucb_vals), dt * torch.sum(lcb_vals)
+
+    def map_lcb_ucb(self, S, n, beta=2.0, delta=0.01):
+        """
+        Calculate the MAP rate and Laplace confidence bounds on a whole set domain
+        :param S: set
+        :param n: discretization passed to S.return_discretization
+        :param beta: width multiplier of the confidence band
+        :param delta: accepted but not used by this method
+        :return: (map, lcb, ucb) evaluated on the discretization of S
+        """
+        xtest = S.return_discretization(n)
+        if self.data is None:
+            # No data: constant bounds min_intensity / B on the grid.
+            flat = 0 * xtest[:, 0].view(-1, 1)
+            return self.min_intensity + flat, self.min_intensity + flat, self.B + flat
+
+        if self.uncertainty != "laplace":
+            # lcb/ucb were previously left unbound here (UnboundLocalError).
+            raise NotImplementedError("map_lcb_ucb supports only uncertainty='laplace'.")
+
+        Phi = self.packing.embed(xtest)
+        map = self.B * self.sigmoid(Phi @ self.rate)
+
+        W_inv = torch.pinverse(self.construct_covariance_matrix_laplace())
+        vars = torch.einsum("ij,jk,ki->i", Phi, W_inv, Phi.T)
+        # Clip tiny negative variances as mean_std_per_action does, else sqrt -> NaN.
+        vars = (vars + np.abs(vars)) / 2
+        lcb = self.B * self.sigmoid(Phi @ self.rate - beta * np.sqrt(vars))
+        ucb = self.B * self.sigmoid(Phi @ self.rate + beta * np.sqrt(vars))
+        return map, lcb, ucb
+
+ def sigmoid(self, x):  # logistic function 1 / (1 + exp(-x)), elementwise on a torch tensor
+ return 1.0 / (1.0 + torch.exp(-x))
+
+ def mean_rate(self, S, n=128):  # evaluates B * sigmoid(embedding @ self.rate) on S.return_discretization(n)
+ xtest = S.return_discretization(n)
+ return self.sigmoid(self.packing.embed(xtest) @ self.rate) * self.B  # uses the fitted parameter self.rate
diff --git a/stpy/point_processes/loglinear_estimator.py b/stpy/point_processes/loglinear_estimator.py
index 221880a..179dc79 100644
--- a/stpy/point_processes/loglinear_estimator.py
+++ b/stpy/point_processes/loglinear_estimator.py
@@ -8,179 +8,207 @@
import matplotlib.pyplot as plt
from stpy.embeddings.embedding import HermiteEmbedding
import scipy.integrate as integrate
-from stpy.helpers.ellipsoid_algorithms import maximize_quadratic_on_ellipse, minimize_quadratic_on_ellipse
+from stpy.helpers.ellipsoid_algorithms import (
+ maximize_quadratic_on_ellipse,
+ minimize_quadratic_on_ellipse,
+)
from stpy.point_processes.poisson import PoissonPointProcess
from stpy.point_processes.poisson_rate_estimator import PositiveRateEstimator
from stpy.borel_set import BorelSet, HierarchicalBorelSets
from stpy.kernels import KernelFunction
-class LogLinearRateEstimator(PositiveRateEstimator):
-
- def __init__(self,*args,**kwargs):
- super().__init__(*args,**kwargs)
-
- def least_squares_weighted(self, threads=0):
- theta = cp.Variable(self.get_m())
-
- mask = self.bucketized_counts.clone().numpy() > 0
-
- observations = self.total_bucketized_obs[mask].clone().numpy()
- phis = self.varphis[mask, :].clone().numpy()
- tau = self.total_bucketized_time.clone().numpy()
-
- variances = self.variances.view(-1).clone().numpy()
-
- for i in range(variances.shape[0]):
- if mask[i] > 0:
- variances[i] = variances[i] * tau[i] * self.variance_correction(variances[i] * tau[i])
-
- selected_variances = variances[mask]
- print (np.log(observations))
- print (selected_variances)
- objective = cp.Minimize(
- cp.sum_squares((phis @ theta) - np.log(observations)/tau[mask]) )#+ self.s * cp.norm2(theta))
-
- prob = cp.Problem(objective)
-
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=True,
- mosek_params={mosek.iparam.num_threads: threads})
-
- self.rate = torch.from_numpy(theta.value)
- print (self.rate)
- return self.rate
-
- def mean_var_reg_set(self, S, dt=1., beta=2.):
- if self.approx_fit == False:
- self.W = self.construct_covariance_matrix_regression()
- self.approx_fit = True
-
- map = 0
- lcb = 0
- ucb = 0
- for set in self.basic_sets:
- if S.inside(set):
- x = self.packing.integral(set).view(-1,1)
- lcb = lcb +torch.exp(dt*(x@self.rate - beta*np.sqrt(x.T@self.W_inv@x) ))
- ucb = ucb + torch.exp(dt*(x@self.rate + beta*np.sqrt(x.T@self.W_inv@x)))
- map = map + torch.exp(dt*x@self.rate)
- return map,ucb, lcb
-
- def fit_ellipsoid_approx(self):
- self.W =self.construct_covariance_matrix_regression()
- self.W_inv = torch.pinverse(self.W)
-
- # def map_lcb_ucb_approx_action(self, S, dt=1., beta=2.):
- # phi = self.packing.integral(S) * dt
- # map = phi @ self.rate
- # ucb = map + beta * np.sqrt(phi @ self.W_inv_approx @ phi.T)
- # ucb = np.minimum(ucb, self.B * S.volume() * dt)
- #
- # lcb = map - beta * np.sqrt(phi @ self.W_inv_approx @ phi.T)
- # lcb = np.maximum(lcb, self.b * S.volume() * dt)
- # return map, lcb, ucb
-
-
- def construct_covariance_matrix_regression(self):
-
- W = torch.zeros(size=(self.get_m(), self.get_m())).double()
-
- if self.data is not None:
- variances = self.variances
-
- if self.feedback == "count-record":
- mask = self.bucketized_counts > 0
- tau = self.total_bucketized_time
- for index_o, o in enumerate(self.bucketized_obs):
- n = mask[index_o]
- if n > 0:
- A = self.varphis[index_o, :].view(-1, 1) @ self.varphis[index_o, :].view(1, -1) * tau[index_o]
- W = W + A / (variances[index_o])
-
- elif self.feedback == "histogram":
-
- for datapoint in self.data:
- (S, obs, dt) = datapoint
- varphi = self.packing.integral(S) * dt
- variance = varphi@self.rate
- variance = variance
- A = varphi.view(-1, 1) @ varphi.view(1, -1)
- W = W + A / variance
-
- return W + torch.eye(self.get_m()).double() * self.s
+class LogLinearRateEstimator(PositiveRateEstimator):
- def mean_set(self, S, dt=1.):
- mu = 0
- for set in self.basic_sets:
- if S.inside(set):
- mu = mu + torch.exp(dt*self.packing.integral(set)@self.rate)
- return mu
+ def __init__(self, *args, **kwargs):  # no extra state: forwards everything to the parent estimator
+ super().__init__(*args, **kwargs)
+
+ def least_squares_weighted(self, threads=0):  # least-squares fit of theta to log(observations) / tau, solved with MOSEK through cvxpy
+ theta = cp.Variable(self.get_m())
+ # keep only the buckets whose count is positive
+ mask = self.bucketized_counts.clone().numpy() > 0
+
+ observations = self.total_bucketized_obs[mask].clone().numpy()
+ phis = self.varphis[mask, :].clone().numpy()
+ tau = self.total_bucketized_time.clone().numpy()
+
+ variances = self.variances.view(-1).clone().numpy()
+ # rescale the variances of the selected buckets
+ for i in range(variances.shape[0]):
+ if mask[i] > 0:
+ variances[i] = (
+ variances[i]
+ * tau[i]
+ * self.variance_correction(variances[i] * tau[i])
+ )
+
+ selected_variances = variances[mask]  # NOTE(review): only printed below; the objective does not use it
+ print(np.log(observations))
+ print(selected_variances)
+ objective = cp.Minimize(
+ cp.sum_squares((phis @ theta) - np.log(observations) / tau[mask])
+ ) # + self.s * cp.norm2(theta))
+
+ prob = cp.Problem(objective)
+
+ prob.solve(
+ solver=cp.MOSEK,
+ warm_start=False,
+ verbose=True,
+ mosek_params={mosek.iparam.num_threads: threads},
+ )
+ # store the solution as a torch tensor and return it
+ self.rate = torch.from_numpy(theta.value)
+ print(self.rate)
+ return self.rate
+
+    def mean_var_reg_set(self, S, dt=1.0, beta=2.0):
+        """Sum exp(dt * (mean -/+ width)) over basic sets with S.inside(set); returns (map, ucb, lcb)."""
+        if self.approx_fit == False:
+            # W_inv is read below but was only ever set by fit_ellipsoid_approx(),
+            # so refresh W and its pseudo-inverse together.
+            self.fit_ellipsoid_approx()
+            self.approx_fit = True
+
+        map, lcb, ucb = 0, 0, 0
+        for set in self.basic_sets:
+            if S.inside(set):
+                x = self.packing.integral(set).view(-1, 1)
+                # x is a column, so the linear term is x.T @ rate (x @ rate only fits m == 1)
+                mean = x.T @ self.rate.view(-1, 1)
+                width = beta * np.sqrt(x.T @ self.W_inv @ x)
+                lcb = lcb + torch.exp(dt * (mean - width))
+                ucb = ucb + torch.exp(dt * (mean + width))
+                map = map + torch.exp(dt * mean)
+        return map, ucb, lcb
+
+ def fit_ellipsoid_approx(self):  # rebuilds the regression matrix W and caches its pseudo-inverse
+ self.W = self.construct_covariance_matrix_regression()
+ self.W_inv = torch.pinverse(self.W)  # read by mean_var_reg_set()
+
+ # def map_lcb_ucb_approx_action(self, S, dt=1., beta=2.):
+ # phi = self.packing.integral(S) * dt
+ # map = phi @ self.rate
+ # ucb = map + beta * np.sqrt(phi @ self.W_inv_approx @ phi.T)
+ # ucb = np.minimum(ucb, self.B * S.volume() * dt)
+ #
+ # lcb = map - beta * np.sqrt(phi @ self.W_inv_approx @ phi.T)
+ # lcb = np.maximum(lcb, self.b * S.volume() * dt)
+ # return map, lcb, ucb
+
+ def construct_covariance_matrix_regression(self):  # sums variance-weighted outer products of the features, plus s * I
+ # start from the zero matrix so the no-data case returns s * I
+ W = torch.zeros(size=(self.get_m(), self.get_m())).double()
+
+ if self.data is not None:
+ variances = self.variances
+
+ if self.feedback == "count-record":
+ mask = self.bucketized_counts > 0
+ tau = self.total_bucketized_time
+ for index_o, o in enumerate(self.bucketized_obs):  # only the index is used; `o` is not read
+ n = mask[index_o]
+ if n > 0:
+ A = (
+ self.varphis[index_o, :].view(-1, 1)
+ @ self.varphis[index_o, :].view(1, -1)
+ * tau[index_o]
+ )
+ W = W + A / (variances[index_o])
+
+ elif self.feedback == "histogram":
+ # one outer product per data point, divided by varphi @ rate
+ for datapoint in self.data:
+ (S, obs, dt) = datapoint
+ varphi = self.packing.integral(S) * dt
+ variance = varphi @ self.rate
+ variance = variance  # no-op self-assignment
+ A = varphi.view(-1, 1) @ varphi.view(1, -1)
+ W = W + A / variance
+
+ return W + torch.eye(self.get_m()).double() * self.s
+
+ def mean_set(self, S, dt=1.0):  # sums exp(dt * integral(set) @ rate) over the basic sets selected by S.inside
+ mu = 0
+ for set in self.basic_sets:
+ if S.inside(set):
+ mu = mu + torch.exp(dt * self.packing.integral(set) @ self.rate)
+ return mu  # stays the int 0 when no basic set is selected
if __name__ == "__main__":
- torch.manual_seed(2)
- np.random.seed(2)
- d = 1
- gamma = 0.1
- n = 64
- B = 4.
- b = 0.1
-
- process = PoissonPointProcess(d=1, B=B, b=b)
- Sets = []
- levels = 5
- hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels)
- Sets = hierarchical_structure.get_all_sets()
-
- D = BorelSet(1, bounds=torch.Tensor([[-1., 1.]]).double())
-
- m = 128
- k = KernelFunction(gamma = gamma)
- estimator = LogLinearRateEstimator(process, hierarchical_structure,
- kernel_object=k, B=B, m=m, d=d, estimator='least-sq')
-
- min_vol, max_vol = estimator.get_min_max()
-
- dt = 1. / (b * min_vol)
- dt = dt * 2
-
- print("Suggested dt:", dt)
- c = ['k', 'r', 'b', 'y', 'g', 'orange', 'brown', 'purple'] + ['k' for i in range(500)]
-
- no_sets = len(Sets)
- no_samples = 0
- data = []
- samples = []
- repeats = 2
-
- for i in range(no_samples):
- j = np.random.randint(0, no_sets, 1)
- S = Sets[j[0]]
- for _ in range(repeats):
- sample = process.sample_discretized(S, dt)
- samples.append(sample)
- data.append((S, sample, dt))
-
- sample_D = process.sample_discretized(D, dt)
- samples.append(sample_D)
- no_samples = repeats * no_samples + 1
- data.append((D, sample_D, dt))
-
- estimator.load_data(data)
-
- xtest = D.return_discretization(n=n)
-
- # likelihood based
- estimator.fit_gp()
-
- for set in estimator.basic_sets:
- x = np.linspace(set.bounds[0,0],set.bounds[0,1],2)
- val = estimator.mean_set(set)
- plt.plot(x,x*0+float(val),'b-o')
- vol = process.rate_volume(set)
- plt.plot(x, x * 0 + float(vol), '-o',color = 'orange')
- for j in range(no_samples):
- if samples[j] is not None:
- plt.plot(samples[j], samples[j] * 0, 'o', color=c[j])
-
- process.visualize(D, samples=0, n=n, dt=1.)
\ No newline at end of file
+ torch.manual_seed(2)  # fixed seeds for torch and numpy
+ np.random.seed(2)
+ d = 1
+ gamma = 0.1
+ n = 64
+ B = 4.0
+ b = 0.1
+ # 1-d process on [-1, 1] with a 5-level hierarchy of sets
+ process = PoissonPointProcess(d=1, B=B, b=b)
+ Sets = []
+ levels = 5
+ hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels)
+ Sets = hierarchical_structure.get_all_sets()
+
+ D = BorelSet(1, bounds=torch.tensor([[-1.0, 1.0]]).double())
+
+ m = 128
+ k = KernelFunction(gamma=gamma)
+ estimator = LogLinearRateEstimator(
+ process,
+ hierarchical_structure,
+ kernel_object=k,
+ B=B,
+ m=m,
+ d=d,
+ estimator="least-sq",
+ )
+
+ min_vol, max_vol = estimator.get_min_max()
+ # dt derived from b and the smallest volume, then doubled
+ dt = 1.0 / (b * min_vol)
+ dt = dt * 2
+
+ print("Suggested dt:", dt)
+ c = ["k", "r", "b", "y", "g", "orange", "brown", "purple"] + [
+ "k" for i in range(500)
+ ]
+
+ no_sets = len(Sets)
+ no_samples = 0
+ data = []
+ samples = []
+ repeats = 2
+ # with no_samples = 0 this loop does not execute; only the sample on D below is used
+ for i in range(no_samples):
+ j = np.random.randint(0, no_sets, 1)
+ S = Sets[j[0]]
+ for _ in range(repeats):
+ sample = process.sample_discretized(S, dt)
+ samples.append(sample)
+ data.append((S, sample, dt))
+
+ sample_D = process.sample_discretized(D, dt)
+ samples.append(sample_D)
+ no_samples = repeats * no_samples + 1  # number of entries now in `samples`
+ data.append((D, sample_D, dt))
+
+ estimator.load_data(data)
+
+ xtest = D.return_discretization(n=n)
+
+ # likelihood based
+ estimator.fit_gp()
+ # per basic set: estimator.mean_set in blue, process.rate_volume in orange
+ for set in estimator.basic_sets:
+ x = np.linspace(set.bounds[0, 0], set.bounds[0, 1], 2)
+ val = estimator.mean_set(set)
+ plt.plot(x, x * 0 + float(val), "b-o")
+ vol = process.rate_volume(set)
+ plt.plot(x, x * 0 + float(vol), "-o", color="orange")
+ for j in range(no_samples):
+ if samples[j] is not None:
+ plt.plot(samples[j], samples[j] * 0, "o", color=c[j])  # sampled points drawn on the zero line
+
+ process.visualize(D, samples=0, n=n, dt=1.0)
diff --git a/stpy/point_processes/mbr_positive_estimator.py b/stpy/point_processes/mbr_positive_estimator.py
index ac753c5..ef0d29d 100644
--- a/stpy/point_processes/mbr_positive_estimator.py
+++ b/stpy/point_processes/mbr_positive_estimator.py
@@ -9,357 +9,428 @@
import numpy as np
import mosek
-class MBRPositiveEstimator(PermanentalProcessRateEstimator):
-
- def __init__(self, *args, **kwargs):
- super().__init__(*args,**kwargs)
-
- if self.feedback == "count-record":
- self.varLambdas_vec = torch.zeros( size = (self.varLambdas.size()[0],self.varLambdas.size()[1]*self.varLambdas.size()[2])).double()
- for i in range(self.varLambdas.size()[0]):
- self.varLambdas_vec[i,:] = self.varLambdas[i,:,:].reshape(-1)
-
- self.approx_solver = True
-
- def fit_gp(self, threads=4):
- if self.data is not None:
- super().fit_gp(threads=threads)
- else:
- self.rate = None
-
- def mean_rate(self, S, n=128):
- xtest = S.return_discretization(n)
- emb = self.packing.embed(xtest)
- mu = torch.einsum('ij,jk,ik->i',emb,self.rate,emb).view(-1,1)
- return mu
- def rate_value(self, x, dt=1):
- emb = self.packing.embed(x)*dt
- mu = torch.einsum('ij,jk,ik->i',emb,self.rate,emb).view(-1,1)
- return mu
-
- def mean_set(self,S,dt =1.):
- if self.data is not None:
- emb = self.product_integral(S) * dt
- mu = torch.trace(emb@self.rate).view(1,1)
- else:
- mu = self.b*S.volume()
- return mu
-
- def penalized_likelihood(self, threads=4):
- sumLambda = self.sumLambda.numpy()
- Theta = cp.Variable((self.get_m(), self.get_m()), symmetric=True)
-
- if self.observations is not None:
- observations = self.observations.numpy()
- # cost = cp.sum_squares(cp.diag(emb @ A @ emb.T) - y.view(-1).numpy()) / (self.s ** 2) + (self.lam) * cp.norm(A, "fro")
- objective = -cp.sum(cp.log(observations @ Theta @ observations.T)) + \
- cp.trace(sumLambda @ Theta) + self.s * cp.sum_squares(cp.vec(Theta))
- else:
- objective = cp.trace(sumLambda @ Theta) + self.s * cp.sum_squares(cp.vec(Theta))
-
- # if self.get_m() == 2:
- # # use Lorentz-cone special result
- # constraints = [cp.SOC(Theta[0,0]+Theta[1,1],Theta[1,1] )]
- # else:
- # constraints = [Theta >> 0]
- constraints = []
- prob = cp.Problem(cp.Minimize(objective), constraints)
-
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False,
- mosek_params={mosek.iparam.num_threads: threads,
- mosek.iparam.intpnt_solve_form:mosek.solveform.dual,
- mosek.dparam.intpnt_co_tol_pfeas:1e-3,
- mosek.dparam.intpnt_co_tol_dfeas:1e-3,
- mosek.dparam.intpnt_co_tol_rel_gap:1e-3})
- self.rate = torch.from_numpy(Theta.value)
- return self.rate
-
-
-
- def penalized_likelihood_bins(self, threads=4):
- Theta = cp.Variable((self.get_m(), self.get_m()), symmetric=True)
-
-
- mask = self.bucketized_counts.clone().numpy() > 0
- observations = self.total_bucketized_obs[mask].clone().numpy()
- tau = self.total_bucketized_time[mask].clone().numpy()
- varLambdas_vec = self.varLambdas_vec[mask,:].clone().numpy()
-
-
- objective = -cp.sum(observations @ cp.log(cp.multiply(tau,varLambdas_vec @ cp.vec(Theta)) ) ) + \
- cp.sum(cp.multiply(tau,varLambdas_vec @ cp.vec(Theta))) + self.s * cp.sum_squares(cp.vec(Theta))
-
- constraints = [Theta >> 0]
- prob = cp.Problem(cp.Minimize(objective), constraints)
-
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False,
- mosek_params={mosek.iparam.num_threads: threads,
- mosek.iparam.intpnt_solve_form:mosek.solveform.dual,
- mosek.dparam.intpnt_co_tol_pfeas:1e-3,
- mosek.dparam.intpnt_co_tol_dfeas:1e-3,
- mosek.dparam.intpnt_co_tol_rel_gap:1e-3})
- self.rate = torch.from_numpy(Theta.value)
- return self.rate
-
-
- def least_squares_weighted(self,threads = 4 ):
+class MBRPositiveEstimator(PermanentalProcessRateEstimator):
- if self.approx_fit == False:
- self.bucketization()
-
- Theta = cp.Variable((self.get_m(), self.get_m()), symmetric=True)
-
- mask = self.bucketized_counts.clone().numpy() > 0
- observations = self.total_bucketized_obs[mask].clone().numpy()
- tau = self.total_bucketized_time.clone().numpy()
-
- # varsumLambdas
- varLambdas_vec = self.varLambdas_vec[mask,:].clone().numpy()
-
- variances = self.variances.view(-1).clone().numpy()
-
- for i in range(variances.shape[0]):
- if mask[i] > 0:
- variances[i] = variances[i] * tau[i]* self.variance_correction(variances[i] * tau[i])
-
- selected_variances = variances[mask]
-
-
- objective = cp.sum_squares( (varLambdas_vec@cp.vec(Theta) +
- - observations)/np.sqrt(selected_variances) )+ self.s*cp.sum_squares(cp.vec(Theta))/2
- constraints = [Theta >> 0]
- prob = cp.Problem(cp.Minimize(objective), constraints)
-
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False,
- mosek_params={mosek.iparam.num_threads: threads,
- mosek.iparam.intpnt_solve_form:mosek.solveform.dual,
- mosek.dparam.intpnt_co_tol_pfeas:1e-3,
- mosek.dparam.intpnt_co_tol_dfeas:1e-3,
- mosek.dparam.intpnt_co_tol_rel_gap:1e-3})
-
- self.rate = torch.from_numpy(Theta.value)
- return self.rate
-
- def construct_covariance_matrix(self):
- if self.estimator == "bins":
- self.construct_covariance_matrix_bins()
- elif self.estimator =="least-sq":
- self.construct_covariance_matrix_regression()
- else:
- raise NotImplementedError("Covariance not implemented")
-
- def construct_covariance_matrix_regression(self):
- varLambdas = self.varLambdas_vec.clone()
- variances = self.variances
- mask = self.bucketized_counts > 0
- tau = self.total_bucketized_time
- W = torch.zeros(size=(self.get_m()**2, self.get_m()**2)).double()
- I = torch.eye(self.get_m() ** 2).double()
- W_inv = self.s * torch.eye(self.get_m() ** 2).double()
-
- for index_o, o in enumerate(self.bucketized_obs):
- n = mask[index_o]
- if n > 0:
- k = self.variance_correction(tau[index_o] * variances[index_o])
- v = tau[index_o] / (variances[index_o] * k)
-
- vec = varLambdas[index_o, :].view(-1, 1)
- A = vec @ vec.T
- W = W + A * v
- denom = 1. + v*vec.T@W_inv@vec
- W_inv = W_inv @ (I - v* vec@(vec.T@W_inv)/denom )
-
- self.W = W + self.s * torch.eye(self.get_m() ** 2).double()
- self.W_inv = W_inv
- #self.W_cholesky = torch.cholesky(self.W, upper=True)
- return self.W
-
-
- def construct_covariance_matrix_bins(self):
- self.construct_covariance_matrix_regression()
-
- def mean_var_reg_set(self,S, dt=1., beta=2., lcb_compute = False):
-
- if self.data is None:
- return S.volume()*self.b,S.volume()*self.B,S.volume()*self.b
-
- if self.approx_fit == False:
- self.W = self.construct_covariance_matrix()
- self.approx_fit = True
-
- map = None
- lcb = None
-
- if self.approx_solver == True:
- ucb = self.band_no_opt(S, beta=beta, dt=dt, maximization=True)
- if lcb_compute == True:
- lcb = self.band_no_opt(S, beta=beta, dt=dt, maximization=False)
- else:
- ucb = self.band(S, beta=beta,dt=dt, maximization=True)
- if lcb_compute == True:
- lcb = self.band(S, beta=beta,dt=dt, maximization=False)
-
- return map, ucb, lcb
-
- def mean_var_bins_set(self,S, dt=1., beta=2., lcb_compute = False):
- return self.mean_var_reg_set(S, dt=dt, beta=beta, lcb_compute = lcb_compute)
-
- def band(self, S, beta=2.,dt=1., maximization=True):
- emb = self.product_integral(S) * dt
- A = cp.Variable((self.get_m(), self.get_m()), symmetric=True)
- cost = cp.trace(A @ emb)
- Z = self.W_cholesky.clone()
- zero = np.zeros(self.get_m() ** 2)
- constraints = [cp.SOC(zero.T @ cp.vec(A) + self.s * beta**2, Z @ (cp.vec(A) - cp.vec(self.rate.numpy())))]
- constraints += [A >> 0]
-
- if maximization == True:
- prob = cp.Problem(cp.Maximize(cost), constraints)
- else:
- prob = cp.Problem(cp.Minimize(cost), constraints)
-
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False,
- mosek_params={mosek.iparam.num_threads: 4,
- mosek.iparam.intpnt_solve_form:mosek.solveform.dual,
- mosek.dparam.intpnt_co_tol_pfeas:1e-3,
- mosek.dparam.intpnt_co_tol_dfeas:1e-3,
- mosek.dparam.intpnt_co_tol_rel_gap:1e-3})
- ucb = torch.trace(torch.from_numpy(A.value) @ emb)
- return ucb
-
- def band_no_opt(self, S, beta=2.,dt=1., maximization=True):
-
- if self.rate is None:
- if maximization == True:
- return S.volume()*dt*self.B
- else:
- return S.volume() * dt * self.b
- else:
- emb = self.product_integral(S)
- cost = torch.trace(self.rate @ emb)
- if maximization == True:
- out = cost + beta* emb.view(1,-1)@self.W_inv@emb.view(-1,1)
- else:
- out = np.maximum(cost - beta* emb.view(1,-1)@self.W_inv@emb.view(-1,1),0.)
- return out*dt
-
- def gap(self, S, actions, w, dt, beta=2.):
- """
- Estimates the gap of an action S,
- :param S:
- :param dt:
- :return:
- """
-
- if self.data is None:
- return (self.B-self.b)*S.volume()/w(S)
-
- if self.ucb_identified == False:
- print("Recomputing UCB.....")
- self.ucb_identified = True
- self.max_ucb = -1000
- self.ucb_action = None
- for action in actions:
- _, ucb,__ = self.mean_var_reg_set(action, dt=dt, beta=self.beta(0))
- ucb = ucb / w(action)
- if ucb > self.max_ucb:
- self.max_ucb = ucb
- self.ucb_action = action
- map, ucb, lcb = self.mean_var_reg_set(S, dt=dt, beta=self.beta(0), lcb_compute = True)
- gap = w(S) * self.max_ucb - lcb
- return gap
-
- def information(self, S, dt, precomputed = None):
-
- if self.data is None:
- return 1.
-
- if self.W is None:
- self.construct_covariance_matrix()
-
- if self.feedback == "count-record":
- varphi_UCB = self.product_integral(self.ucb_action).view(1,-1)*dt
-
- ind = []
- for index, set in enumerate(self.basic_sets):
- if S.inside(set):
- ind.append(index)
- Upsilon = self.varLambdas_vec[ind, :]*dt
-
- I = torch.eye(Upsilon.size()[0]).double()
- G = self.W_inv - self.W_inv@Upsilon.T@torch.inverse(I + Upsilon @ Upsilon.T)@Upsilon@self.W_inv
- return 10e-4 + torch.logdet( varphi_UCB @self.W_inv @ varphi_UCB.T) - torch.logdet( varphi_UCB @ G @ varphi_UCB.T)
-
- elif self.feedback =="histogram":
- raise NotImplementedError("Not implemented.")
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ if self.feedback == "count-record":
+ self.varLambdas_vec = torch.zeros(
+ size=(
+ self.varLambdas.size()[0],
+ self.varLambdas.size()[1] * self.varLambdas.size()[2],
+ )
+ ).double()
+ for i in range(self.varLambdas.size()[0]):
+ self.varLambdas_vec[i, :] = self.varLambdas[i, :, :].reshape(-1)
+
+ self.approx_solver = True
+
+ def fit_gp(self, threads=4):
+ if self.data is not None:
+ super().fit_gp(threads=threads)
+ else:
+ self.rate = None
+
+ def mean_rate(self, S, n=128):
+ xtest = S.return_discretization(n)
+ emb = self.packing.embed(xtest)
+ mu = torch.einsum("ij,jk,ik->i", emb, self.rate, emb).view(-1, 1)
+ return mu
+
+ def rate_value(self, x, dt=1):
+ emb = self.packing.embed(x) * dt
+ mu = torch.einsum("ij,jk,ik->i", emb, self.rate, emb).view(-1, 1)
+ return mu
+
+ def mean_set(self, S, dt=1.0):
+ if self.data is not None:
+ emb = self.product_integral(S) * dt
+ mu = torch.trace(emb @ self.rate).view(1, 1)
+ else:
+ mu = self.b * S.volume()
+ return mu
+
+ def penalized_likelihood(self, threads=4):
+ sumLambda = self.sumLambda.numpy()
+ Theta = cp.Variable((self.get_m(), self.get_m()), symmetric=True)
+
+ if self.observations is not None:
+ observations = self.observations.numpy()
+ # cost = cp.sum_squares(cp.diag(emb @ A @ emb.T) - y.view(-1).numpy()) / (self.s ** 2) + (self.lam) * cp.norm(A, "fro")
+ objective = (
+ -cp.sum(cp.log(observations @ Theta @ observations.T))
+ + cp.trace(sumLambda @ Theta)
+ + self.s * cp.sum_squares(cp.vec(Theta))
+ )
+ else:
+ objective = cp.trace(sumLambda @ Theta) + self.s * cp.sum_squares(
+ cp.vec(Theta)
+ )
+
+ # if self.get_m() == 2:
+ # # use Lorentz-cone special result
+ # constraints = [cp.SOC(Theta[0,0]+Theta[1,1],Theta[1,1] )]
+ # else:
+ # constraints = [Theta >> 0]
+ constraints = []
+ prob = cp.Problem(cp.Minimize(objective), constraints)
+
+ prob.solve(
+ solver=cp.MOSEK,
+ warm_start=False,
+ verbose=False,
+ mosek_params={
+ mosek.iparam.num_threads: threads,
+ mosek.iparam.intpnt_solve_form: mosek.solveform.dual,
+ mosek.dparam.intpnt_co_tol_pfeas: 1e-3,
+ mosek.dparam.intpnt_co_tol_dfeas: 1e-3,
+ mosek.dparam.intpnt_co_tol_rel_gap: 1e-3,
+ },
+ )
+ self.rate = torch.from_numpy(Theta.value)
+ return self.rate
+
+ def penalized_likelihood_bins(self, threads=4):
+ Theta = cp.Variable((self.get_m(), self.get_m()), symmetric=True)
+
+ mask = self.bucketized_counts.clone().numpy() > 0
+ observations = self.total_bucketized_obs[mask].clone().numpy()
+ tau = self.total_bucketized_time[mask].clone().numpy()
+ varLambdas_vec = self.varLambdas_vec[mask, :].clone().numpy()
+
+ objective = (
+ -cp.sum(
+ observations @ cp.log(cp.multiply(tau, varLambdas_vec @ cp.vec(Theta)))
+ )
+ + cp.sum(cp.multiply(tau, varLambdas_vec @ cp.vec(Theta)))
+ + self.s * cp.sum_squares(cp.vec(Theta))
+ )
+
+ constraints = [Theta >> 0]
+ prob = cp.Problem(cp.Minimize(objective), constraints)
+
+ prob.solve(
+ solver=cp.MOSEK,
+ warm_start=False,
+ verbose=False,
+ mosek_params={
+ mosek.iparam.num_threads: threads,
+ mosek.iparam.intpnt_solve_form: mosek.solveform.dual,
+ mosek.dparam.intpnt_co_tol_pfeas: 1e-3,
+ mosek.dparam.intpnt_co_tol_dfeas: 1e-3,
+ mosek.dparam.intpnt_co_tol_rel_gap: 1e-3,
+ },
+ )
+ self.rate = torch.from_numpy(Theta.value)
+ return self.rate
+
+ def least_squares_weighted(self, threads=4):
+
+ if self.approx_fit == False:
+ self.bucketization()
+
+ Theta = cp.Variable((self.get_m(), self.get_m()), symmetric=True)
+
+ mask = self.bucketized_counts.clone().numpy() > 0
+ observations = self.total_bucketized_obs[mask].clone().numpy()
+ tau = self.total_bucketized_time.clone().numpy()
+
+ # varsumLambdas
+ varLambdas_vec = self.varLambdas_vec[mask, :].clone().numpy()
+
+ variances = self.variances.view(-1).clone().numpy()
+
+ for i in range(variances.shape[0]):
+ if mask[i] > 0:
+ variances[i] = (
+ variances[i]
+ * tau[i]
+ * self.variance_correction(variances[i] * tau[i])
+ )
+
+ selected_variances = variances[mask]
+
+ objective = (
+ cp.sum_squares(
+ (varLambdas_vec @ cp.vec(Theta) + -observations)
+ / np.sqrt(selected_variances)
+ )
+ + self.s * cp.sum_squares(cp.vec(Theta)) / 2
+ )
+ constraints = [Theta >> 0]
+ prob = cp.Problem(cp.Minimize(objective), constraints)
+
+ prob.solve(
+ solver=cp.MOSEK,
+ warm_start=False,
+ verbose=False,
+ mosek_params={
+ mosek.iparam.num_threads: threads,
+ mosek.iparam.intpnt_solve_form: mosek.solveform.dual,
+ mosek.dparam.intpnt_co_tol_pfeas: 1e-3,
+ mosek.dparam.intpnt_co_tol_dfeas: 1e-3,
+ mosek.dparam.intpnt_co_tol_rel_gap: 1e-3,
+ },
+ )
+
+ self.rate = torch.from_numpy(Theta.value)
+ return self.rate
+
+ def construct_covariance_matrix(self):
+ if self.estimator == "bins":
+ self.construct_covariance_matrix_bins()
+ elif self.estimator == "least-sq":
+ self.construct_covariance_matrix_regression()
+ else:
+ raise NotImplementedError("Covariance not implemented")
+
+ def construct_covariance_matrix_regression(self):
+ varLambdas = self.varLambdas_vec.clone()
+ variances = self.variances
+ mask = self.bucketized_counts > 0
+ tau = self.total_bucketized_time
+ W = torch.zeros(size=(self.get_m() ** 2, self.get_m() ** 2)).double()
+ I = torch.eye(self.get_m() ** 2).double()
+ W_inv = self.s * torch.eye(self.get_m() ** 2).double()
+
+ for index_o, o in enumerate(self.bucketized_obs):
+ n = mask[index_o]
+ if n > 0:
+ k = self.variance_correction(tau[index_o] * variances[index_o])
+ v = tau[index_o] / (variances[index_o] * k)
+
+ vec = varLambdas[index_o, :].view(-1, 1)
+ A = vec @ vec.T
+ W = W + A * v
+ denom = 1.0 + v * vec.T @ W_inv @ vec
+ W_inv = W_inv @ (I - v * vec @ (vec.T @ W_inv) / denom)
+
+ self.W = W + self.s * torch.eye(self.get_m() ** 2).double()
+ self.W_inv = W_inv
+ # self.W_cholesky = torch.cholesky(self.W, upper=True)
+ return self.W
+
+ def construct_covariance_matrix_bins(self):
+ self.construct_covariance_matrix_regression()
+
+ def mean_var_reg_set(self, S, dt=1.0, beta=2.0, lcb_compute=False):
+
+ if self.data is None:
+ return S.volume() * self.b, S.volume() * self.B, S.volume() * self.b
+
+ if self.approx_fit == False:
+ self.W = self.construct_covariance_matrix()
+ self.approx_fit = True
+
+ map = None
+ lcb = None
+
+ if self.approx_solver == True:
+ ucb = self.band_no_opt(S, beta=beta, dt=dt, maximization=True)
+ if lcb_compute == True:
+ lcb = self.band_no_opt(S, beta=beta, dt=dt, maximization=False)
+ else:
+ ucb = self.band(S, beta=beta, dt=dt, maximization=True)
+ if lcb_compute == True:
+ lcb = self.band(S, beta=beta, dt=dt, maximization=False)
+
+ return map, ucb, lcb
+
+ def mean_var_bins_set(self, S, dt=1.0, beta=2.0, lcb_compute=False):
+ return self.mean_var_reg_set(S, dt=dt, beta=beta, lcb_compute=lcb_compute)
+
+ def band(self, S, beta=2.0, dt=1.0, maximization=True):
+ emb = self.product_integral(S) * dt
+ A = cp.Variable((self.get_m(), self.get_m()), symmetric=True)
+ cost = cp.trace(A @ emb)
+ Z = self.W_cholesky.clone()
+ zero = np.zeros(self.get_m() ** 2)
+ constraints = [
+ cp.SOC(
+ zero.T @ cp.vec(A) + self.s * beta**2,
+ Z @ (cp.vec(A) - cp.vec(self.rate.numpy())),
+ )
+ ]
+ constraints += [A >> 0]
+
+ if maximization == True:
+ prob = cp.Problem(cp.Maximize(cost), constraints)
+ else:
+ prob = cp.Problem(cp.Minimize(cost), constraints)
+
+ prob.solve(
+ solver=cp.MOSEK,
+ warm_start=False,
+ verbose=False,
+ mosek_params={
+ mosek.iparam.num_threads: 4,
+ mosek.iparam.intpnt_solve_form: mosek.solveform.dual,
+ mosek.dparam.intpnt_co_tol_pfeas: 1e-3,
+ mosek.dparam.intpnt_co_tol_dfeas: 1e-3,
+ mosek.dparam.intpnt_co_tol_rel_gap: 1e-3,
+ },
+ )
+ ucb = torch.trace(torch.from_numpy(A.value) @ emb)
+ return ucb
+
+ def band_no_opt(self, S, beta=2.0, dt=1.0, maximization=True):
+
+ if self.rate is None:
+ if maximization == True:
+ return S.volume() * dt * self.B
+ else:
+ return S.volume() * dt * self.b
+ else:
+ emb = self.product_integral(S)
+ cost = torch.trace(self.rate @ emb)
+ if maximization == True:
+ out = cost + beta * emb.view(1, -1) @ self.W_inv @ emb.view(-1, 1)
+ else:
+ out = np.maximum(
+ cost - beta * emb.view(1, -1) @ self.W_inv @ emb.view(-1, 1), 0.0
+ )
+ return out * dt
+
+ def gap(self, S, actions, w, dt, beta=2.0):
+ """
+ Estimates the gap of an action S,
+ :param S:
+ :param dt:
+ :return:
+ """
+
+ if self.data is None:
+ return (self.B - self.b) * S.volume() / w(S)
+
+ if self.ucb_identified == False:
+ print("Recomputing UCB.....")
+ self.ucb_identified = True
+ self.max_ucb = -1000
+ self.ucb_action = None
+ for action in actions:
+ _, ucb, __ = self.mean_var_reg_set(action, dt=dt, beta=self.beta(0))
+ ucb = ucb / w(action)
+ if ucb > self.max_ucb:
+ self.max_ucb = ucb
+ self.ucb_action = action
+ map, ucb, lcb = self.mean_var_reg_set(
+ S, dt=dt, beta=self.beta(0), lcb_compute=True
+ )
+ gap = w(S) * self.max_ucb - lcb
+ return gap
+
+ def information(self, S, dt, precomputed=None):
+
+ if self.data is None:
+ return 1.0
+
+ if self.W is None:
+ self.construct_covariance_matrix()
+
+ if self.feedback == "count-record":
+ varphi_UCB = self.product_integral(self.ucb_action).view(1, -1) * dt
+
+ ind = []
+ for index, set in enumerate(self.basic_sets):
+ if S.inside(set):
+ ind.append(index)
+ Upsilon = self.varLambdas_vec[ind, :] * dt
+
+ I = torch.eye(Upsilon.size()[0]).double()
+ G = (
+ self.W_inv
+ - self.W_inv
+ @ Upsilon.T
+ @ torch.inverse(I + Upsilon @ Upsilon.T)
+ @ Upsilon
+ @ self.W_inv
+ )
+ return (
+ 10e-4
+ + torch.logdet(varphi_UCB @ self.W_inv @ varphi_UCB.T)
+ - torch.logdet(varphi_UCB @ G @ varphi_UCB.T)
+ )
+
+ elif self.feedback == "histogram":
+ raise NotImplementedError("Not implemented.")
if __name__ == "__main__":
- torch.manual_seed(2)
- np.random.seed(2)
- d = 1
- gamma = 0.2
- n = 64
- B = 4.
- b = 0.5
-
- process = PoissonPointProcess(d=1, B=B, b=b)
- Sets = []
- levels = 3
- hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels)
- Sets = hierarchical_structure.get_all_sets()
-
- D = BorelSet(1, bounds=torch.Tensor([[-1., 1.]]).double())
-
- m = 32
- embedding = HermiteEmbedding(m = m, d = 1, gamma = gamma)
- k = KernelFunction(gamma = gamma)
- estimator = MBRPositiveEstimator(process, hierarchical_structure, kernel_object=k,
- B=B, m=m, d=d, embedding=embedding, basis = "custom")
- min_vol, max_vol = estimator.get_min_max()
-
- dt = 10. / (b * min_vol)
- dt = dt * 2
-
- print("Suggested dt:", dt)
- c = ['k', 'r', 'b', 'y', 'g', 'orange', 'brown', 'purple'] + ['k' for i in range(500)]
-
- no_sets = len(Sets)
- no_samples = 0
- data = []
- samples = []
- repeats = 2
-
- for i in range(no_samples):
- j = np.random.randint(0, no_sets, 1)
- S = Sets[j[0]]
- for _ in range(repeats):
- sample = process.sample_discretized(S, dt)
- samples.append(sample)
- data.append((S, sample, dt))
-
- sample_D = process.sample_discretized(D, dt)
- samples.append(sample_D)
- no_samples = repeats * no_samples + 1
- data.append((D, sample_D, dt))
-
- estimator.load_data(data)
-
- xtest = D.return_discretization(n=n)
-
- # likelihood based
- estimator.penalized_likelihood()
- rate_mean = estimator.mean_rate(D,n = n)
-
- #_, lcb, ucb = estimator.map_lcb_ucb(D, n, beta=2.)
-
-
- for j in range(no_samples):
- if samples[j] is not None:
- plt.plot(samples[j], samples[j] * 0, 'o', color=c[j])
-
- plt.plot(xtest, rate_mean, label='likelihood - locations known')
- #plt.fill_between(xtest.numpy().flatten(), lcb.numpy().flatten(), ucb.numpy().flatten(), alpha=0.4,
- # color='blue', label='triangle')
- process.visualize(D, samples=0, n=n, dt=1.)
+ torch.manual_seed(2)
+ np.random.seed(2)
+ d = 1
+ gamma = 0.2
+ n = 64
+ B = 4.0
+ b = 0.5
+
+ process = PoissonPointProcess(d=1, B=B, b=b)
+ Sets = []
+ levels = 3
+ hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels)
+ Sets = hierarchical_structure.get_all_sets()
+
+ D = BorelSet(1, bounds=torch.tensor([[-1.0, 1.0]]).double())
+
+ m = 32
+ embedding = HermiteEmbedding(m=m, d=1, gamma=gamma)
+ k = KernelFunction(gamma=gamma)
+ estimator = MBRPositiveEstimator(
+ process,
+ hierarchical_structure,
+ kernel_object=k,
+ B=B,
+ m=m,
+ d=d,
+ embedding=embedding,
+ basis="custom",
+ )
+ min_vol, max_vol = estimator.get_min_max()
+
+ dt = 10.0 / (b * min_vol)
+ dt = dt * 2
+
+ print("Suggested dt:", dt)
+ c = ["k", "r", "b", "y", "g", "orange", "brown", "purple"] + [
+ "k" for i in range(500)
+ ]
+
+ no_sets = len(Sets)
+ no_samples = 0
+ data = []
+ samples = []
+ repeats = 2
+
+ for i in range(no_samples):
+ j = np.random.randint(0, no_sets, 1)
+ S = Sets[j[0]]
+ for _ in range(repeats):
+ sample = process.sample_discretized(S, dt)
+ samples.append(sample)
+ data.append((S, sample, dt))
+
+ sample_D = process.sample_discretized(D, dt)
+ samples.append(sample_D)
+ no_samples = repeats * no_samples + 1
+ data.append((D, sample_D, dt))
+
+ estimator.load_data(data)
+
+ xtest = D.return_discretization(n=n)
+
+ # likelihood based
+ estimator.penalized_likelihood()
+ rate_mean = estimator.mean_rate(D, n=n)
+
+ # _, lcb, ucb = estimator.map_lcb_ucb(D, n, beta=2.)
+
+ for j in range(no_samples):
+ if samples[j] is not None:
+ plt.plot(samples[j], samples[j] * 0, "o", color=c[j])
+
+ plt.plot(xtest, rate_mean, label="likelihood - locations known")
+ # plt.fill_between(xtest.numpy().flatten(), lcb.numpy().flatten(), ucb.numpy().flatten(), alpha=0.4,
+ # color='blue', label='triangle')
+ process.visualize(D, samples=0, n=n, dt=1.0)
diff --git a/stpy/point_processes/poisson.py b/stpy/point_processes/poisson.py
index 6776f2d..8a1ca1f 100644
--- a/stpy/point_processes/poisson.py
+++ b/stpy/point_processes/poisson.py
@@ -4,157 +4,194 @@
from stpy.borel_set import BorelSet
+class PoissonPointProcess:
+ """
+ parametrized by log linear model
+
+ """
+
+ def __init__(self, d=1, B=1, b=0.2, rate=None, rate_volume=None):
+ self.B = B
+ self.d = d
+ self.b = b
+ if rate is None:
+ self.rate = self.rate_default
+ else:
+ self.rate = rate
+
+ self.rate_volume_f = rate_volume
+ self.exact = True
+
+ def rate_default(self, x, dt=1.0):
+ return (
+ self.B
+ * torch.sum(
+ torch.exp(-(x + 1)) * torch.sin(2 * x * np.pi) ** 2, dim=1
+ ).view(-1, 1)
+ + self.b
+ ) * dt
+
+ def rate_volume(self, S, dt=1, rate=None):
+ if self.rate_volume_f is None:
+ # integrate rate numerically over S
+ import scipy.integrate as integrate
+
+ if rate is None:
+ rate = self.rate
+ else:
+ rate = rate
+ integral = 0
+ if self.d == 1:
+ # integrate = S.volume()* self.rate(torch.from_numpy(S.bounds[0,1]).view(1))
+ integral, _ = integrate.quad(
+ lambda x: rate(torch.tensor([x]).view(1, 1)).numpy(),
+ float(S.bounds[0, 0]),
+ float(S.bounds[0, 1]),
+ )
+ elif self.d == 2:
+ integrand = lambda x, y: rate(
+ torch.tensor([x, y]).view(1, 2).double()
+ ).numpy()
+ integral, _ = integrate.dblquad(
+ integrand,
+ float(S.bounds[0, 0]),
+ float(S.bounds[0, 1]),
+ lambda x: float(S.bounds[1, 0]),
+ lambda x: float(S.bounds[1, 1]),
+ )
+
+ return integral * dt
+ else:
+ return self.rate_volume_f(S) * dt
+
+ def sample_discretized(self, S, dt, n=50):
+ lam = np.maximum(float(self.rate_volume(S, dt)), 0)
+ count = np.random.poisson(lam=lam)
+ if count > 0:
+ x = S.return_discretization(n)
+ r = self.rate(x) * dt
+ r = torch.maximum(r, r * 0)
+ sample = torch.from_numpy(
+ np.random.choice(
+ np.arange(0, x.size()[0], 1),
+ size=count,
+ p=(r / torch.sum(r)).numpy().reshape(-1),
+ )
+ )
+ return x[sample, :]
+ else:
+ return None
+
+ def sample_discretized_direct(self, x, val):
+ lam = 1000
+ count = np.random.poisson(lam=np.maximum(0, lam))
+ if count > 0:
+ val = torch.abs(val)
+ sample = torch.from_numpy(
+ np.random.choice(
+ np.arange(0, x.size()[0], 1),
+ size=count,
+ p=(val / torch.sum(val)).numpy().reshape(-1),
+ )
+ )
+ return x[sample, :]
+ else:
+ return None
+
+ def sample(self, S, dt=1.0, verbose=False, rate=None):
+ """
+
+ :param S: set where it should be sampled
+ :return:
+ """
+ if self.exact == True:
+
+ return self.sample_discretized(S, dt=dt)
+
+ else:
+
+ lam = self.rate_volume(S, dt)
+ n = np.random.poisson(lam=lam)
+ new_sample = []
+ vol = S.volume()
+ size = 0
+
+ alpha = 1.0 / lam
+
+ while size < n:
+ # uniform sample g(s) = 1/vol(S)
+ sample = S.uniform_sample(1)
+
+ t = self.rate(sample) / (alpha * lam)
+ p = np.random.uniform(0, 1)
+ if p < t:
+ new_sample.append(sample.view(1, -1))
+ size = size + 1
+
+ if len(new_sample) > 1:
+ x = torch.cat(new_sample, dim=0)
+ else:
+ return None
+ return x
+
+ def rate_sets(self, Sets, dt=1):
+ res = []
+ for S in Sets:
+ res.append(self.rate_volume(S, dt=dt))
+ return res
+
+ def visualize(self, S, samples=2, n=10, dt=1.0, show=True):
+ xtest = S.return_discretization(n)
+ rate = self.rate(xtest)
+
+ if self.d == 1:
+ plt.plot(xtest, rate, label="rate", lw=3)
+ for i in range(samples):
+
+ x = self.sample(S, dt=dt)
+ if x is not None:
+ n = x.size()[0]
+ plt.plot(x, x * 0, "o", label="sample n=" + str(n))
+
+ elif self.d == 2:
+ from scipy.interpolate import griddata
+
+ xx = xtest[:, 0].detach().numpy()
+ yy = xtest[:, 1].detach().numpy()
+ grid_x, grid_y = np.mgrid[
+ min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j
+ ]
+ grid_z_mu = griddata(
+ (xx, yy), rate[:, 0].detach().numpy(), (grid_x, grid_y), method="linear"
+ )
+ fig, ax = plt.subplots(figsize=(15, 7))
+ cs = ax.contourf(grid_x, grid_y, grid_z_mu, label="rate")
+ ax.contour(cs, colors="k")
+
+ for i in range(samples):
+ x = self.sample(S, dt=dt)
+ if x is not None:
+ ax.plot(
+ x[:, 0].detach().numpy(),
+ x[:, 1].detach().numpy(),
+ "o",
+ ms=10,
+ alpha=0.5,
+ label="sample",
+ )
+ ax.grid(c="k", ls="-", alpha=0.1)
+ plt.colorbar(cs)
+
+ plt.legend()
+ if show == True:
+ plt.show()
-class PoissonPointProcess():
- """
- parametrized by log linear model
-
- """
- def __init__(self, d = 1, B = 1, b= 0.2, rate = None, rate_volume = None):
- self.B = B
- self.d = d
- self.b = b
- if rate is None:
- self.rate = self.rate_default
- else:
- self.rate = rate
-
-
- self.rate_volume_f = rate_volume
- self.exact = True
-
- def rate_default(self,x, dt = 1.):
- return (self.B*torch.sum(torch.exp(-(x+1))*torch.sin(2*x*np.pi)**2 ,dim =1).view(-1,1)+ self.b) *dt
-
- def rate_volume(self,S, dt = 1, rate = None):
- if self.rate_volume_f is None:
- # integrate rate numerically over S
- import scipy.integrate as integrate
- if rate is None:
- rate = self.rate
- else:
- rate = rate
- integral = 0
- if self.d == 1:
- #integrate = S.volume()* self.rate(torch.from_numpy(S.bounds[0,1]).view(1))
- integral,_ = integrate.quad(lambda x : rate(torch.Tensor([x]).view(1,1)).numpy(), float(S.bounds[0,0]), float(S.bounds[0,1]) )
- elif self.d ==2:
- integrand = lambda x, y: rate(torch.Tensor([x, y]).view(1, 2).double()).numpy()
- integral, _ = integrate.dblquad(integrand, float(S.bounds[0, 0]), float(S.bounds[0, 1]),
- lambda x: float(S.bounds[1, 0]), lambda x: float(S.bounds[1, 1]))
-
- return integral*dt
- else:
- return self.rate_volume_f(S)*dt
-
- def sample_discretized(self, S, dt, n = 50):
- lam = np.maximum(float(self.rate_volume(S, dt)),0)
- count = np.random.poisson(lam=lam)
- if count > 0:
- x = S.return_discretization(n)
- r = self.rate(x)*dt
- r = torch.maximum(r,r*0)
- sample = torch.from_numpy(np.random.choice(np.arange(0,x.size()[0],1), size = count, p=(r/torch.sum(r)).numpy().reshape(-1) ))
- return x[sample,:]
- else:
- return None
-
- def sample_discretized_direct(self, x,val):
- lam = 1000
- count = np.random.poisson(lam=np.maximum(0,lam))
- if count > 0:
- val = torch.abs(val)
- sample = torch.from_numpy(np.random.choice(np.arange(0,x.size()[0],1),
- size = count, p=(val/torch.sum(val)).numpy().reshape(-1) ))
- return x[sample,:]
- else:
- return None
-
- def sample(self, S, dt = 1., verbose = False, rate = None):
- """
-
- :param S: set where it should be sampled
- :return:
- """
- if self.exact == True:
-
- return self.sample_discretized(S, dt = dt)
-
- else:
-
- lam = self.rate_volume(S, dt)
- n = np.random.poisson(lam = lam)
- new_sample = []
- vol = S.volume()
- size = 0
-
- alpha = 1./lam
-
- while size1:
- x = torch.cat(new_sample, dim = 0)
- else:
- return None
- return x
-
- def rate_sets(self,Sets, dt = 1):
- res = []
- for S in Sets:
- res.append(self.rate_volume(S,dt=dt))
- return res
-
- def visualize(self,S,samples = 2, n = 10, dt = 1., show = True):
- xtest = S.return_discretization(n)
- rate = self.rate(xtest)
-
- if self.d == 1:
- plt.plot(xtest, rate, label='rate', lw = 3)
- for i in range(samples):
-
- x = self.sample(S, dt= dt)
- if x is not None:
- n = x.size()[0]
- plt.plot(x,x*0,'o', label = 'sample n='+str(n))
-
- elif self.d == 2:
- from scipy.interpolate import griddata
- xx = xtest[:, 0].detach().numpy()
- yy = xtest[:, 1].detach().numpy()
- grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j]
- grid_z_mu = griddata((xx, yy), rate[:, 0].detach().numpy(), (grid_x, grid_y), method='linear')
- fig, ax = plt.subplots(figsize=(15, 7))
- cs = ax.contourf(grid_x, grid_y, grid_z_mu, label = 'rate')
- ax.contour(cs, colors='k')
-
- for i in range(samples):
- x = self.sample(S, dt = dt)
- if x is not None:
- ax.plot(x[:, 0].detach().numpy(), x[:, 1].detach().numpy(), 'o', ms=10, alpha = 0.5, label = 'sample')
- ax.grid(c='k', ls='-', alpha=0.1)
- plt.colorbar(cs)
-
- plt.legend()
- if show == True:
- plt.show()
-
if __name__ == "__main__":
- d = 2
- n = 100
- bounds = torch.Tensor([[-1,1],[-1,1]]).double()
- D = BorelSet(d, bounds)
-
- process = PoissonPointProcess(d = d, B = 2)
- process.visualize(D, samples = 10, n = n, dt = 10)
-
-
+ d = 2
+ n = 100
+ bounds = torch.tensor([[-1, 1], [-1, 1]]).double()
+ D = BorelSet(d, bounds)
+ process = PoissonPointProcess(d=d, B=2)
+ process.visualize(D, samples=10, n=n, dt=10)
diff --git a/stpy/point_processes/poisson/__init__.py b/stpy/point_processes/poisson/__init__.py
index e69de29..32fcc1a 100644
--- a/stpy/point_processes/poisson/__init__.py
+++ b/stpy/point_processes/poisson/__init__.py
@@ -0,0 +1 @@
+from .poisson import PoissonPointProcess
diff --git a/stpy/point_processes/poisson/link_fun_rate_estimator.py b/stpy/point_processes/poisson/link_fun_rate_estimator.py
index d4e50d5..02e46fb 100644
--- a/stpy/point_processes/poisson/link_fun_rate_estimator.py
+++ b/stpy/point_processes/poisson/link_fun_rate_estimator.py
@@ -7,8 +7,14 @@
from stpy.borel_set import BorelSet, HierarchicalBorelSets
from stpy.embeddings.embedding import HermiteEmbedding
-from stpy.helpers.ellipsoid_algorithms import maximize_matrix_quadratic_on_ellipse, minimize_matrix_quadratic_on_ellipse
-from stpy.helpers.ellipsoid_algorithms import maximize_quadratic_on_ellipse, minimize_quadratic_on_ellipse
+from stpy.helpers.ellipsoid_algorithms import (
+ maximize_matrix_quadratic_on_ellipse,
+ minimize_matrix_quadratic_on_ellipse,
+)
+from stpy.helpers.ellipsoid_algorithms import (
+ maximize_quadratic_on_ellipse,
+ minimize_quadratic_on_ellipse,
+)
from stpy.helpers.quadrature_helper import quadvec2
from stpy.kernels import KernelFunction
from stpy.point_processes.poisson import PoissonPointProcess
@@ -17,452 +23,584 @@
## implement loading data
+
class PermanentalProcessRateEstimator(PoissonRateEstimator):
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
-
- self.integration = "fixed_quad"
- self.product_integrals = {}
- self.varLambdas = torch.zeros(size=(len(self.basic_sets), self.get_m(), self.get_m())).double()
- self.opt = 'cvxpy'
- if self.feedback == "count-record" and self.estimator == "least-sq":
- print("precomputing-integrals:")
- for index_set, set in enumerate(self.basic_sets):
- print(index_set, "/", len(self.basic_sets))
- self.varLambdas[index_set, :] = self.product_integral(set)
- self.variances[index_set] = set.volume() * self.B
-
- def product_integral(self, S):
-
- if S in self.product_integrals.keys():
- return self.product_integrals[S]
- else:
-
- if "product_integral" in dir(self.packing):
- Psi = self.packing.product_integral(S)
- self.product_integrals[S] = Psi
- return Psi
-
- elif self.integration == "vec_quad":
-
- if S.d == 2:
- # Psi = torch.zeros(size=(self.get_m(), self.get_m())).double()
- F = lambda x: (self.packing.embed(x).view(-1, 1) @ \
- self.packing.embed(x).view(1, -1)).view(-1)
- integrand = lambda x, y: F(torch.Tensor([x, y]).view(1, 2).double()).numpy()
-
- val = quadvec2(integrand, float(S.bounds[0, 0]), float(S.bounds[0, 1]),
- float(S.bounds[1, 0]), float(S.bounds[1, 1]), limit=10, epsrel=10e-3, epsabs=10e-3,
- quadrature='gk15')
- Psi = torch.from_numpy(val).view((self.get_m(), self.get_m()))
-
- elif self.integration == "fixed_quad":
-
- if S.d == 1:
- weights, nodes = S.return_legendre_discretization(n=128)
- Z = self.packing.embed(nodes)
- M = torch.einsum('ij,ik->ijk', Z, Z)
- Psi = torch.einsum('i,ijk->jk', weights, M)
-
- if S.d == 2:
- weights, nodes = S.return_legendre_discretization(n=50)
- Z = self.packing.embed(nodes)
- M = torch.einsum('ij,ik->ijk', Z, Z)
- Psi = torch.einsum('i,ijk->jk', weights, M)
-
- else:
- Psi = torch.zeros(size=(self.get_m(), self.get_m())).double()
- for i in range(self.get_m()):
- for j in range(self.get_m()):
-
- if S.d == 1:
- F_ij = lambda x: (
- self.packing.embed(torch.from_numpy(np.array(x)).view(1, -1)).view(-1)[i] *
- self.packing.embed(torch.from_numpy(np.array(x)).view(1, -1)).view(-1)[
- j]).numpy()
- val, status = integrate.quad(F_ij, float(S.bounds[0, 0]), float(S.bounds[0, 1]))
-
-
- elif S.d == 2:
- F_ij = lambda x: self.packing.embed(x).view(-1)[i] * self.packing.embed(x).view(-1)[j]
- integrand = lambda x, y: F_ij(torch.Tensor([x, y]).view(1, 2).double()).numpy()
- val, status = integrate.dblquad(integrand, float(S.bounds[0, 0]), float(S.bounds[0, 1]),
- lambda x: float(S.bounds[1, 0]),
- lambda x: float(S.bounds[1, 1]), epsabs=1.49e-03,
- epsrel=1.49e-03)
- else:
- raise NotImplementedError("Integration above d>2 not implemented.")
-
- Psi[i, j] = val
- print(i, j, val)
-
- self.product_integrals[S] = Psi
- return Psi
-
- def get_constraints(self):
- s = self.get_m()
- l = np.full(s, self.b)
- u = np.full(s, self.B)
- Lambda = np.identity(s)
- return (l, Lambda, u)
-
- def cov(self, inverse=False):
- s = self.get_m()
-
- if inverse == False:
- return torch.zeros(size=(s, s)).double()
- else:
- return torch.zeros(size=(s, s)).double(), torch.zeros(size=(s, s)).double()
-
- def sample(self, verbose=False, steps=10, stepsize=None):
-
- if self.data is None:
- self.sampled_theta = torch.zeros(self.get_m()).double().view(-1, 1)
- return None
-
- if self.observations is not None:
- observations = self.observations.double()
- sumLambda = self.sumLambda.double()
- nabla = lambda theta: -torch.sum(torch.diag(1. / (observations @ theta).view(-1)) @ observations) \
- + (sumLambda.T + sumLambda) @ theta + self.s * theta.view(-1, 1)
- else:
- sumLambda = self.sumLambda.double()
- nabla = lambda theta: (sumLambda.T + sumLambda) @ theta + self.s * theta.view(-1, 1)
-
- theta = self.rate.view(-1, 1)
-
- W = self.construct_covariance_matrix_laplace()
- L = float(scipy.sparse.linalg.eigsh(W.numpy(), k=1, which='LM', return_eigenvectors=False, tol=1e-3))
- eta = 0.5 / (L + 1)
-
- for k in range(steps):
- W = torch.randn(size=(self.get_m(), 1)).double()
- theta = theta - eta * nabla(theta) + np.sqrt(2 * eta) * W
- if verbose == True:
- print("Iter:", k, theta.T)
-
- self.sampled_theta = theta
- return None
-
- def sample_value(self, S):
- """
- Given a pre-sampled value evaluate certain portions of the domain S
- :param S:
- :return:
- """
- Z = self.product_integral(S)
- map = self.sampled_theta.T @ Z @ self.sampled_theta
- return map
-
- def sample_path(self, S, n=128):
- xtest = S.return_discretization(n)
- return (self.packing.embed(xtest) @ self.sampled_theta) ** 2
-
- def load_data(self, data):
- super().load_data(data, times=False)
- self.sumLambda = torch.zeros(size=(self.get_m(), self.get_m()))
- if len(data) > 1:
- for sample in data:
- (S, obs, dt) = sample
- self.sumLambda += self.product_integral(S) * dt
-
- def add_data_point(self, new_data):
- super().add_data_point(new_data, times=False)
- (S, obs, dt) = new_data
- self.sumLambda += self.product_integral(S) * dt
-
- def penalized_likelihood(self, threads=4):
- sumLambda = self.sumLambda.numpy()
- if self.observations is not None:
- observations = self.observations.numpy()
- loss = lambda theta: float(
- -np.sum(np.log((observations @ theta) ** 2)) + np.dot(theta, sumLambda @ theta) + 0.5 * self.s * np.sum(
- theta ** 2))
- else:
- loss = lambda theta: float(np.dot(theta, sumLambda @ theta) + 0.5 * self.s * np.sum(theta ** 2))
-
- theta = np.random.randn(self.get_m())
- res = minimize(loss, theta, jac=None, method='L-BFGS-B')
- self.rate = torch.from_numpy(res.x)
- return self.rate
-
- def construct_covariance_matrix_laplace(self):
- W = torch.zeros(size=(self.get_m(), self.get_m())).double()
-
- if self.feedback == "count-record":
- if self.observations is not None:
- for i in range(self.observations.size()[0]):
- A = self.observations[i, :].view(-1, 1) @ self.observations[i, :].view(1, -1)
- k = np.maximum(torch.dot(self.observations[i, :], self.rate.view(-1)) ** 2, self.b)
- W = W + A / k
- W += 2 * self.sumLambda
- else:
- raise AssertionError("Not implemented.")
- return W + torch.eye(self.get_m()).double() * self.s
-
- def map_lcb_ucb_approx_action(self, S, dt=1., beta=2.):
-
- phi = self.packing.integral(S)
- map = (phi @ self.rate)
-
- ucb = np.maximum((map + beta * np.sqrt(phi @ self.W_inv_approx @ phi.T)) ** 2,
- (map - beta * np.sqrt(phi @ self.W_inv_approx @ phi.T)) ** 2)
- ucb = np.minimum(ucb, self.B * S.volume() * dt)
- lcb = 0.
-
- return dt * map ** 2, dt * lcb, dt * ucb
-
- def mean_std_per_action(self, S, W, dt, beta):
- Z = self.product_integral(S)
-
- ucb, _ = maximize_matrix_quadratic_on_ellipse(Z.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta)
- lcb, _ = minimize_matrix_quadratic_on_ellipse(Z.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta)
-
- map = self.rate.T @ Z @ self.rate
-
- return dt * map, dt * ucb, -lcb * dt
-
- def mean_rate(self, S, n=128):
- xtest = S.return_discretization(n)
- return (self.packing.embed(xtest) @ self.rate) ** 2
-
- def mean_rate_latent(self, S, n=128):
- xtest = S.return_discretization(n)
- return self.packing.embed(xtest) @ self.rate
-
- def map_lcb_ucb_approx(self, S, n, beta=2.0, delta=0.01):
- xtest = S.return_discretization(n)
- if self.data is None:
- return 0 * xtest[:, 0].view(-1, 1), self.b + 0 * xtest[:, 0].view(-1, 1), self.B + 0 * xtest[:, 0].view(-1,
- xtest.size()[
- 0])
- self.fit_ellipsoid_approx()
-
- Phi = self.packing.embed(xtest).double()
- map = Phi @ self.rate
- N = Phi.size()[0]
-
- ucb = torch.zeros(size=(N, 1)).double()
- lcb = torch.zeros(size=(N, 1)).double()
-
- for i in range(N):
- x = Phi[i, :].view(-1, 1)
- maximum = np.maximum((map[i] - beta * np.sqrt(x.T @ self.W_inv_approx @ x)) ** 2,
- (map[i] + beta * np.sqrt(x.T @ self.W_inv_approx @ x)) ** 2)
- ucb[i, 0] = np.minimum(maximum, self.B)
- lcb[i, 0] = 0.
- # lcb[i, 0] = map[i] - np.sqrt(beta) * np.sqrt(x.T @ self.W_inv_approx @ x) ** 2
- return map ** 2, lcb, ucb
-
- def map_lcb_ucb(self, S, n, beta=2.0, delta=0.01):
- """
- Calculate exact confidence using laplace approximation on a whole set domain
- :param S: set
- :param n: discretization
- :param beta: beta
- :return:
- """
-
- xtest = S.return_discretization(n)
- if self.data is None:
- return self.b + 0 * xtest[:, 0].view(-1, 1), self.b + 0 * xtest[:, 0].view(-1, 1), self.B + 0 * xtest[:,
- 0].view(-1,
- 1)
-
- N = xtest.size()[0]
- Phi = self.packing.embed(xtest)
- map = (Phi @ self.rate) ** 2
-
- if self.uncertainty == "laplace":
- W = self.construct_covariance_matrix_laplace()
- ucb = torch.zeros(size=(N, 1)).double()
- lcb = torch.zeros(size=(N, 1)).double()
-
- for i in range(N):
- x = Phi[i, :]
- ucbi, _ = maximize_quadratic_on_ellipse(x.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta)
- lcbi, _ = minimize_quadratic_on_ellipse(x.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta)
- ucb[i, 0] = ucbi
- lcb[i, 0] = lcbi
-
- return map, lcb, ucb
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ self.integration = "fixed_quad"
+ self.product_integrals = {}
+ self.varLambdas = torch.zeros(
+ size=(len(self.basic_sets), self.get_m(), self.get_m())
+ ).double()
+ self.opt = "cvxpy"
+ if self.feedback == "count-record" and self.estimator == "least-sq":
+ print("precomputing-integrals:")
+ for index_set, set in enumerate(self.basic_sets):
+ print(index_set, "/", len(self.basic_sets))
+ self.varLambdas[index_set, :] = self.product_integral(set)
+ self.variances[index_set] = set.volume() * self.B
+
+ def product_integral(self, S):
+
+ if S in self.product_integrals.keys():
+ return self.product_integrals[S]
+ else:
+
+ if "product_integral" in dir(self.packing):
+ Psi = self.packing.product_integral(S)
+ self.product_integrals[S] = Psi
+ return Psi
+
+ elif self.integration == "vec_quad":
+
+ if S.d == 2:
+ # Psi = torch.zeros(size=(self.get_m(), self.get_m())).double()
+ F = lambda x: (
+ self.packing.embed(x).view(-1, 1)
+ @ self.packing.embed(x).view(1, -1)
+ ).view(-1)
+ integrand = lambda x, y: F(
+ torch.tensor([x, y]).view(1, 2).double()
+ ).numpy()
+
+ val = quadvec2(
+ integrand,
+ float(S.bounds[0, 0]),
+ float(S.bounds[0, 1]),
+ float(S.bounds[1, 0]),
+ float(S.bounds[1, 1]),
+ limit=10,
+ epsrel=10e-3,
+ epsabs=10e-3,
+ quadrature="gk15",
+ )
+ Psi = torch.from_numpy(val).view((self.get_m(), self.get_m()))
+
+ elif self.integration == "fixed_quad":
+
+ if S.d == 1:
+ weights, nodes = S.return_legendre_discretization(n=128)
+ Z = self.packing.embed(nodes)
+ M = torch.einsum("ij,ik->ijk", Z, Z)
+ Psi = torch.einsum("i,ijk->jk", weights, M)
+
+ if S.d == 2:
+ weights, nodes = S.return_legendre_discretization(n=50)
+ Z = self.packing.embed(nodes)
+ M = torch.einsum("ij,ik->ijk", Z, Z)
+ Psi = torch.einsum("i,ijk->jk", weights, M)
+
+ else:
+ Psi = torch.zeros(size=(self.get_m(), self.get_m())).double()
+ for i in range(self.get_m()):
+ for j in range(self.get_m()):
+
+ if S.d == 1:
+ F_ij = lambda x: (
+ self.packing.embed(
+ torch.from_numpy(np.array(x)).view(1, -1)
+ ).view(-1)[i]
+ * self.packing.embed(
+ torch.from_numpy(np.array(x)).view(1, -1)
+ ).view(-1)[j]
+ ).numpy()
+ val, status = integrate.quad(
+ F_ij, float(S.bounds[0, 0]), float(S.bounds[0, 1])
+ )
+
+ elif S.d == 2:
+ F_ij = (
+ lambda x: self.packing.embed(x).view(-1)[i]
+ * self.packing.embed(x).view(-1)[j]
+ )
+ integrand = lambda x, y: F_ij(
+ torch.tensor([x, y]).view(1, 2).double()
+ ).numpy()
+ val, status = integrate.dblquad(
+ integrand,
+ float(S.bounds[0, 0]),
+ float(S.bounds[0, 1]),
+ lambda x: float(S.bounds[1, 0]),
+ lambda x: float(S.bounds[1, 1]),
+ epsabs=1.49e-03,
+ epsrel=1.49e-03,
+ )
+ else:
+ raise NotImplementedError(
+ "Integration above d>2 not implemented."
+ )
+
+ Psi[i, j] = val
+ print(i, j, val)
+
+ self.product_integrals[S] = Psi
+ return Psi
+
+ def get_constraints(self):
+ s = self.get_m()
+ l = np.full(s, self.min_intensity)
+ u = np.full(s, self.B)
+ Lambda = np.identity(s)
+ return (l, Lambda, u)
+
+ def cov(self, inverse=False):
+ s = self.get_m()
+
+ if inverse == False:
+ return torch.zeros(size=(s, s)).double()
+ else:
+ return torch.zeros(size=(s, s)).double(), torch.zeros(size=(s, s)).double()
+
+ def sample(self, verbose=False, steps=10, stepsize=None):
+
+ if self.data is None:
+ self.sampled_theta = torch.zeros(self.get_m()).double().view(-1, 1)
+ return None
+
+ if self.observations is not None:
+ observations = self.observations.double()
+ sumLambda = self.sumLambda.double()
+ nabla = (
+ lambda theta: -torch.sum(
+ torch.diag(1.0 / (observations @ theta).view(-1)) @ observations
+ )
+ + (sumLambda.T + sumLambda) @ theta
+ + self.s * theta.view(-1, 1)
+ )
+ else:
+ sumLambda = self.sumLambda.double()
+ nabla = lambda theta: (
+ sumLambda.T + sumLambda
+ ) @ theta + self.s * theta.view(-1, 1)
+
+ theta = self.rate.view(-1, 1)
+
+ W = self.construct_covariance_matrix_laplace()
+ L = float(
+ scipy.sparse.linalg.eigsh(
+ W.numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-3
+ )
+ )
+ eta = 0.5 / (L + 1)
+
+ for k in range(steps):
+ W = torch.randn(size=(self.get_m(), 1)).double()
+ theta = theta - eta * nabla(theta) + np.sqrt(2 * eta) * W
+ if verbose == True:
+ print("Iter:", k, theta.T)
+
+ self.sampled_theta = theta
+ return None
+
+ def sample_value(self, S):
+ """
+ Given a pre-sampled value evaluate certain portions of the domain S
+ :param S:
+ :return:
+ """
+ Z = self.product_integral(S)
+ map = self.sampled_theta.T @ Z @ self.sampled_theta
+ return map
+
+ def sample_path(self, S, n=128):
+ xtest = S.return_discretization(n)
+ return (self.packing.embed(xtest) @ self.sampled_theta) ** 2
+
+ def load_data(self, data):
+ super().load_data(data, times=False)
+ self.sumLambda = torch.zeros(size=(self.get_m(), self.get_m()))
+ if len(data) > 1:
+ for sample in data:
+ (S, obs, dt) = sample
+ self.sumLambda += self.product_integral(S) * dt
+
+ def add_data_point(self, new_data):
+ super().add_data_point(new_data, times=False)
+ (S, obs, dt) = new_data
+ self.sumLambda += self.product_integral(S) * dt
+
+ def penalized_likelihood(self, threads=4):
+ sumLambda = self.sumLambda.numpy()
+ if self.observations is not None:
+ observations = self.observations.numpy()
+ loss = lambda theta: float(
+ -np.sum(np.log((observations @ theta) ** 2))
+ + np.dot(theta, sumLambda @ theta)
+ + 0.5 * self.s * np.sum(theta**2)
+ )
+ else:
+ loss = lambda theta: float(
+ np.dot(theta, sumLambda @ theta) + 0.5 * self.s * np.sum(theta**2)
+ )
+
+ theta = np.random.randn(self.get_m())
+ res = minimize(loss, theta, jac=None, method="L-BFGS-B")
+ self.rate = torch.from_numpy(res.x)
+ return self.rate
+
+ def construct_covariance_matrix_laplace(self):
+ W = torch.zeros(size=(self.get_m(), self.get_m())).double()
+
+ if self.feedback == "count-record":
+ if self.observations is not None:
+ for i in range(self.observations.size()[0]):
+ A = self.observations[i, :].view(-1, 1) @ self.observations[
+ i, :
+ ].view(1, -1)
+ k = np.maximum(
+ torch.dot(self.observations[i, :], self.rate.view(-1)) ** 2,
+ self.min_intensity,
+ )
+ W = W + A / k
+ W += 2 * self.sumLambda
+ else:
+ raise AssertionError("Not implemented.")
+ return W + torch.eye(self.get_m()).double() * self.s
+
+ def map_lcb_ucb_approx_action(self, S, dt=1.0, beta=2.0):
+
+ phi = self.packing.integral(S)
+ map = phi @ self.rate
+
+ ucb = np.maximum(
+ (map + beta * np.sqrt(phi @ self.W_inv_approx @ phi.T)) ** 2,
+ (map - beta * np.sqrt(phi @ self.W_inv_approx @ phi.T)) ** 2,
+ )
+ ucb = np.minimum(ucb, self.B * S.volume() * dt)
+ lcb = 0.0
+
+ return dt * map**2, dt * lcb, dt * ucb
+
+ def mean_std_per_action(self, S, W, dt, beta):
+ Z = self.product_integral(S)
+
+ ucb, _ = maximize_matrix_quadratic_on_ellipse(
+ Z.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta
+ )
+ lcb, _ = minimize_matrix_quadratic_on_ellipse(
+ Z.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta
+ )
+
+ map = self.rate.T @ Z @ self.rate
+
+ return dt * map, dt * ucb, -lcb * dt
+
+ def mean_rate(self, S, n=128):
+ xtest = S.return_discretization(n)
+ return (self.packing.embed(xtest) @ self.rate) ** 2
+
+ def mean_rate_latent(self, S, n=128):
+ xtest = S.return_discretization(n)
+ return self.packing.embed(xtest) @ self.rate
+
+ def map_lcb_ucb_approx(self, S, n, beta=2.0, delta=0.01):
+ xtest = S.return_discretization(n)
+ if self.data is None:
+ return (
+ 0 * xtest[:, 0].view(-1, 1),
+ self.min_intensity + 0 * xtest[:, 0].view(-1, 1),
+ self.B + 0 * xtest[:, 0].view(-1, xtest.size()[0]),
+ )
+ self.fit_ellipsoid_approx()
+
+ Phi = self.packing.embed(xtest).double()
+ map = Phi @ self.rate
+ N = Phi.size()[0]
+
+ ucb = torch.zeros(size=(N, 1)).double()
+ lcb = torch.zeros(size=(N, 1)).double()
+
+ for i in range(N):
+ x = Phi[i, :].view(-1, 1)
+ maximum = np.maximum(
+ (map[i] - beta * np.sqrt(x.T @ self.W_inv_approx @ x)) ** 2,
+ (map[i] + beta * np.sqrt(x.T @ self.W_inv_approx @ x)) ** 2,
+ )
+ ucb[i, 0] = np.minimum(maximum, self.B)
+ lcb[i, 0] = 0.0
+ # lcb[i, 0] = map[i] - np.sqrt(beta) * np.sqrt(x.T @ self.W_inv_approx @ x) ** 2
+ return map**2, lcb, ucb
+
+ def map_lcb_ucb(self, S, n, beta=2.0, delta=0.01):
+ """
+ Calculate exact confidence using laplace approximation on a whole set domain
+ :param S: set
+ :param n: discretization
+ :param beta: beta
+ :return:
+ """
+
+ xtest = S.return_discretization(n)
+ if self.data is None:
+ return (
+ self.min_intensity + 0 * xtest[:, 0].view(-1, 1),
+ self.min_intensity + 0 * xtest[:, 0].view(-1, 1),
+ self.B + 0 * xtest[:, 0].view(-1, 1),
+ )
+
+ N = xtest.size()[0]
+ Phi = self.packing.embed(xtest)
+ map = (Phi @ self.rate) ** 2
+
+ if self.uncertainty == "laplace":
+ W = self.construct_covariance_matrix_laplace()
+ ucb = torch.zeros(size=(N, 1)).double()
+ lcb = torch.zeros(size=(N, 1)).double()
+
+ for i in range(N):
+ x = Phi[i, :]
+ ucbi, _ = maximize_quadratic_on_ellipse(
+ x.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta
+ )
+ lcbi, _ = minimize_quadratic_on_ellipse(
+ x.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta
+ )
+ ucb[i, 0] = ucbi
+ lcb[i, 0] = lcbi
+
+ return map, lcb, ucb
class LogisticGaussProcessRateEstimator(PermanentalProcessRateEstimator):
- def penalized_likelihood(self, threads=4):
- logistic = lambda x: np.log(1 + np.exp(x))
- weights = self.weights.numpy()
- nodes = self.nodes.numpy()
-
- if self.observations is not None:
- observations = self.observations.numpy()
- loss = lambda theta: float(-np.sum(np.log(logistic(observations @ theta))) + np.sum(
- weights * logistic(theta @ nodes.T)) + self.s * np.sum(theta ** 2))
- else:
- loss = lambda theta: float(np.sum(weights * logistic(theta @ nodes.T)) + self.s * np.sum(theta ** 2))
-
- theta = np.random.randn(self.get_m())
- res = minimize(loss, theta, jac=None, method='L-BFGS-B',
- options={'maxcor': 20, 'iprint': -1, 'maxfun': 150000, 'maxls': 50})
- self.rate = torch.from_numpy(res.x)
-
- return self.rate
-
- def logistic(self, x):
- return torch.log(1 + torch.exp(x))
-
- def mean_rate(self, S, n=128):
- xtest = S.return_discretization(n)
- return self.logistic(self.packing.embed(xtest) @ self.rate)
+ def penalized_likelihood(self, threads=4):
+ logistic = lambda x: np.log(1 + np.exp(x))
+ weights = self.weights.numpy()
+ nodes = self.nodes.numpy()
+
+ if self.observations is not None:
+ observations = self.observations.numpy()
+ loss = lambda theta: float(
+ -np.sum(np.log(logistic(observations @ theta)))
+ + np.sum(weights * logistic(theta @ nodes.T))
+ + self.s * np.sum(theta**2)
+ )
+ else:
+ loss = lambda theta: float(
+ np.sum(weights * logistic(theta @ nodes.T)) + self.s * np.sum(theta**2)
+ )
+
+ theta = np.random.randn(self.get_m())
+ res = minimize(
+ loss,
+ theta,
+ jac=None,
+ method="L-BFGS-B",
+ options={"maxcor": 20, "iprint": -1, "maxfun": 150000, "maxls": 50},
+ )
+ self.rate = torch.from_numpy(res.x)
+
+ return self.rate
+
+ def logistic(self, x):
+ return torch.log(1 + torch.exp(x))
+
+ def mean_rate(self, S, n=128):
+ xtest = S.return_discretization(n)
+ return self.logistic(self.packing.embed(xtest) @ self.rate)
class ExpGaussProcessRateEstimator(PermanentalProcessRateEstimator):
- def penalized_likelihood(self, threads=4):
- weights = self.weights.numpy()
- nodes = self.nodes.numpy()
-
- if self.observations is not None:
- observations = self.observations.numpy()
- loss = lambda theta: float(np.sum(observations @ theta) + np.sum(
- weights * np.exp(-theta @ nodes.T)) + self.s * np.sum(theta ** 2))
- else:
- loss = lambda theta: float(np.sum(weights * np.exp(-theta @ nodes.T)) + self.s * np.sum(theta ** 2))
-
- theta = np.zeros(self.get_m())
- res = minimize(loss, theta, jac=None, method='L-BFGS-B', options={'maxcor': 20, 'iprint': -1,
- 'maxfun': 150000, 'maxls': 100,
- 'ftol': 1e-12, 'eps': 1e-12, 'gtol': 1e-8})
- self.rate = torch.from_numpy(res.x)
-
- return self.rate
-
- def mean_rate(self, S, n=128):
- xtest = S.return_discretization(n)
- return torch.exp(-self.packing.embed(xtest) @ self.rate)
+ def penalized_likelihood(self, threads=4):
+ weights = self.weights.numpy()
+ nodes = self.nodes.numpy()
+
+ if self.observations is not None:
+ observations = self.observations.numpy()
+ loss = lambda theta: float(
+ np.sum(observations @ theta)
+ + np.sum(weights * np.exp(-theta @ nodes.T))
+ + self.s * np.sum(theta**2)
+ )
+ else:
+ loss = lambda theta: float(
+ np.sum(weights * np.exp(-theta @ nodes.T)) + self.s * np.sum(theta**2)
+ )
+
+ theta = np.zeros(self.get_m())
+ res = minimize(
+ loss,
+ theta,
+ jac=None,
+ method="L-BFGS-B",
+ options={
+ "maxcor": 20,
+ "iprint": -1,
+ "maxfun": 150000,
+ "maxls": 100,
+ "ftol": 1e-12,
+ "eps": 1e-12,
+ "gtol": 1e-8,
+ },
+ )
+ self.rate = torch.from_numpy(res.x)
+
+ return self.rate
+
+ def mean_rate(self, S, n=128):
+ xtest = S.return_discretization(n)
+ return torch.exp(-self.packing.embed(xtest) @ self.rate)
if __name__ == "__main__":
- torch.manual_seed(2)
- np.random.seed(2)
- d = 1
- gamma = 0.1
- n = 64
- B = 4.
- b = 0.1
-
- process = PoissonPointProcess(d=1, B=B, b=b)
- Sets = []
- levels = 4
- hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels)
- Sets = hierarchical_structure.get_all_sets()
-
- D = BorelSet(1, bounds=torch.Tensor([[-1., 1.]]).double())
-
- m = 64
- embedding = HermiteEmbedding(m=m, d=1, gamma=gamma)
- k = KernelFunction(gamma=gamma)
-
- estimator5 = PoissonRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d)
-
- estimator4 = PermanentalProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d)
- # estimator = PermanentalProcessRateEstimator(process, hierarchical_structure,
- # kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom", approx="ellipsoid")
- # estimator = LogGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom")
- estimator = LogGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B + 1, m=m, d=d,
- embedding=embedding)
-
- # estimator = LogisticGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom")
- estimator2 = LogisticGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d,
- embedding=embedding)
- # estimator = ExpGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom")
- estimator3 = ExpGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d,
- embedding=embedding)
-
- estimators = [estimator, estimator2, estimator3, estimator4, estimator5]
- names = ['sigmoid', 'logistic', 'exp', 'square', 'no-link']
- bands = [True, False, False, False, True]
-
- estimators = [estimator, estimator5, estimator4]
- names = ['sigmoid', 'no-link', 'square']
- bands = [False, False, False]
-
- min_vol, max_vol = estimator.get_min_max()
- dt = 10. / (b * min_vol)
- dt = dt * 2
-
- print("Suggested dt:", dt)
- c = ['k', 'r', 'b', 'y', 'g', 'orange', 'brown', 'purple'] + ['k' for i in range(500)]
-
- no_sets = len(Sets)
-
- # no_samples = 3
- # data = []
- # samples = []
- # repeats = 2
- #
- # for i in range(no_samples):
- # j = np.random.randint(0, no_sets, 1)
- # S = Sets[j[0]]
- # for _ in range(repeats):
- # sample = process.sample_discretized(S, dt)
- # samples.append(sample)
- # data.append((S, sample, dt))
- #
- # sample_D = process.sample_discretized(D, dt)
- # samples.append(sample_D)
- # no_samples = repeats * no_samples + 1
- # data.append((D, sample_D, dt))
-
- data_single = []
- basic_sets = hierarchical_structure.get_sets_level(levels)
- samples = []
-
- for set in basic_sets:
- sample = process.sample_discretized(set, dt)
- data_single.append((set, sample, dt))
- samples.append(sample)
- data = data_single
-
- # sample_D = torch.cat(samples)
- # data = [(D,sample_D,dt)]
-
- # data2 = []
- # samples = []
- # for set in basic_sets:
- # sample = process.sample_discretized(set,dt*2)
- # data2.append((set,sample,dt*2))
- # samples.append(sample)
- #
- # sample_D_2 = torch.cat(samples)
- # data = [(D, sample_D_2, dt*2)]
- #
- # data = data + data2
-
- for estimator, name, band in zip(estimators, names, bands):
- estimator.load_data(data)
-
- xtest = D.return_discretization(n=n)
-
- # likelihood based
- estimator.fit_gp()
- rate_mean = estimator.mean_rate(D, n=n)
- p = plt.plot(xtest, rate_mean, label='likelihood: ' + name)
-
- if band == True:
- _, lcb, ucb = estimator.map_lcb_ucb(D, n, beta=2.)
- plt.fill_between(xtest.numpy().flatten(), lcb.numpy().flatten(), ucb.numpy().flatten(), alpha=0.4,
- color=p[0].get_color(), label=name)
-
- for j in range(len(samples)):
- if samples[j] is not None:
- plt.plot(samples[j], samples[j] * 0, 'o', color=c[j])
-
- # for action in Sets:
- # map, lcb, ucb = estimator.map_lcb_ucb_approx_action(action,beta=2.)
- # x = np.linspace(action.bounds[0,0],action.bounds[0,1],2)
- # plt.plot(x,x*0+float(ucb/action.volume()),'-o', color = "green")
- process.visualize(D, samples=0, n=n, dt=1.)
- plt.show()
+ torch.manual_seed(2)
+ np.random.seed(2)
+ d = 1
+ gamma = 0.1
+ n = 64
+ B = 4.0
+ b = 0.1
+
+ process = PoissonPointProcess(d=1, B=B, b=b)
+ Sets = []
+ levels = 4
+ hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels)
+ Sets = hierarchical_structure.get_all_sets()
+
+ D = BorelSet(1, bounds=torch.tensor([[-1.0, 1.0]]).double())
+
+ m = 64
+ embedding = HermiteEmbedding(m=m, d=1, gamma=gamma)
+ k = KernelFunction(gamma=gamma)
+
+ estimator5 = PoissonRateEstimator(
+ hierarchical_structure,
+ kernel=k,
+ max_intensity=B,
+ basis_size_per_dim=m,
+ d=d,
+ )
+
+ estimator4 = PermanentalProcessRateEstimator(
+ process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d
+ )
+ # estimator = PermanentalProcessRateEstimator(process, hierarchical_structure,
+ # kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom", approx="ellipsoid")
+ # estimator = LogGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom")
+ estimator = LogGaussProcessRateEstimator(
+ process,
+ hierarchical_structure,
+ kernel_object=k,
+ B=B + 1,
+ m=m,
+ d=d,
+ embedding=embedding,
+ )
+
+ # estimator = LogisticGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom")
+ estimator2 = LogisticGaussProcessRateEstimator(
+ process,
+ hierarchical_structure,
+ kernel_object=k,
+ B=B,
+ m=m,
+ d=d,
+ embedding=embedding,
+ )
+ # estimator = ExpGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom")
+ estimator3 = ExpGaussProcessRateEstimator(
+ process,
+ hierarchical_structure,
+ kernel_object=k,
+ B=B,
+ m=m,
+ d=d,
+ embedding=embedding,
+ )
+
+ estimators = [estimator, estimator2, estimator3, estimator4, estimator5]
+ names = ["sigmoid", "logistic", "exp", "square", "no-link"]
+ bands = [True, False, False, False, True]
+
+ estimators = [estimator, estimator5, estimator4]
+ names = ["sigmoid", "no-link", "square"]
+ bands = [False, False, False]
+
+ min_vol, max_vol = estimator.get_min_max()
+ dt = 10.0 / (b * min_vol)
+ dt = dt * 2
+
+ print("Suggested dt:", dt)
+ c = ["k", "r", "b", "y", "g", "orange", "brown", "purple"] + [
+ "k" for i in range(500)
+ ]
+
+ no_sets = len(Sets)
+
+ # no_samples = 3
+ # data = []
+ # samples = []
+ # repeats = 2
+ #
+ # for i in range(no_samples):
+ # j = np.random.randint(0, no_sets, 1)
+ # S = Sets[j[0]]
+ # for _ in range(repeats):
+ # sample = process.sample_discretized(S, dt)
+ # samples.append(sample)
+ # data.append((S, sample, dt))
+ #
+ # sample_D = process.sample_discretized(D, dt)
+ # samples.append(sample_D)
+ # no_samples = repeats * no_samples + 1
+ # data.append((D, sample_D, dt))
+
+ data_single = []
+ basic_sets = hierarchical_structure.get_sets_level(levels)
+ samples = []
+
+ for set in basic_sets:
+ sample = process.sample_discretized(set, dt)
+ data_single.append((set, sample, dt))
+ samples.append(sample)
+ data = data_single
+
+ # sample_D = torch.cat(samples)
+ # data = [(D,sample_D,dt)]
+
+ # data2 = []
+ # samples = []
+ # for set in basic_sets:
+ # sample = process.sample_discretized(set,dt*2)
+ # data2.append((set,sample,dt*2))
+ # samples.append(sample)
+ #
+ # sample_D_2 = torch.cat(samples)
+ # data = [(D, sample_D_2, dt*2)]
+ #
+ # data = data + data2
+
+ for estimator, name, band in zip(estimators, names, bands):
+ estimator.load_data(data)
+
+ xtest = D.return_discretization(n=n)
+
+ # likelihood based
+ estimator.fit_gp()
+ rate_mean = estimator.mean_rate(D, n=n)
+ p = plt.plot(xtest, rate_mean, label="likelihood: " + name)
+
+ if band == True:
+ _, lcb, ucb = estimator.map_lcb_ucb(D, n, beta=2.0)
+ plt.fill_between(
+ xtest.numpy().flatten(),
+ lcb.numpy().flatten(),
+ ucb.numpy().flatten(),
+ alpha=0.4,
+ color=p[0].get_color(),
+ label=name,
+ )
+
+ for j in range(len(samples)):
+ if samples[j] is not None:
+ plt.plot(samples[j], samples[j] * 0, "o", color=c[j])
+
+ # for action in Sets:
+ # map, lcb, ucb = estimator.map_lcb_ucb_approx_action(action,beta=2.)
+ # x = np.linspace(action.bounds[0,0],action.bounds[0,1],2)
+ # plt.plot(x,x*0+float(ucb/action.volume()),'-o', color = "green")
+ process.visualize(D, samples=0, n=n, dt=1.0)
+ plt.show()
diff --git a/stpy/point_processes/poisson/loglinear_estimator.py b/stpy/point_processes/poisson/loglinear_estimator.py
index 4956fb4..d9ba3de 100644
--- a/stpy/point_processes/poisson/loglinear_estimator.py
+++ b/stpy/point_processes/poisson/loglinear_estimator.py
@@ -12,169 +12,195 @@
class LogLinearRateEstimator(PoissonRateEstimator):
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
-
- def least_squares_weighted(self, threads=0):
- theta = cp.Variable(self.get_m())
-
- mask = self.bucketized_counts.clone().numpy() > 0
-
- observations = self.total_bucketized_obs[mask].clone().numpy()
- phis = self.varphis[mask, :].clone().numpy()
- tau = self.total_bucketized_time.clone().numpy()
-
- variances = self.variances.view(-1).clone().numpy()
-
- for i in range(variances.shape[0]):
- if mask[i] > 0:
- variances[i] = variances[i] * tau[i] * self.variance_correction(variances[i] * tau[i])
-
- selected_variances = variances[mask]
- print(np.log(observations))
- print(selected_variances)
- objective = cp.Minimize(
- cp.sum_squares((phis @ theta) - np.log(observations) / tau[mask])) # + self.s * cp.norm2(theta))
-
- prob = cp.Problem(objective)
-
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=True,
- mosek_params={mosek.iparam.num_threads: threads})
-
- self.rate = torch.from_numpy(theta.value)
- print(self.rate)
- return self.rate
-
- def mean_var_reg_set(self, S, dt=1., beta=2.):
- if self.approx_fit == False:
- self.W = self.construct_covariance_matrix_regression()
- self.approx_fit = True
-
- map = 0
- lcb = 0
- ucb = 0
- for set in self.basic_sets:
- if S.inside(set):
- x = self.packing.integral(set).view(-1, 1)
- lcb = lcb + torch.exp(dt * (x @ self.rate - beta * np.sqrt(x.T @ self.W_inv @ x)))
- ucb = ucb + torch.exp(dt * (x @ self.rate + beta * np.sqrt(x.T @ self.W_inv @ x)))
- map = map + torch.exp(dt * x @ self.rate)
- return map, ucb, lcb
-
- def fit_ellipsoid_approx(self):
- self.W = self.construct_covariance_matrix_regression()
- self.W_inv = torch.pinverse(self.W)
-
- # def map_lcb_ucb_approx_action(self, S, dt=1., beta=2.):
- # phi = self.packing.integral(S) * dt
- # map = phi @ self.rate
- # ucb = map + beta * np.sqrt(phi @ self.W_inv_approx @ phi.T)
- # ucb = np.minimum(ucb, self.B * S.volume() * dt)
- #
- # lcb = map - beta * np.sqrt(phi @ self.W_inv_approx @ phi.T)
- # lcb = np.maximum(lcb, self.b * S.volume() * dt)
- # return map, lcb, ucb
-
- def construct_covariance_matrix_regression(self):
-
- W = torch.zeros(size=(self.get_m(), self.get_m())).double()
-
- if self.data is not None:
- variances = self.variances
-
- if self.feedback == "count-record":
- mask = self.bucketized_counts > 0
- tau = self.total_bucketized_time
- for index_o, o in enumerate(self.bucketized_obs):
- n = mask[index_o]
- if n > 0:
- A = self.varphis[index_o, :].view(-1, 1) @ self.varphis[index_o, :].view(1, -1) * tau[index_o]
- W = W + A / (variances[index_o])
-
- elif self.feedback == "histogram":
-
- for datapoint in self.data:
- (S, obs, dt) = datapoint
- varphi = self.packing.integral(S) * dt
- variance = varphi @ self.rate
- variance = variance
- A = varphi.view(-1, 1) @ varphi.view(1, -1)
- W = W + A / variance
-
- return W + torch.eye(self.get_m()).double() * self.s
-
- def mean_set(self, S, dt=1.):
- mu = 0
- for set in self.basic_sets:
- if S.inside(set):
- mu = mu + torch.exp(dt * self.packing.integral(set) @ self.rate)
- return mu
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ def least_squares_weighted(self, threads=0):
+ theta = cp.Variable(self.get_m())
+
+ mask = self.bucketized_counts.clone().numpy() > 0
+
+ observations = self.total_bucketized_obs[mask].clone().numpy()
+ phis = self.varphis[mask, :].clone().numpy()
+ tau = self.total_bucketized_time.clone().numpy()
+
+ variances = self.variances.view(-1).clone().numpy()
+
+ for i in range(variances.shape[0]):
+ if mask[i] > 0:
+ variances[i] = (
+ variances[i]
+ * tau[i]
+ * self.variance_correction(variances[i] * tau[i])
+ )
+
+ selected_variances = variances[mask]
+ print(np.log(observations))
+ print(selected_variances)
+ objective = cp.Minimize(
+ cp.sum_squares((phis @ theta) - np.log(observations) / tau[mask])
+ ) # + self.s * cp.norm2(theta))
+
+ prob = cp.Problem(objective)
+
+ prob.solve(
+ solver=cp.MOSEK,
+ warm_start=False,
+ verbose=True,
+ mosek_params={mosek.iparam.num_threads: threads},
+ )
+
+ self.rate = torch.from_numpy(theta.value)
+ print(self.rate)
+ return self.rate
+
+ def mean_var_reg_set(self, S, dt=1.0, beta=2.0):
+ if self.approx_fit == False:
+ self.W = self.construct_covariance_matrix_regression()
+ self.approx_fit = True
+
+ map = 0
+ lcb = 0
+ ucb = 0
+ for set in self.basic_sets:
+ if S.inside(set):
+ x = self.packing.integral(set).view(-1, 1)
+ lcb = lcb + torch.exp(
+ dt * (x @ self.rate - beta * np.sqrt(x.T @ self.W_inv @ x))
+ )
+ ucb = ucb + torch.exp(
+ dt * (x @ self.rate + beta * np.sqrt(x.T @ self.W_inv @ x))
+ )
+ map = map + torch.exp(dt * x @ self.rate)
+ return map, ucb, lcb
+
+ def fit_ellipsoid_approx(self):
+ self.W = self.construct_covariance_matrix_regression()
+ self.W_inv = torch.pinverse(self.W)
+
+ # def map_lcb_ucb_approx_action(self, S, dt=1., beta=2.):
+ # phi = self.packing.integral(S) * dt
+ # map = phi @ self.rate
+ # ucb = map + beta * np.sqrt(phi @ self.W_inv_approx @ phi.T)
+ # ucb = np.minimum(ucb, self.B * S.volume() * dt)
+ #
+ # lcb = map - beta * np.sqrt(phi @ self.W_inv_approx @ phi.T)
+ # lcb = np.maximum(lcb, self.b * S.volume() * dt)
+ # return map, lcb, ucb
+
+ def construct_covariance_matrix_regression(self):
+
+ W = torch.zeros(size=(self.get_m(), self.get_m())).double()
+
+ if self.data is not None:
+ variances = self.variances
+
+ if self.feedback == "count-record":
+ mask = self.bucketized_counts > 0
+ tau = self.total_bucketized_time
+ for index_o, o in enumerate(self.bucketized_obs):
+ n = mask[index_o]
+ if n > 0:
+ A = (
+ self.varphis[index_o, :].view(-1, 1)
+ @ self.varphis[index_o, :].view(1, -1)
+ * tau[index_o]
+ )
+ W = W + A / (variances[index_o])
+
+ elif self.feedback == "histogram":
+
+ for datapoint in self.data:
+ (S, obs, dt) = datapoint
+ varphi = self.packing.integral(S) * dt
+ variance = varphi @ self.rate
+ variance = variance
+ A = varphi.view(-1, 1) @ varphi.view(1, -1)
+ W = W + A / variance
+
+ return W + torch.eye(self.get_m()).double() * self.s
+
+ def mean_set(self, S, dt=1.0):
+ mu = 0
+ for set in self.basic_sets:
+ if S.inside(set):
+ mu = mu + torch.exp(dt * self.packing.integral(set) @ self.rate)
+ return mu
if __name__ == "__main__":
- torch.manual_seed(2)
- np.random.seed(2)
- d = 1
- gamma = 0.1
- n = 64
- B = 4.
- b = 0.1
-
- process = PoissonPointProcess(d=1, B=B, b=b)
- Sets = []
- levels = 5
- hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels)
- Sets = hierarchical_structure.get_all_sets()
-
- D = BorelSet(1, bounds=torch.Tensor([[-1., 1.]]).double())
-
- m = 128
- k = KernelFunction(gamma=gamma)
- estimator = LogLinearRateEstimator(process, hierarchical_structure,
- kernel_object=k, B=B, m=m, d=d, estimator='least-sq')
-
- min_vol, max_vol = estimator.get_min_max()
-
- dt = 1. / (b * min_vol)
- dt = dt * 2
-
- print("Suggested dt:", dt)
- c = ['k', 'r', 'b', 'y', 'g', 'orange', 'brown', 'purple'] + ['k' for i in range(500)]
-
- no_sets = len(Sets)
- no_samples = 0
- data = []
- samples = []
- repeats = 2
-
- for i in range(no_samples):
- j = np.random.randint(0, no_sets, 1)
- S = Sets[j[0]]
- for _ in range(repeats):
- sample = process.sample_discretized(S, dt)
- samples.append(sample)
- data.append((S, sample, dt))
-
- sample_D = process.sample_discretized(D, dt)
- samples.append(sample_D)
- no_samples = repeats * no_samples + 1
- data.append((D, sample_D, dt))
-
- estimator.load_data(data)
-
- xtest = D.return_discretization(n=n)
-
- # likelihood based
- estimator.fit_gp()
-
- for set in estimator.basic_sets:
- x = np.linspace(set.bounds[0, 0], set.bounds[0, 1], 2)
- val = estimator.mean_set(set)
- plt.plot(x, x * 0 + float(val), 'b-o')
- vol = process.rate_volume(set)
- plt.plot(x, x * 0 + float(vol), '-o', color='orange')
- for j in range(no_samples):
- if samples[j] is not None:
- plt.plot(samples[j], samples[j] * 0, 'o', color=c[j])
-
- process.visualize(D, samples=0, n=n, dt=1.)
+ torch.manual_seed(2)
+ np.random.seed(2)
+ d = 1
+ gamma = 0.1
+ n = 64
+ B = 4.0
+ b = 0.1
+
+ process = PoissonPointProcess(d=1, B=B, b=b)
+ Sets = []
+ levels = 5
+ hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels)
+ Sets = hierarchical_structure.get_all_sets()
+
+ D = BorelSet(1, bounds=torch.tensor([[-1.0, 1.0]]).double())
+
+ m = 128
+ k = KernelFunction(gamma=gamma)
+ estimator = LogLinearRateEstimator(
+ process,
+ hierarchical_structure,
+ kernel_object=k,
+ B=B,
+ m=m,
+ d=d,
+ estimator="least-sq",
+ )
+
+ min_vol, max_vol = estimator.get_min_max()
+
+ dt = 1.0 / (b * min_vol)
+ dt = dt * 2
+
+ print("Suggested dt:", dt)
+ c = ["k", "r", "b", "y", "g", "orange", "brown", "purple"] + [
+ "k" for i in range(500)
+ ]
+
+ no_sets = len(Sets)
+ no_samples = 0
+ data = []
+ samples = []
+ repeats = 2
+
+ for i in range(no_samples):
+ j = np.random.randint(0, no_sets, 1)
+ S = Sets[j[0]]
+ for _ in range(repeats):
+ sample = process.sample_discretized(S, dt)
+ samples.append(sample)
+ data.append((S, sample, dt))
+
+ sample_D = process.sample_discretized(D, dt)
+ samples.append(sample_D)
+ no_samples = repeats * no_samples + 1
+ data.append((D, sample_D, dt))
+
+ estimator.load_data(data)
+
+ xtest = D.return_discretization(n=n)
+
+ # likelihood based
+ estimator.fit_gp()
+
+ for set in estimator.basic_sets:
+ x = np.linspace(set.bounds[0, 0], set.bounds[0, 1], 2)
+ val = estimator.mean_set(set)
+ plt.plot(x, x * 0 + float(val), "b-o")
+ vol = process.rate_volume(set)
+ plt.plot(x, x * 0 + float(vol), "-o", color="orange")
+ for j in range(no_samples):
+ if samples[j] is not None:
+ plt.plot(samples[j], samples[j] * 0, "o", color=c[j])
+
+ process.visualize(D, samples=0, n=n, dt=1.0)
diff --git a/stpy/point_processes/poisson/mbr_positive_estimator.py b/stpy/point_processes/poisson/mbr_positive_estimator.py
index 5924d22..8993cb7 100644
--- a/stpy/point_processes/poisson/mbr_positive_estimator.py
+++ b/stpy/point_processes/poisson/mbr_positive_estimator.py
@@ -8,355 +8,436 @@
from stpy.embeddings.embedding import HermiteEmbedding
from stpy.kernels import KernelFunction
from stpy.point_processes.poisson import PoissonPointProcess
-from stpy.point_processes.poisson.link_fun_rate_estimator import PermanentalProcessRateEstimator
+from stpy.point_processes.poisson.link_fun_rate_estimator import (
+ PermanentalProcessRateEstimator,
+)
class MBRPositiveEstimator(PermanentalProcessRateEstimator):
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
-
- if self.feedback == "count-record":
- self.varLambdas_vec = torch.zeros(
- size=(self.varLambdas.size()[0], self.varLambdas.size()[1] * self.varLambdas.size()[2])).double()
- for i in range(self.varLambdas.size()[0]):
- self.varLambdas_vec[i, :] = self.varLambdas[i, :, :].reshape(-1)
-
- self.approx_solver = True
-
- def fit_gp(self, threads=4):
- if self.data is not None:
- super().fit_gp(threads=threads)
- else:
- self.rate = None
-
- def mean_rate(self, S, n=128):
- xtest = S.return_discretization(n)
- emb = self.packing.embed(xtest)
- mu = torch.einsum('ij,jk,ik->i', emb, self.rate, emb).view(-1, 1)
- return mu
-
- def rate_value(self, x, dt=1):
- emb = self.packing.embed(x) * dt
- mu = torch.einsum('ij,jk,ik->i', emb, self.rate, emb).view(-1, 1)
- return mu
-
- def mean_set(self, S, dt=1.):
- if self.data is not None:
- emb = self.product_integral(S) * dt
- mu = torch.trace(emb @ self.rate).view(1, 1)
- else:
- mu = self.b * S.volume()
- return mu
-
- def penalized_likelihood(self, threads=4):
- sumLambda = self.sumLambda.numpy()
- Theta = cp.Variable((self.get_m(), self.get_m()), symmetric=True)
-
- if self.observations is not None:
- observations = self.observations.numpy()
- # cost = cp.sum_squares(cp.diag(emb @ A @ emb.T) - y.view(-1).numpy()) / (self.s ** 2) + (self.lam) * cp.norm(A, "fro")
- objective = -cp.sum(cp.log(observations @ Theta @ observations.T)) + \
- cp.trace(sumLambda @ Theta) + self.s * cp.sum_squares(cp.vec(Theta))
- else:
- objective = cp.trace(sumLambda @ Theta) + self.s * cp.sum_squares(cp.vec(Theta))
-
- # if self.get_m() == 2:
- # # use Lorentz-cone special result
- # constraints = [cp.SOC(Theta[0,0]+Theta[1,1],Theta[1,1] )]
- # else:
- # constraints = [Theta >> 0]
- constraints = []
- prob = cp.Problem(cp.Minimize(objective), constraints)
-
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False,
- mosek_params={mosek.iparam.num_threads: threads,
- mosek.iparam.intpnt_solve_form: mosek.solveform.dual,
- mosek.dparam.intpnt_co_tol_pfeas: 1e-3,
- mosek.dparam.intpnt_co_tol_dfeas: 1e-3,
- mosek.dparam.intpnt_co_tol_rel_gap: 1e-3})
- self.rate = torch.from_numpy(Theta.value)
- return self.rate
-
- def penalized_likelihood_bins(self, threads=4):
- Theta = cp.Variable((self.get_m(), self.get_m()), symmetric=True)
-
- mask = self.bucketized_counts.clone().numpy() > 0
- observations = self.total_bucketized_obs[mask].clone().numpy()
- tau = self.total_bucketized_time[mask].clone().numpy()
- varLambdas_vec = self.varLambdas_vec[mask, :].clone().numpy()
-
- objective = -cp.sum(observations @ cp.log(cp.multiply(tau, varLambdas_vec @ cp.vec(Theta)))) + \
- cp.sum(cp.multiply(tau, varLambdas_vec @ cp.vec(Theta))) + self.s * cp.sum_squares(cp.vec(Theta))
-
- constraints = [Theta >> 0]
- prob = cp.Problem(cp.Minimize(objective), constraints)
-
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False,
- mosek_params={mosek.iparam.num_threads: threads,
- mosek.iparam.intpnt_solve_form: mosek.solveform.dual,
- mosek.dparam.intpnt_co_tol_pfeas: 1e-3,
- mosek.dparam.intpnt_co_tol_dfeas: 1e-3,
- mosek.dparam.intpnt_co_tol_rel_gap: 1e-3})
- self.rate = torch.from_numpy(Theta.value)
- return self.rate
-
- def least_squares_weighted(self, threads=4):
-
- if self.approx_fit == False:
- self.bucketization()
-
- Theta = cp.Variable((self.get_m(), self.get_m()), symmetric=True)
-
- mask = self.bucketized_counts.clone().numpy() > 0
- observations = self.total_bucketized_obs[mask].clone().numpy()
- tau = self.total_bucketized_time.clone().numpy()
-
- # varsumLambdas
- varLambdas_vec = self.varLambdas_vec[mask, :].clone().numpy()
-
- variances = self.variances.view(-1).clone().numpy()
-
- for i in range(variances.shape[0]):
- if mask[i] > 0:
- variances[i] = variances[i] * tau[i] * self.variance_correction(variances[i] * tau[i])
-
- selected_variances = variances[mask]
-
- objective = cp.sum_squares((varLambdas_vec @ cp.vec(Theta) +
- - observations) / np.sqrt(selected_variances)) + self.s * cp.sum_squares(
- cp.vec(Theta)) / 2
- constraints = [Theta >> 0]
- prob = cp.Problem(cp.Minimize(objective), constraints)
-
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False,
- mosek_params={mosek.iparam.num_threads: threads,
- mosek.iparam.intpnt_solve_form: mosek.solveform.dual,
- mosek.dparam.intpnt_co_tol_pfeas: 1e-3,
- mosek.dparam.intpnt_co_tol_dfeas: 1e-3,
- mosek.dparam.intpnt_co_tol_rel_gap: 1e-3})
-
- self.rate = torch.from_numpy(Theta.value)
- return self.rate
-
- def construct_covariance_matrix(self):
- if self.estimator == "bins":
- self.construct_covariance_matrix_bins()
- elif self.estimator == "least-sq":
- self.construct_covariance_matrix_regression()
- else:
- raise NotImplementedError("Covariance not implemented")
-
- def construct_covariance_matrix_regression(self):
- varLambdas = self.varLambdas_vec.clone()
- variances = self.variances
- mask = self.bucketized_counts > 0
- tau = self.total_bucketized_time
- W = torch.zeros(size=(self.get_m() ** 2, self.get_m() ** 2)).double()
- I = torch.eye(self.get_m() ** 2).double()
- W_inv = self.s * torch.eye(self.get_m() ** 2).double()
-
- for index_o, o in enumerate(self.bucketized_obs):
- n = mask[index_o]
- if n > 0:
- k = self.variance_correction(tau[index_o] * variances[index_o])
- v = tau[index_o] / (variances[index_o] * k)
-
- vec = varLambdas[index_o, :].view(-1, 1)
- A = vec @ vec.T
- W = W + A * v
- denom = 1. + v * vec.T @ W_inv @ vec
- W_inv = W_inv @ (I - v * vec @ (vec.T @ W_inv) / denom)
-
- self.W = W + self.s * torch.eye(self.get_m() ** 2).double()
- self.W_inv = W_inv
- # self.W_cholesky = torch.cholesky(self.W, upper=True)
- return self.W
-
- def construct_covariance_matrix_bins(self):
- self.construct_covariance_matrix_regression()
-
- def mean_var_reg_set(self, S, dt=1., beta=2., lcb_compute=False):
-
- if self.data is None:
- return S.volume() * self.b, S.volume() * self.B, S.volume() * self.b
-
- if self.approx_fit == False:
- self.W = self.construct_covariance_matrix()
- self.approx_fit = True
-
- map = None
- lcb = None
-
- if self.approx_solver == True:
- ucb = self.band_no_opt(S, beta=beta, dt=dt, maximization=True)
- if lcb_compute == True:
- lcb = self.band_no_opt(S, beta=beta, dt=dt, maximization=False)
- else:
- ucb = self.band(S, beta=beta, dt=dt, maximization=True)
- if lcb_compute == True:
- lcb = self.band(S, beta=beta, dt=dt, maximization=False)
-
- return map, ucb, lcb
-
- def mean_var_bins_set(self, S, dt=1., beta=2., lcb_compute=False):
- return self.mean_var_reg_set(S, dt=dt, beta=beta, lcb_compute=lcb_compute)
-
- def band(self, S, beta=2., dt=1., maximization=True):
- emb = self.product_integral(S) * dt
- A = cp.Variable((self.get_m(), self.get_m()), symmetric=True)
- cost = cp.trace(A @ emb)
- Z = self.W_cholesky.clone()
- zero = np.zeros(self.get_m() ** 2)
- constraints = [cp.SOC(zero.T @ cp.vec(A) + self.s * beta ** 2, Z @ (cp.vec(A) - cp.vec(self.rate.numpy())))]
- constraints += [A >> 0]
-
- if maximization == True:
- prob = cp.Problem(cp.Maximize(cost), constraints)
- else:
- prob = cp.Problem(cp.Minimize(cost), constraints)
-
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False,
- mosek_params={mosek.iparam.num_threads: 4,
- mosek.iparam.intpnt_solve_form: mosek.solveform.dual,
- mosek.dparam.intpnt_co_tol_pfeas: 1e-3,
- mosek.dparam.intpnt_co_tol_dfeas: 1e-3,
- mosek.dparam.intpnt_co_tol_rel_gap: 1e-3})
- ucb = torch.trace(torch.from_numpy(A.value) @ emb)
- return ucb
-
- def band_no_opt(self, S, beta=2., dt=1., maximization=True):
-
- if self.rate is None:
- if maximization == True:
- return S.volume() * dt * self.B
- else:
- return S.volume() * dt * self.b
- else:
- emb = self.product_integral(S)
- cost = torch.trace(self.rate @ emb)
- if maximization == True:
- out = cost + beta * emb.view(1, -1) @ self.W_inv @ emb.view(-1, 1)
- else:
- out = np.maximum(cost - beta * emb.view(1, -1) @ self.W_inv @ emb.view(-1, 1), 0.)
- return out * dt
-
- def gap(self, S, actions, w, dt, beta=2.):
- """
- Estimates the gap of an action S,
- :param S:
- :param dt:
- :return:
- """
-
- if self.data is None:
- return (self.B - self.b) * S.volume() / w(S)
-
- if self.ucb_identified == False:
- print("Recomputing UCB.....")
- self.ucb_identified = True
- self.max_ucb = -1000
- self.ucb_action = None
- for action in actions:
- _, ucb, __ = self.mean_var_reg_set(action, dt=dt, beta=self.beta(0))
- ucb = ucb / w(action)
- if ucb > self.max_ucb:
- self.max_ucb = ucb
- self.ucb_action = action
- map, ucb, lcb = self.mean_var_reg_set(S, dt=dt, beta=self.beta(0), lcb_compute=True)
- gap = w(S) * self.max_ucb - lcb
- return gap
-
- def information(self, S, dt, precomputed=None):
-
- if self.data is None:
- return 1.
-
- if self.W is None:
- self.construct_covariance_matrix()
-
- if self.feedback == "count-record":
- varphi_UCB = self.product_integral(self.ucb_action).view(1, -1) * dt
-
- ind = []
- for index, set in enumerate(self.basic_sets):
- if S.inside(set):
- ind.append(index)
- Upsilon = self.varLambdas_vec[ind, :] * dt
-
- I = torch.eye(Upsilon.size()[0]).double()
- G = self.W_inv - self.W_inv @ Upsilon.T @ torch.inverse(I + Upsilon @ Upsilon.T) @ Upsilon @ self.W_inv
- return 10e-4 + torch.logdet(varphi_UCB @ self.W_inv @ varphi_UCB.T) - torch.logdet(
- varphi_UCB @ G @ varphi_UCB.T)
-
- elif self.feedback == "histogram":
- raise NotImplementedError("Not implemented.")
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ if self.feedback == "count-record":
+ self.varLambdas_vec = torch.zeros(
+ size=(
+ self.varLambdas.size()[0],
+ self.varLambdas.size()[1] * self.varLambdas.size()[2],
+ )
+ ).double()
+ for i in range(self.varLambdas.size()[0]):
+ self.varLambdas_vec[i, :] = self.varLambdas[i, :, :].reshape(-1)
+
+ self.approx_solver = True
+
+ def fit_gp(self, threads=4):
+ if self.data is not None:
+ super().fit_gp(threads=threads)
+ else:
+ self.rate = None
+
+ def mean_rate(self, S, n=128):
+ xtest = S.return_discretization(n)
+ emb = self.packing.embed(xtest)
+ mu = torch.einsum("ij,jk,ik->i", emb, self.rate, emb).view(-1, 1)
+ return mu
+
+ def rate_value(self, x, dt=1):
+ emb = self.packing.embed(x) * dt
+ mu = torch.einsum("ij,jk,ik->i", emb, self.rate, emb).view(-1, 1)
+ return mu
+
+ def mean_set(self, S, dt=1.0):
+ if self.data is not None:
+ emb = self.product_integral(S) * dt
+ mu = torch.trace(emb @ self.rate).view(1, 1)
+ else:
+ mu = self.min_intensity * S.volume()
+ return mu
+
+ def penalized_likelihood(self, threads=4):
+ sumLambda = self.sumLambda.numpy()
+ Theta = cp.Variable((self.get_m(), self.get_m()), symmetric=True)
+
+ if self.observations is not None:
+ observations = self.observations.numpy()
+ # cost = cp.sum_squares(cp.diag(emb @ A @ emb.T) - y.view(-1).numpy()) / (self.s ** 2) + (self.lam) * cp.norm(A, "fro")
+ objective = (
+ -cp.sum(cp.log(observations @ Theta @ observations.T))
+ + cp.trace(sumLambda @ Theta)
+ + self.s * cp.sum_squares(cp.vec(Theta))
+ )
+ else:
+ objective = cp.trace(sumLambda @ Theta) + self.s * cp.sum_squares(
+ cp.vec(Theta)
+ )
+
+ # if self.get_m() == 2:
+ # # use Lorentz-cone special result
+ # constraints = [cp.SOC(Theta[0,0]+Theta[1,1],Theta[1,1] )]
+ # else:
+ # constraints = [Theta >> 0]
+ constraints = []
+ prob = cp.Problem(cp.Minimize(objective), constraints)
+
+ prob.solve(
+ solver=cp.MOSEK,
+ warm_start=False,
+ verbose=False,
+ mosek_params={
+ mosek.iparam.num_threads: threads,
+ mosek.iparam.intpnt_solve_form: mosek.solveform.dual,
+ mosek.dparam.intpnt_co_tol_pfeas: 1e-3,
+ mosek.dparam.intpnt_co_tol_dfeas: 1e-3,
+ mosek.dparam.intpnt_co_tol_rel_gap: 1e-3,
+ },
+ )
+ self.rate = torch.from_numpy(Theta.value)
+ return self.rate
+
+ def penalized_likelihood_bins(self, threads=4):
+ Theta = cp.Variable((self.get_m(), self.get_m()), symmetric=True)
+
+ mask = self.bucketized_counts.clone().numpy() > 0
+ observations = self.total_bucketized_obs[mask].clone().numpy()
+ tau = self.total_bucketized_time[mask].clone().numpy()
+ varLambdas_vec = self.varLambdas_vec[mask, :].clone().numpy()
+
+ objective = (
+ -cp.sum(
+ observations @ cp.log(cp.multiply(tau, varLambdas_vec @ cp.vec(Theta)))
+ )
+ + cp.sum(cp.multiply(tau, varLambdas_vec @ cp.vec(Theta)))
+ + self.s * cp.sum_squares(cp.vec(Theta))
+ )
+
+ constraints = [Theta >> 0]
+ prob = cp.Problem(cp.Minimize(objective), constraints)
+
+ prob.solve(
+ solver=cp.MOSEK,
+ warm_start=False,
+ verbose=False,
+ mosek_params={
+ mosek.iparam.num_threads: threads,
+ mosek.iparam.intpnt_solve_form: mosek.solveform.dual,
+ mosek.dparam.intpnt_co_tol_pfeas: 1e-3,
+ mosek.dparam.intpnt_co_tol_dfeas: 1e-3,
+ mosek.dparam.intpnt_co_tol_rel_gap: 1e-3,
+ },
+ )
+ self.rate = torch.from_numpy(Theta.value)
+ return self.rate
+
+ def least_squares_weighted(self, threads=4):
+
+ if self.approx_fit == False:
+ self.bucketization()
+
+ Theta = cp.Variable((self.get_m(), self.get_m()), symmetric=True)
+
+ mask = self.bucketized_counts.clone().numpy() > 0
+ observations = self.total_bucketized_obs[mask].clone().numpy()
+ tau = self.total_bucketized_time.clone().numpy()
+
+ # varsumLambdas
+ varLambdas_vec = self.varLambdas_vec[mask, :].clone().numpy()
+
+ variances = self.variances.view(-1).clone().numpy()
+
+ for i in range(variances.shape[0]):
+ if mask[i] > 0:
+ variances[i] = (
+ variances[i]
+ * tau[i]
+ * self.variance_correction(variances[i] * tau[i])
+ )
+
+ selected_variances = variances[mask]
+
+ objective = (
+ cp.sum_squares(
+ (varLambdas_vec @ cp.vec(Theta) + -observations)
+ / np.sqrt(selected_variances)
+ )
+ + self.s * cp.sum_squares(cp.vec(Theta)) / 2
+ )
+ constraints = [Theta >> 0]
+ prob = cp.Problem(cp.Minimize(objective), constraints)
+
+ prob.solve(
+ solver=cp.MOSEK,
+ warm_start=False,
+ verbose=False,
+ mosek_params={
+ mosek.iparam.num_threads: threads,
+ mosek.iparam.intpnt_solve_form: mosek.solveform.dual,
+ mosek.dparam.intpnt_co_tol_pfeas: 1e-3,
+ mosek.dparam.intpnt_co_tol_dfeas: 1e-3,
+ mosek.dparam.intpnt_co_tol_rel_gap: 1e-3,
+ },
+ )
+
+ self.rate = torch.from_numpy(Theta.value)
+ return self.rate
+
+ def construct_covariance_matrix(self):
+ if self.estimator == "bins":
+ self.construct_covariance_matrix_bins()
+ elif self.estimator == "least-sq":
+ self.construct_covariance_matrix_regression()
+ else:
+ raise NotImplementedError("Covariance not implemented")
+
+ def construct_covariance_matrix_regression(self):
+ varLambdas = self.varLambdas_vec.clone()
+ variances = self.variances
+ mask = self.bucketized_counts > 0
+ tau = self.total_bucketized_time
+ W = torch.zeros(size=(self.get_m() ** 2, self.get_m() ** 2)).double()
+ I = torch.eye(self.get_m() ** 2).double()
+ W_inv = self.s * torch.eye(self.get_m() ** 2).double()
+
+ for index_o, o in enumerate(self.bucketized_obs):
+ n = mask[index_o]
+ if n > 0:
+ k = self.variance_correction(tau[index_o] * variances[index_o])
+ v = tau[index_o] / (variances[index_o] * k)
+
+ vec = varLambdas[index_o, :].view(-1, 1)
+ A = vec @ vec.T
+ W = W + A * v
+ denom = 1.0 + v * vec.T @ W_inv @ vec
+ W_inv = W_inv @ (I - v * vec @ (vec.T @ W_inv) / denom)
+
+ self.W = W + self.s * torch.eye(self.get_m() ** 2).double()
+ self.W_inv = W_inv
+ # self.W_cholesky = torch.cholesky(self.W, upper=True)
+ return self.W
+
+ def construct_covariance_matrix_bins(self):
+ self.construct_covariance_matrix_regression()
+
+ def mean_var_reg_set(self, S, dt=1.0, beta=2.0, lcb_compute=False):
+
+ if self.data is None:
+ return (
+ S.volume() * self.min_intensity,
+ S.volume() * self.B,
+ S.volume() * self.min_intensity,
+ )
+
+ if self.approx_fit == False:
+ self.W = self.construct_covariance_matrix()
+ self.approx_fit = True
+
+ map = None
+ lcb = None
+
+ if self.approx_solver == True:
+ ucb = self.band_no_opt(S, beta=beta, dt=dt, maximization=True)
+ if lcb_compute == True:
+ lcb = self.band_no_opt(S, beta=beta, dt=dt, maximization=False)
+ else:
+ ucb = self.band(S, beta=beta, dt=dt, maximization=True)
+ if lcb_compute == True:
+ lcb = self.band(S, beta=beta, dt=dt, maximization=False)
+
+ return map, ucb, lcb
+
+ def mean_var_bins_set(self, S, dt=1.0, beta=2.0, lcb_compute=False):
+ return self.mean_var_reg_set(S, dt=dt, beta=beta, lcb_compute=lcb_compute)
+
+ def band(self, S, beta=2.0, dt=1.0, maximization=True):
+ emb = self.product_integral(S) * dt
+ A = cp.Variable((self.get_m(), self.get_m()), symmetric=True)
+ cost = cp.trace(A @ emb)
+ Z = self.W_cholesky.clone()
+ zero = np.zeros(self.get_m() ** 2)
+ constraints = [
+ cp.SOC(
+ zero.T @ cp.vec(A) + self.s * beta**2,
+ Z @ (cp.vec(A) - cp.vec(self.rate.numpy())),
+ )
+ ]
+ constraints += [A >> 0]
+
+ if maximization == True:
+ prob = cp.Problem(cp.Maximize(cost), constraints)
+ else:
+ prob = cp.Problem(cp.Minimize(cost), constraints)
+
+ prob.solve(
+ solver=cp.MOSEK,
+ warm_start=False,
+ verbose=False,
+ mosek_params={
+ mosek.iparam.num_threads: 4,
+ mosek.iparam.intpnt_solve_form: mosek.solveform.dual,
+ mosek.dparam.intpnt_co_tol_pfeas: 1e-3,
+ mosek.dparam.intpnt_co_tol_dfeas: 1e-3,
+ mosek.dparam.intpnt_co_tol_rel_gap: 1e-3,
+ },
+ )
+ ucb = torch.trace(torch.from_numpy(A.value) @ emb)
+ return ucb
+
+ def band_no_opt(self, S, beta=2.0, dt=1.0, maximization=True):
+
+ if self.rate is None:
+ if maximization == True:
+ return S.volume() * dt * self.B
+ else:
+ return S.volume() * dt * self.min_intensity
+ else:
+ emb = self.product_integral(S)
+ cost = torch.trace(self.rate @ emb)
+ if maximization == True:
+ out = cost + beta * emb.view(1, -1) @ self.W_inv @ emb.view(-1, 1)
+ else:
+ out = np.maximum(
+ cost - beta * emb.view(1, -1) @ self.W_inv @ emb.view(-1, 1), 0.0
+ )
+ return out * dt
+
+ def gap(self, S, actions, w, dt, beta=2.0):
+ """
+ Estimates the gap of an action S,
+ :param S:
+ :param dt:
+ :return:
+ """
+
+ if self.data is None:
+ return (self.B - self.min_intensity) * S.volume() / w(S)
+
+ if self.ucb_identified == False:
+ print("Recomputing UCB.....")
+ self.ucb_identified = True
+ self.max_ucb = -1000
+ self.ucb_action = None
+ for action in actions:
+ _, ucb, __ = self.mean_var_reg_set(action, dt=dt, beta=self.beta(0))
+ ucb = ucb / w(action)
+ if ucb > self.max_ucb:
+ self.max_ucb = ucb
+ self.ucb_action = action
+ map, ucb, lcb = self.mean_var_reg_set(
+ S, dt=dt, beta=self.beta(0), lcb_compute=True
+ )
+ gap = w(S) * self.max_ucb - lcb
+ return gap
+
+ def information(self, S, dt, precomputed=None):
+
+ if self.data is None:
+ return 1.0
+
+ if self.W is None:
+ self.construct_covariance_matrix()
+
+ if self.feedback == "count-record":
+ varphi_UCB = self.product_integral(self.ucb_action).view(1, -1) * dt
+
+ ind = []
+ for index, set in enumerate(self.basic_sets):
+ if S.inside(set):
+ ind.append(index)
+ Upsilon = self.varLambdas_vec[ind, :] * dt
+
+ I = torch.eye(Upsilon.size()[0]).double()
+ G = (
+ self.W_inv
+ - self.W_inv
+ @ Upsilon.T
+ @ torch.inverse(I + Upsilon @ Upsilon.T)
+ @ Upsilon
+ @ self.W_inv
+ )
+ return (
+ 10e-4
+ + torch.logdet(varphi_UCB @ self.W_inv @ varphi_UCB.T)
+ - torch.logdet(varphi_UCB @ G @ varphi_UCB.T)
+ )
+
+ elif self.feedback == "histogram":
+ raise NotImplementedError("Not implemented.")
if __name__ == "__main__":
- torch.manual_seed(2)
- np.random.seed(2)
- d = 1
- gamma = 0.2
- n = 64
- B = 4.
- b = 0.5
-
- process = PoissonPointProcess(d=1, B=B, b=b)
- Sets = []
- levels = 3
- hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels)
- Sets = hierarchical_structure.get_all_sets()
-
- D = BorelSet(1, bounds=torch.Tensor([[-1., 1.]]).double())
-
- m = 32
- embedding = HermiteEmbedding(m=m, d=1, gamma=gamma)
- k = KernelFunction(gamma=gamma)
- estimator = MBRPositiveEstimator(process, hierarchical_structure, kernel_object=k,
- B=B, m=m, d=d, embedding=embedding, basis="custom")
- min_vol, max_vol = estimator.get_min_max()
-
- dt = 10. / (b * min_vol)
- dt = dt * 2
-
- print("Suggested dt:", dt)
- c = ['k', 'r', 'b', 'y', 'g', 'orange', 'brown', 'purple'] + ['k' for i in range(500)]
-
- no_sets = len(Sets)
- no_samples = 0
- data = []
- samples = []
- repeats = 2
-
- for i in range(no_samples):
- j = np.random.randint(0, no_sets, 1)
- S = Sets[j[0]]
- for _ in range(repeats):
- sample = process.sample_discretized(S, dt)
- samples.append(sample)
- data.append((S, sample, dt))
-
- sample_D = process.sample_discretized(D, dt)
- samples.append(sample_D)
- no_samples = repeats * no_samples + 1
- data.append((D, sample_D, dt))
-
- estimator.load_data(data)
-
- xtest = D.return_discretization(n=n)
-
- # likelihood based
- estimator.penalized_likelihood()
- rate_mean = estimator.mean_rate(D, n=n)
-
- # _, lcb, ucb = estimator.map_lcb_ucb(D, n, beta=2.)
-
- for j in range(no_samples):
- if samples[j] is not None:
- plt.plot(samples[j], samples[j] * 0, 'o', color=c[j])
-
- plt.plot(xtest, rate_mean, label='likelihood - locations known')
- # plt.fill_between(xtest.numpy().flatten(), lcb.numpy().flatten(), ucb.numpy().flatten(), alpha=0.4,
- # color='blue', label='triangle')
- process.visualize(D, samples=0, n=n, dt=1.)
+ torch.manual_seed(2)
+ np.random.seed(2)
+ d = 1
+ gamma = 0.2
+ n = 64
+ B = 4.0
+ b = 0.5
+
+ process = PoissonPointProcess(d=1, B=B, b=b)
+ Sets = []
+ levels = 3
+ hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels)
+ Sets = hierarchical_structure.get_all_sets()
+
+ D = BorelSet(1, bounds=torch.tensor([[-1.0, 1.0]]).double())
+
+ m = 32
+ embedding = HermiteEmbedding(m=m, d=1, gamma=gamma)
+ k = KernelFunction(gamma=gamma)
+ estimator = MBRPositiveEstimator(
+ process,
+ hierarchical_structure,
+ kernel_object=k,
+ B=B,
+ m=m,
+ d=d,
+ embedding=embedding,
+ basis="custom",
+ )
+ min_vol, max_vol = estimator.get_min_max()
+
+ dt = 10.0 / (b * min_vol)
+ dt = dt * 2
+
+ print("Suggested dt:", dt)
+ c = ["k", "r", "b", "y", "g", "orange", "brown", "purple"] + [
+ "k" for i in range(500)
+ ]
+
+ no_sets = len(Sets)
+ no_samples = 0
+ data = []
+ samples = []
+ repeats = 2
+
+ for i in range(no_samples):
+ j = np.random.randint(0, no_sets, 1)
+ S = Sets[j[0]]
+ for _ in range(repeats):
+ sample = process.sample_discretized(S, dt)
+ samples.append(sample)
+ data.append((S, sample, dt))
+
+ sample_D = process.sample_discretized(D, dt)
+ samples.append(sample_D)
+ no_samples = repeats * no_samples + 1
+ data.append((D, sample_D, dt))
+
+ estimator.load_data(data)
+
+ xtest = D.return_discretization(n=n)
+
+ # likelihood based
+ estimator.penalized_likelihood()
+ rate_mean = estimator.mean_rate(D, n=n)
+
+ # _, lcb, ucb = estimator.map_lcb_ucb(D, n, beta=2.)
+
+ for j in range(no_samples):
+ if samples[j] is not None:
+ plt.plot(samples[j], samples[j] * 0, "o", color=c[j])
+
+ plt.plot(xtest, rate_mean, label="likelihood - locations known")
+ # plt.fill_between(xtest.numpy().flatten(), lcb.numpy().flatten(), ucb.numpy().flatten(), alpha=0.4,
+ # color='blue', label='triangle')
+ process.visualize(D, samples=0, n=n, dt=1.0)
diff --git a/stpy/point_processes/poisson/poisson.py b/stpy/point_processes/poisson/poisson.py
index 4228b1f..524f0c0 100644
--- a/stpy/point_processes/poisson/poisson.py
+++ b/stpy/point_processes/poisson/poisson.py
@@ -5,152 +5,199 @@
from stpy.borel_set import BorelSet
-class PoissonPointProcess():
- """
- parametrized by log linear model
-
- """
-
- def __init__(self, d=1, B=1, b=0.2, rate=None, rate_volume=None):
- self.B = B
- self.d = d
- self.b = b
- if rate is None:
- self.rate = self.rate_default
- else:
- self.rate = rate
-
- self.rate_volume_f = rate_volume
- self.exact = True
-
- def rate_default(self, x, dt=1.):
- return (self.B * torch.sum(torch.exp(-(x + 1)) * torch.sin(2 * x * np.pi) ** 2, dim=1).view(-1,
- 1) + self.b) * dt
-
- def rate_volume(self, S, dt=1, rate=None):
- if self.rate_volume_f is None:
- # integrate rate numerically over S
- import scipy.integrate as integrate
- if rate is None:
- rate = self.rate
- else:
- rate = rate
- integral = 0
- if self.d == 1:
- # integrate = S.volume()* self.rate(torch.from_numpy(S.bounds[0,1]).view(1))
- integral, _ = integrate.quad(lambda x: rate(torch.Tensor([x]).view(1, 1)).numpy(),
- float(S.bounds[0, 0]), float(S.bounds[0, 1]))
- elif self.d == 2:
- integrand = lambda x, y: rate(torch.Tensor([x, y]).view(1, 2).double()).numpy()
- integral, _ = integrate.dblquad(integrand, float(S.bounds[0, 0]), float(S.bounds[0, 1]),
- lambda x: float(S.bounds[1, 0]), lambda x: float(S.bounds[1, 1]))
-
- return integral * dt
- else:
- return self.rate_volume_f(S) * dt
-
- def sample_discretized(self, S, dt, n=100):
- lam = np.maximum(float(self.rate_volume(S, dt)), 0)
- count = np.random.poisson(lam=lam)
- if count > 0:
- x = S.return_discretization(n)
- r = self.rate(x) * dt
- r = torch.maximum(r, r * 0)
- sample = torch.from_numpy(
- np.random.choice(np.arange(0, x.size()[0], 1), size=count, p=(r / torch.sum(r)).numpy().reshape(-1)))
- return x[sample, :]
- else:
- return None
-
- def sample_discretized_direct(self, x, val):
- lam = 1000
- count = np.random.poisson(lam=np.maximum(0, lam))
- if count > 0:
- val = torch.abs(val)
- sample = torch.from_numpy(np.random.choice(np.arange(0, x.size()[0], 1),
- size=count, p=(val / torch.sum(val)).numpy().reshape(-1)))
- return x[sample, :]
- else:
- return None
-
- def sample(self, S, dt=1., verbose=False, rate=None):
- """
-
- :param S: set where it should be sampled
- :return:
- """
- if self.exact == True:
- return self.sample_discretized(S, dt=dt)
- else:
-
- lam = self.rate_volume(S, dt)
- n = np.random.poisson(lam=lam)
- print("Number of events:", n)
- alpha = 1.
-
- new_sample = []
- size = 0
- while size < n:
- # uniform sample g(s) = 1/vol(S)
- sample = S.uniform_sample(1)
- t = self.rate(sample) / (alpha)
- p = np.random.uniform(0, 1)
- if p < t:
- new_sample.append(sample.view(1, -1))
- size = size + 1
-
- if len(new_sample) > 1:
- x = torch.cat(new_sample, dim=0)
- else:
- return None
- return x
-
- def rate_sets(self, Sets, dt=1):
- res = []
- for S in Sets:
- res.append(self.rate_volume(S, dt=dt))
- return res
-
- def visualize(self, S, samples=2, n=10, dt=1., show=True):
- xtest = S.return_discretization(n)
- rate = self.rate(xtest)
-
- if self.d == 1:
- plt.plot(xtest, rate, label='rate', lw=3)
- for i in range(samples):
-
- x = self.sample(S, dt=dt)
- if x is not None:
- n = x.size()[0]
- plt.plot(x, x * 0, 'o', label='sample n=' + str(n))
-
- elif self.d == 2:
- from scipy.interpolate import griddata
- xx = xtest[:, 0].detach().numpy()
- yy = xtest[:, 1].detach().numpy()
- grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j]
- grid_z_mu = griddata((xx, yy), rate[:, 0].detach().numpy(), (grid_x, grid_y), method='linear')
- fig, ax = plt.subplots(figsize=(15, 7))
- cs = ax.contourf(grid_x, grid_y, grid_z_mu, label='rate')
- ax.contour(cs, colors='k')
-
- for i in range(samples):
- x = self.sample(S, dt=dt)
- if x is not None:
- ax.plot(x[:, 0].detach().numpy(), x[:, 1].detach().numpy(), 'o', ms=10, alpha=0.5, label='sample')
- ax.grid(c='k', ls='-', alpha=0.1)
- plt.colorbar(cs)
-
- plt.legend()
- if show == True:
- plt.show()
+class PoissonPointProcess:
+ """
+ parametrized by log linear model
+
+ """
+
+    def __init__(
+        self, d=1.0, B=1.0, b=0.2, rate=None, rate_volume=None, naive_integral=False
+    ):
+        """Store the model parameters and select the rate function.
+
+        When no ``rate`` callable is given, ``rate_default`` is used.
+        """
+        self.d, self.B, self.b = d, B, b
+        # fall back to the built-in rate when none is supplied
+        self.rate = self.rate_default if rate is None else rate
+        self.rate_volume_f = rate_volume
+        # exact=True makes sample() delegate to sample_discretized()
+        self.exact = True
+        self.naive_integral = naive_integral
+
+    def rate_default(self, x, dt=1.0):
+        """Default rate: B * sum_j exp(-(x_j + 1)) * sin(2*pi*x_j)^2 + b, times dt.
+
+        The sum runs over dim 1 of x; the result is viewed as a column (-1, 1).
+        """
+        bump = torch.exp(-(x + 1)) * torch.sin(2 * x * np.pi) ** 2
+        summed = torch.sum(bump, dim=1).view(-1, 1)
+        return (self.B * summed + self.b) * dt
+
+    def rate_volume(self, S, dt=1, rate=None):
+        """Integrate the rate over S and scale the result by dt.
+
+        :param S: set with a ``bounds`` array of [low, high] rows, one per dimension
+        :param dt: multiplier applied to the integral
+        :param rate: optional rate callable, defaults to ``self.rate``
+        :return: ``rate_volume_f(S) * dt`` if a closed form was supplied, else
+            the scipy quadrature of the rate over S times dt (0 if d not in {1, 2})
+        """
+        if self.rate_volume_f is not None:
+            return self.rate_volume_f(S) * dt
+
+        # integrate rate numerically over S
+        import scipy.integrate as integrate
+        if rate is None:
+            rate = self.rate
+        lo = [float(bound[0]) for bound in S.bounds]
+        hi = [float(bound[1]) for bound in S.bounds]
+        integral = 0
+        if self.d == 1:
+            integrand = lambda x: float(rate(torch.tensor([x]).view(1, 1)))
+            integral, _ = integrate.quad(integrand, lo[0], hi[0])
+        elif self.d == 2:
+            # dblquad calls func(y, x): the inner variable (second dimension)
+            # comes first, so swap back to build the point (x, y)
+            integrand = lambda y, x: float(
+                rate(torch.tensor([x, y]).view(1, 2).double())
+            )
+            integral, _ = integrate.dblquad(
+                integrand, lo[0], hi[0], lambda x: lo[1], lambda x: hi[1]
+            )
+        return integral * dt
+
+    def sample_discretized(self, S: BorelSet, dt, n=100):
+        """Sample event locations from the n-point discretization of S.
+
+        :return: tensor of sampled grid rows, or None when no event is drawn
+        """
+        x = S.return_discretization(n).to(device=torch.get_default_device())
+        r = self.rate(x) * dt
+        if self.naive_integral:
+            # Riemann sum: sum of grid rates times (volume of S / grid size)
+            total_area = 1.0
+            for bound in S.bounds:
+                total_area *= bound[1] - bound[0]
+            lam = float(r.sum() * (total_area / len(x)))
+        else:
+            lam = float(self.rate_volume(S, dt))
+        # lam is a plain float here; calling .cpu() on it raised AttributeError
+        count = np.random.poisson(lam=max(lam, 0.0))
+        if count <= 0:
+            return None
+        r = torch.maximum(r, r * 0)
+        p = (r / torch.sum(r)).cpu().numpy().reshape(-1)
+        idx = np.random.choice(np.arange(0, x.size()[0], 1), size=count, p=p)
+        return x[torch.from_numpy(idx), :]
+
+    def sample_discretized_direct(self, x, val):
+        """Draw rows of x with probability proportional to |val|.
+
+        The number of draws is Poisson distributed with a fixed mean of 1000;
+        None is returned when that count is zero.
+        """
+        lam = 1000
+        count = np.random.poisson(lam=np.maximum(0, lam))
+        if not count > 0:
+            return None
+        weights = torch.abs(val)
+        probs = (weights / torch.sum(weights)).numpy().reshape(-1)
+        row_ids = np.arange(0, x.size()[0], 1)
+        picked = np.random.choice(row_ids, size=count, p=probs)
+        return x[torch.from_numpy(picked), :]
+
+    def sample(self, S, dt=1.0, verbose=False, rate=None):
+        """
+        Draw one realization of the process on S.
+
+        :param S: set where it should be sampled
+        :param dt: factor forwarded to rate_volume / sample_discretized
+        :param verbose: currently unused
+        :param rate: currently unused
+        :return: tensor with one sampled location per row, or None if empty
+        """
+        if self.exact == True:
+            return self.sample_discretized(S, dt=dt)
+
+        # accept/reject uniform proposals until n events are collected
+        lam = self.rate_volume(S, dt)
+        n = np.random.poisson(lam=lam)
+        print("Number of events:", n)
+        alpha = 1.0
+
+        new_sample = []
+        while len(new_sample) < n:
+            # uniform sample g(s) = 1/vol(S)
+            sample = S.uniform_sample(1)
+            t = self.rate(sample) / (alpha)
+            if np.random.uniform(0, 1) < t:
+                new_sample.append(sample.view(1, -1))
+
+        # a single accepted event is still a valid sample (was dropped before)
+        if not new_sample:
+            return None
+        return torch.cat(new_sample, dim=0)
+
+    def rate_sets(self, Sets, dt=1):
+        """Return the list of ``rate_volume(S, dt=dt)`` values, one per set."""
+        return [
+            self.rate_volume(borel_set, dt=dt) for borel_set in Sets
+        ]
+
+    def visualize(self, S, samples=2, n=10, dt=1.0, show=True):
+        """Plot the rate on a discretization of S together with sampled events.
+
+        :param samples: number of realizations drawn via ``self.sample(S, dt=dt)``
+        :param n: argument passed to ``S.return_discretization``
+        :param show: call ``plt.show()`` at the end when equal to True
+        """
+        xtest = S.return_discretization(n)
+        rate = self.rate(xtest)
+
+        if self.d == 1:
+            plt.plot(xtest, rate, label="rate", lw=3)
+            for _ in range(samples):
+                events = self.sample(S, dt=dt)
+                if events is None:
+                    continue
+                label = "sample n=" + str(events.size()[0])
+                plt.plot(events, events * 0, "o", label=label)
+
+        elif self.d == 2:
+            from scipy.interpolate import griddata
+
+            xx = xtest[:, 0].detach().numpy()
+            yy = xtest[:, 1].detach().numpy()
+            x_axis = slice(min(xx), max(xx), 100j)
+            y_axis = slice(min(yy), max(yy), 100j)
+            grid_x, grid_y = np.mgrid[x_axis, y_axis]
+            values = rate[:, 0].detach().numpy()
+            grid_z_mu = griddata((xx, yy), values, (grid_x, grid_y), method="linear")
+            fig, ax = plt.subplots(figsize=(15, 7))
+            cs = ax.contourf(grid_x, grid_y, grid_z_mu, label="rate")
+            ax.contour(cs, colors="k")
+
+            marker = dict(ms=10, alpha=0.5, label="sample")
+            for _ in range(samples):
+                events = self.sample(S, dt=dt)
+                if events is not None:
+                    pts = events.detach().numpy()
+                    ax.plot(pts[:, 0], pts[:, 1], "o", **marker)
+            ax.grid(c="k", ls="-", alpha=0.1)
+            plt.colorbar(cs)
+
+        plt.legend()
+        if show == True:
+            plt.show()
if __name__ == "__main__":
- d = 2
- n = 100
- bounds = torch.Tensor([[-1, 1], [-1, 1]]).double()
- D = BorelSet(d, bounds)
+ d = 2
+ n = 100
+ bounds = torch.tensor([[-1, 1], [-1, 1]]).double()
+ D = BorelSet(d, bounds)
- process = PoissonPointProcess(d=d, B=2)
- process.visualize(D, samples=10, n=n, dt=10)
+ process = PoissonPointProcess(d=d, B=2)
+ process.visualize(D, samples=10, n=n, dt=10)
diff --git a/stpy/point_processes/poisson_rate_estimator.py b/stpy/point_processes/poisson_rate_estimator.py
index d91658e..51ae795 100644
--- a/stpy/point_processes/poisson_rate_estimator.py
+++ b/stpy/point_processes/poisson_rate_estimator.py
@@ -1,15 +1,27 @@
+import os
+from typing import Optional
import cvxpy as cp
import mosek
import numpy as np
import scipy
+from stpy.borel_set import BorelSet, HierarchicalBorelSets
+from stpy.embeddings.embedding import Embedding
+from stpy.kernels import KernelFunction
import torch
from autograd_minimize import minimize
from quadprog import solve_qp
from torchmin import minimize as minimize_torch
-from stpy.embeddings.bernstein_embedding import BernsteinEmbedding, BernsteinSplinesEmbedding, \
- BernsteinSplinesOverlapping
-from stpy.embeddings.bump_bases import PositiveNystromEmbeddingBump, TriangleEmbedding, FaberSchauderEmbedding
+from stpy.embeddings.bernstein_embedding import (
+ BernsteinEmbedding,
+ BernsteinSplinesEmbedding,
+ BernsteinSplinesOverlapping,
+)
+from stpy.embeddings.bump_bases import (
+ PositiveNystromEmbeddingBump,
+ TriangleEmbedding,
+ FaberSchauderEmbedding,
+)
from stpy.embeddings.optimal_positive_basis import OptimalPositiveBasis
from stpy.helpers.ellipsoid_algorithms import maximize_on_elliptical_slice
from stpy.point_processes.rate_estimator import RateEstimator
@@ -17,1947 +29,2578 @@
class PoissonRateEstimator(RateEstimator):
- def __init__(self, process, hierarchy, d=1, m=100, kernel_object=None, B=1., s=1., jitter=10e-8, b=0.,
- basis='triangle', estimator='likelihood', feedback='count-record', offset=0.1, uncertainty='laplace',
- approx=None, stepsize=None, embedding=None, beta=2., sampling='proximal+prox', peeking=True,
- constraints=True, var_cor_on=True,
- samples_nystrom=15000, inverted_constraint=False, steps=None, dual=True, no_anchor_points=1024, U=1.,
- opt='torch'):
-
- self.process = process
- self.d = d
- self.s = s
- self.b = b
- self.B = B
- self.U = U
- self.stepsize = stepsize
- self.sampling = sampling
- self.steps = steps
- self.opt = opt
- self.kernel_object = kernel_object
- # set hierarchy
- self.constraints = constraints
- self.hierarchy = hierarchy
- self.ucb_identified = False
- self.inverted_constraint = inverted_constraint
- # approximation
- self.loglikelihood = 0.
- self.dual = dual
- self.peeking = peeking
- self.no_anchor_points = no_anchor_points
- if beta < 0.:
- self.beta = lambda t: self.beta_theory()
- else:
- self.beta = lambda t: beta
- self.var_cor_on = var_cor_on
-
- if basis == 'triangle':
- self.packing = TriangleEmbedding(d, m, kernel_object=kernel_object, B=B, b=b, offset=offset,
- s=np.sqrt(jitter))
- elif basis == 'bernstein':
- self.packing = BernsteinEmbedding(d, m, kernel_object=kernel_object, B=B, b=b, offset=offset,
- s=np.sqrt(jitter))
- elif basis == 'splines':
- self.packing = BernsteinSplinesEmbedding(d, m, kernel_object=kernel_object, B=B, b=b, offset=offset,
- s=np.sqrt(jitter))
- elif basis == 'nystrom':
- self.packing = PositiveNystromEmbeddingBump(d, m, kernel_object=kernel_object, B=B, b=b, offset=offset,
- s=np.sqrt(jitter), samples=samples_nystrom)
- elif basis == 'overlap-splines':
- self.packing = BernsteinSplinesOverlapping(d, m, kernel_object=kernel_object, B=B, b=b, offset=offset,
- s=np.sqrt(jitter))
- elif basis == 'faber':
- self.packing = FaberSchauderEmbedding(d, m, kernel_object=kernel_object, B=B, b=b, offset=offset,
- s=np.sqrt(jitter))
- elif basis == "optimal-positive":
- self.packing = OptimalPositiveBasis(d, m, kernel_object=kernel_object, B=B, b=b, offset=offset,
- s=np.sqrt(jitter), samples=samples_nystrom)
- elif basis == "custom":
- self.packing = embedding
- else:
- raise NotImplementedError("The request positive basis is not implemented.")
- self.m = m
- self.data = None
- self.covariance = False
-
- # stabilizing the matrix inversion
- self.jitter = jitter
-
- # for variance stabilization
- self.stabilization = None
- self.approx_fit = False
-
- # properties of rate estimator
- self.estimator = estimator
- self.feedback = feedback
- self.uncertainty = uncertainty
- self.approx = approx
-
- # precompute information
- self.basic_sets = self.hierarchy.get_sets_level(self.hierarchy.levels)
-
- self.varphis = torch.zeros(size=(len(self.basic_sets), self.get_m())).double()
- self.variances = torch.ones(size=(len(self.basic_sets), 1)).double().view(-1)
- self.variances_histogram = []
- self.observations = None
- self.rate = None
- self.W = (s) * torch.eye(self.get_m()).double()
- self.W_inv_approx = (1. / s) * torch.eye(self.get_m()).double()
- self.beta_value = 2.
- self.sampled_theta = None
-
- if self.dual == True:
- if self.d == 1:
- anchor = no_anchor_points
- self.anchor_points = self.hierarchy.top_node.return_discretization(anchor)
- self.anchor_weights = torch.zeros(size=(anchor, 1)).double().view(-1)
- elif self.d == 2:
- anchor = no_anchor_points
- self.anchor_points = self.hierarchy.top_node.return_discretization(int(np.sqrt(anchor)))
- self.anchor_weights = torch.zeros(size=(anchor, 1)).double().view(-1)
- self.global_dt = 0.
- self.anchor_points_emb = self.packing.embed(self.anchor_points)
-
- if feedback == "count-record" and basis != "custom":
- print("Precomputing phis.")
- for index_set, set in enumerate(self.basic_sets):
- self.varphis[index_set, :] = self.packing.integral(set)
- self.variances[index_set] = set.volume() * self.B
- else:
- pass
-
- print("Precomputation finished.")
-
- def add_data_point(self, new_data, times=True):
-
- super().add_data_point(new_data, times=times)
-
- if self.rate is not None:
- rate = self.rate
- else:
- l, _, u = self.get_constraints()
- Gamma_half = self.cov()
- rate = Gamma_half @ u
-
- if self.feedback == 'histogram':
- val = self.packing.integral(new_data[0]) @ rate * new_data[2]
- v = - np.log(val) + val
-
- elif self.feedback == 'count-record':
- v = self.packing.integral(new_data[0]) @ rate * new_data[2]
- if new_data[1] is not None:
- val2 = self.packing.embed(new_data[1]) @ rate * new_data[2]
- v = v - torch.sum(np.log(val2))
-
- self.loglikelihood += v
-
- def beta_theory(self):
- if self.approx_fit == False:
- l, Lambda, u = self.get_constraints()
- Gamma_half, invGamma_half = self.cov(inverse=True)
-
- ## norm
- norm = self.s
-
- ## constraints
- eps = 10e-3
- res = Gamma_half @ self.rate.view(-1, 1) - torch.from_numpy(l).view(-1, 1)
- xi = res.clone()
- xi[res > eps] = 0.
-
- constraint = xi.T @ Gamma_half @ self.W_inv_approx @ Gamma_half.T @ xi
-
- ## concentration
- vol = 4 * np.log(1. / 0.1) + torch.logdet(self.W) - self.get_m() * np.log(self.s)
- self.beta_value = np.sqrt(norm + vol + constraint)
- print('-------------------')
- print("New beta:", self.beta_value)
- print("norm:", norm)
- print("constraint:", constraint)
- print("vol:", vol)
- print("-------------------")
- else:
- pass
- return self.beta_value
-
- def get_constraints(self):
- return self.packing.get_constraints()
-
- def cov(self, inverse=False):
- return self.packing.cov(inverse=inverse)
-
- def fit_gp(self, threads=4):
-
- if self.data is not None:
- if self.feedback == "count-record":
-
- if self.estimator == "likelihood":
- if self.opt == 'cvxpy':
- self.penalized_likelihood(threads=threads)
- elif self.opt == 'torch':
- self.penalized_likelihood_fast(threads=threads)
- else:
- raise NotImplementedError("The optimization method does not exist")
-
- elif self.estimator == "least-sq":
- self.least_squares_weighted()
-
- elif self.estimator == "bins":
- self.penalized_likelihood_bins()
-
- else:
- raise AssertionError("wrong name.")
-
-
- elif self.feedback == 'histogram':
-
- if self.estimator == "likelihood":
- self.penalized_likelihood_integral()
-
- elif self.estimator == "least-sq":
- self.least_squares_weighted_integral()
-
- elif self.estimator == "bins":
- self.penalized_likelihood_integral_bins()
-
- else:
- raise AssertionError("wrong name.")
- else:
- raise AssertionError("wrong name.")
- else:
- l, Lambda, u = self.get_constraints()
- Gamma_half = self.cov()
- self.rate = l
-
- def sample_mirror_langevin(self, steps=500, verbose=False):
-
- l, Lambda, u = self.get_constraints()
- Gamma_half, invGamma_half = self.cov(inverse=True)
-
- v = torch.from_numpy((u + l) / 2.).view(-1, 1)
- S = torch.diag(torch.from_numpy(u - l).view(-1) / 2.).double()
-
- phis = self.phis.clone() @ invGamma_half
-
- if self.observations is not None:
- obs = self.observations @ invGamma_half
- else:
- obs = None
-
- invGamma = invGamma_half.T @ invGamma_half
- transform = lambda y: S @ torch.tanh(y) + v
-
- if self.feedback == "count-record" and self.dual == False:
- if obs is not None:
- func = lambda y: -torch.sum(torch.log(obs @ transform(y)).view(-1)) \
- + torch.sum(phis @ transform(y)) \
- + self.s * transform(y).T @ invGamma @ transform(y) + torch.sum(
- torch.log(1. / (1. - transform(y) ** 2)))
- else:
- func = lambda y: torch.sum(phis @ transform(y)) \
- + self.s * transform(y).T @ invGamma @ transform(y) + torch.sum(
- torch.log(1. / (1. - transform(y) ** 2))) # torch.sum(torch.log(0.5*(1.+torch.cosh(2*y))))
-
-
- elif self.feedback == "count-record" and self.dual == True:
- mask = self.bucketized_counts > 0
- phis = self.varphis[mask, :] @ invGamma_half
- tau = self.total_bucketized_time[mask]
-
- if obs is not None:
- obs = self.anchor_points_emb @ invGamma_half
- weights = self.anchor_weights
- mask = weights > 0.
-
- func = lambda y: -torch.sum(weights[mask].view(-1, 1) * torch.log(obs[mask, :] @ transform(y))) \
- + torch.sum(tau.view(-1, 1) * (phis @ transform(y))) \
- + self.s * transform(y).T @ invGamma @ transform(y) + torch.sum(
- torch.log(1. / (1. - (transform(y) ** 2)))) # + torch.sum(torch.log(0.5*(1.+torch.cosh(2*y))))
- else:
- func = lambda y: torch.sum(tau.view(-1, 1) * (phis @ transform(y))) \
- + self.s * transform(y).T @ invGamma @ transform(y) + torch.sum(
- torch.log(1. / (1. - transform(y) ** 2))) # + torch.sum(torch.log(0.5*(1.+torch.cosh(2*y))))
-
- elif self.feedback == "histogram":
- func = lambda y: - torch.sum(
- self.counts.clone().view(-1) * torch.log(phis @ (S @ torch.tanh(y) + v)).view(-1)) \
- + torch.sum(phis @ (S @ torch.tanh(y) + v)) \
- + self.s * (S @ torch.tanh(y) + v).T @ invGamma @ (S @ torch.tanh(y) + v)
-
- y = torch.rand(size=(self.get_m(), 1), dtype=torch.float64, requires_grad=True)
-
- # initiallize with map sqeezed more
- y.data = Gamma_half @ self.rate.view(-1, 1) # u < theta < l
-
- u_new = u + 0.01
- l_new = l - 0.01
- v2 = torch.from_numpy((u_new + l_new) / 2.).view(-1, 1)
- S2 = torch.diag(torch.from_numpy(u_new - l_new).view(-1) / 2.).double()
- #
- y.data = torch.inverse(S2) @ (y.data - v2)
- y.data = torch.atanh(y.data)
-
- W = S.T @ invGamma_half.T @ self.construct_covariance_matrix_laplace() @ invGamma_half @ S
- L = float(scipy.sparse.linalg.eigsh(W.numpy(), k=1, which='LM', return_eigenvectors=False, tol=1e-8))
- eta = 0.05 / (L + 1)
-
- print("Eta:", eta)
-
- for k in range(steps):
-
- w = torch.randn(size=(self.get_m(), 1)).double()
- nabla_y = torch.autograd.functional.jacobian(func, y).data[0, 0, :, :]
- y.data = y.data - eta * nabla_y + np.sqrt(2 * eta) * w
- theta = torch.tanh(y).detach()
-
- if verbose == True:
- print("Iter:", k, (S @ theta + v).T)
- print(y.T)
-
- self.sampled_theta = invGamma_half @ transform(y.data)
-
- def sample_projected_langevin(self, steps=300, verbose=False, stepsize=None):
- """
- :param burn_in:
- :return:
- """
-
- Gamma_half = self.packing.cov()
-
- def prox(x):
- z = x.numpy()
- theta = cp.Variable((self.get_m(), 1))
- objective = cp.Minimize(cp.sum_squares(z - theta))
- constraints = []
- l, Lambda, u = self.get_constraints()
- Lambda = Lambda @ Gamma_half.numpy()
- constraints.append(Lambda @ theta >= l.reshape(-1, 1))
- prob = cp.Problem(objective, constraints)
- prob.solve(solver=cp.OSQP, warm_start=False, verbose=False, eps_abs=1e-3, eps_rel=1e-3)
- return torch.from_numpy(theta.value)
-
- if self.feedback == "count-record" and self.dual == False:
- if self.observations is not None:
- nabla = lambda y: -torch.einsum('i,ij->j', 1. / (self.observations @ y).view(-1),
- self.observations).view(-1, 1) + \
- torch.sum(self.phis, dim=0).view(-1, 1) \
- + self.s * y.view(-1, 1)
- else:
- nabla = lambda theta: torch.sum(self.phis, dim=0).view(-1, 1) + self.s * theta.view(-1, 1)
-
- elif self.feedback == "count-record" and self.dual == True:
- mask = self.bucketized_counts > 0
- phis = self.varphis[mask, :]
- tau = self.total_bucketized_time[mask]
-
- if self.observations is not None:
- obs = self.anchor_points_emb
- weights = self.anchor_weights
- mask = weights > 0.
- nabla = lambda y: -torch.einsum('i,ij->j', weights[mask] / ((obs[mask, :] @ y).view(-1)),
- obs[mask]).view(-1, 1) + \
- torch.einsum('i,ij->j', tau, phis).view(-1, 1) \
- + self.s * y.view(-1, 1)
- else:
- nabla = lambda y: torch.einsum('i,ij->j', tau, phis).view(-1, 1) \
- + self.s * y.view(-1, 1)
-
-
- elif self.feedback == "histogram":
- nabla = lambda theta: -torch.sum(torch.diag((1. / (self.phis @ theta).view(-1)) * self.counts) @ self.phis,
- dim=0).view(-1, 1) \
- + torch.sum(self.phis, dim=0).view(-1, 1) + self.s * theta.view(-1, 1)
-
- theta = self.rate.view(-1, 1)
- W = self.construct_covariance_matrix_laplace(minimal=True)
- L = float(scipy.sparse.linalg.eigsh(W.numpy(), k=1, which='LM', return_eigenvectors=False, tol=1e-5))
-
- if stepsize is None:
- eta = 0.5 / (L + 1)
- else:
- eta = np.minimum(1, stepsize * 0.5 / L)
-
- print(eta)
- for k in range(steps):
- w = torch.randn(size=(self.get_m(), 1)).double()
- theta = prox(theta - eta * nabla(theta) + np.sqrt(2 * eta) * w)
-
- if verbose == True:
- print("Iter:", k, theta.T)
-
- self.sampled_theta = theta
-
- def sample_proximal_langevin_prox(self, steps=300, verbose=False, stepsize=None):
- """
- :param burn_in:
- :return:
- """
-
- Gamma_half, invGamma_half = self.packing.cov(inverse=True)
- # invGamma = invGamma_half.T @ invGamma_half
- l, Lambda, u = self.get_constraints()
- Lambda = Lambda @ Gamma_half.numpy()
-
- def prox(x):
- res = solve_qp(np.eye(self.get_m()), x.numpy().reshape(-1), C=Gamma_half.numpy(), b=l.numpy(),
- factorized=True)
- return torch.from_numpy(res[0]).view(-1, 1)
-
- # theta_n = cp.Variable((self.get_m(), 1))
- # x = cp.Parameter((self.get_m(), 1))
- # objective = cp.Minimize(cp.sum_squares(x - theta_n))
- #
- # constraints = []
- # l, Lambda, u = self.get_constraints()
- # Lambda = Lambda @ Gamma_half.numpy()
- # constraints.append(Lambda @ theta_n >= l.reshape(-1, 1))
- # constraints.append(Lambda @ theta_n <= u.reshape(-1, 1))
- #
- # prob = cp.Problem(objective, constraints)
-
- # def prox(x):
- # return Gamma_half @ torch.from_numpy(scipy.optimize.nnls(invGamma.numpy(), (invGamma_half@x).numpy().reshape(-1), maxiter = 1000)[0]).view(-1,1)
-
- if self.data is not None:
- if self.feedback == "count-record" and self.dual == False:
- if self.observations is not None:
- nabla = lambda y: -torch.einsum('i,ij->j', 1. / (self.observations @ y).view(-1),
- self.observations).view(-1, 1) + \
- torch.sum(self.phis, dim=0).view(-1, 1) \
- + self.s * y.view(-1, 1)
- else:
- nabla = lambda theta: torch.sum(self.phis, dim=0).view(-1, 1) + self.s * theta.view(-1, 1)
-
- elif self.feedback == "count-record" and self.dual == True:
- mask = self.bucketized_counts > 0
- phis = self.varphis[mask, :]
- tau = self.total_bucketized_time[mask]
-
- if self.observations is not None:
- obs = self.anchor_points_emb
- weights = self.anchor_weights
- mask = weights > 0.
- nabla = lambda y: -torch.einsum('i,ij->j', weights[mask] / ((obs[mask, :] @ y).view(-1)),
- obs[mask]).view(-1, 1) + \
- torch.einsum('i,ij->j', tau, phis).view(-1, 1) \
- + self.s * y.view(-1, 1)
- else:
- nabla = lambda y: torch.einsum('i,ij->j', tau, phis).view(-1, 1) \
- + self.s * y.view(-1, 1)
-
-
- elif self.feedback == "histogram":
- nabla = lambda theta: -torch.sum(
- torch.diag((1. / (self.phis @ theta).view(-1)) * self.counts) @ self.phis,
- dim=0).view(-1, 1) \
- + torch.sum(self.phis, dim=0).view(-1, 1) + self.s * theta.view(-1, 1)
- else:
- nabla = lambda theta: self.s * theta.view(-1, 1)
-
- if self.rate is not None:
- theta = self.rate.view(-1, 1)
- else:
- theta = self.b + 0.05 * torch.rand(size=(self.get_m(), 1), dtype=torch.float64, requires_grad=False).view(
- -1, 1) ** 2
-
- for k in range(steps):
- w = torch.randn(size=(self.get_m(), 1)).double()
-
- # calculate proper step-size
- W = self.construct_covariance_matrix_laplace(theta=theta)
- L = float(scipy.sparse.linalg.eigsh(W.numpy(), k=1, which='LM', return_eigenvectors=False, tol=1e-3))
- if stepsize is not None:
- eta = 0.5 * stepsize / L
- else:
- eta = 0.5 / L
-
- # prox calculate
- # x.value = theta.numpy()
- # prob.solve(solver=cp.OSQP, warm_start=True, verbose=False, eps_abs=1e-3, eps_rel=1e-3)
- # proximal_theta = torch.from_numpy(theta_n.value)
-
- # update step
- # theta = 0.5 * theta - eta * nabla(theta) + 0.5 * proximal_theta + np.sqrt(2 * eta) * w
-
- # update step
- theta = 0.5 * theta - eta * nabla(theta) + 0.5 * prox(theta) + np.sqrt(2 * eta) * w
- if verbose == True:
- print("Iter:", k, theta.T)
-
- self.sampled_theta = prox(theta)
-
- def sample_proximal_langevin_simple_prox(self, steps=300, verbose=False):
-
- Gamma_half, invGamma_half = self.packing.cov(inverse=True)
- l, Lambda, u = self.get_constraints()
- prox_simple = lambda x: torch.minimum(torch.maximum(x.view(-1), torch.from_numpy(l).view(-1)) \
- , torch.from_numpy(u).view(-1)).view(-1, 1)
-
- def prox(x):
- return invGamma_half @ prox_simple(Gamma_half @ x)
-
- phis = self.phis
- if self.feedback == "count-record" and self.dual == False:
- if self.observations is not None:
- obs = self.observations
-
- func = lambda y: -torch.sum(torch.log(obs @ y)) \
- + torch.sum((phis @ y)) \
- + self.s * y.T @ y
-
- nabla = lambda y: -torch.einsum('i,ij->j', 1. / (obs @ y).view(-1), obs).view(-1, 1) + \
- torch.sum(phis, dim=0).view(-1, 1) \
- + self.s * y.view(-1, 1)
- else:
- func = lambda y: torch.sum(phis @ y).view(-1, 1) \
- + self.s * y.T @ y
-
- nabla = lambda y: torch.sum(phis, dim=0).view(-1, 1) + self.s * y.view(-1, 1)
-
-
-
-
-
- elif self.feedback == "count-record" and self.dual == True:
- mask = self.bucketized_counts > 0
- phis = self.varphis[mask, :]
- tau = self.total_bucketized_time[mask]
-
- if self.observations is not None:
- obs = self.anchor_points_emb
- weights = self.anchor_weights
- mask = weights > 0.
- func = lambda y: -torch.sum(weights[mask].view(-1, 1) * torch.log(obs[mask, :] @ y)) \
- + torch.sum(tau.view(-1, 1) * (phis @ y)) \
- + self.s * y.T @ y
-
- nabla = lambda y: -torch.einsum('i,ij->j', weights[mask] / ((obs[mask, :] @ y).view(-1)),
- obs[mask]).view(-1, 1) + \
- torch.einsum('i,ij->j', tau, phis).view(-1, 1) \
- + self.s * y.view(-1, 1)
- else:
- func = lambda y: torch.sum(tau.view(-1, 1) * (phis @ y)) \
- + self.s * y.T @ y
-
- nabla = lambda y: torch.einsum('i,ij->j', tau, phis).view(-1, 1) \
- + self.s * y.view(-1, 1)
-
- elif self.feedback == "histogram":
- func = lambda y: - torch.sum(self.counts.view(-1) * torch.log(phis @ y).view(-1)) + \
- torch.sum(phis @ y) \
- + self.s * y.T @ y
- nabla = lambda y: -torch.einsum('i,ij->j', self.counts.view(-1) / (phis @ y).view(-1), phis).view(-1, 1) + \
- torch.sum(phis, dim=0).view(-1, 1) + self.s * y
-
- # hessian = lambda y: self.construct_covariance_matrix_laplace()
-
- y = prox(torch.randn(size=(self.get_m(), 1), dtype=torch.float64, requires_grad=True))
- y.data = self.rate.view(-1, 1)
-
- W = self.construct_covariance_matrix_laplace()
- L = float(scipy.sparse.linalg.eigsh(W.numpy(), k=1, which='LM', return_eigenvectors=False, tol=1e-5))
-
- eta = 0.5 / (L + 1)
-
- for k in range(steps):
- W = torch.randn(size=(self.get_m(), 1)).double()
- nabla_y = nabla(y.data)
- y.data = (1 - eta) * y.data - eta * nabla_y + eta * prox(y.data) + np.sqrt(2 * eta) * W
- if verbose == True:
- print("Iter:", k, y.T)
- print("grad:", y.grad.T)
-
- self.sampled_theta = prox(y.detach())
-
- def sample_hessian_positive_langevin(self, steps=500, verbose=False, stepsize=None):
-
- if self.data is not None:
- if self.feedback == "count-record" and self.dual == False:
- if self.observations is not None:
- nabla = lambda y: -torch.einsum('i,ij->j', 1. / (self.observations @ y).view(-1),
- self.observations).view(-1, 1) + \
- torch.sum(self.phis, dim=0).view(-1, 1) \
- + self.s * y.view(-1, 1)
- else:
- nabla = lambda theta: torch.sum(self.phis, dim=0).view(-1, 1) + self.s * theta.view(-1, 1)
-
- elif self.feedback == "count-record" and self.dual == True:
-
- mask = self.bucketized_counts > 0
- phis = self.varphis[mask, :]
- tau = self.total_bucketized_time[mask]
-
- if self.observations is not None:
- obs = self.anchor_points_emb
- weights = self.anchor_weights
- mask = weights > 0.
- nabla = lambda y: -torch.einsum('i,ij->j', weights[mask] / ((obs[mask, :] @ y).view(-1)),
- obs[mask]).view(-1, 1) + \
- torch.einsum('i,ij->j', tau, phis).view(-1, 1) \
- + self.s * y.view(-1, 1)
- else:
- nabla = lambda y: torch.einsum('i,ij->j', tau, phis).view(-1, 1) \
- + self.s * y.view(-1, 1)
-
-
- elif self.feedback == "histogram":
- nabla = lambda theta: -torch.sum(
- torch.diag((1. / (self.phis @ theta).view(-1)) * self.counts) @ self.phis,
- dim=0).view(-1, 1) \
- + torch.sum(self.phis, dim=0).view(-1, 1) + self.s * theta.view(-1, 1)
- else:
- nabla = lambda theta: self.s * theta.view(-1, 1)
-
- Gamma_half = self.packing.cov()
- lz, Lambda, u = self.get_constraints()
-
- Lambda = torch.from_numpy(Lambda) @ Gamma_half
- y = self.b + 0.05 * torch.rand(size=(self.get_m(), 1), dtype=torch.float64, requires_grad=True).view(-1) ** 2
-
- if self.rate is not None:
- y.data = self.rate.data + Gamma_half @ y.data
- else:
- y.data = Gamma_half @ y.data
-
- if verbose == True:
- print("initial point")
- print(y.data)
-
- W = self.construct_covariance_matrix_laplace()
- L = float(scipy.sparse.linalg.eigsh(W.numpy(), k=1, which='LM', return_eigenvectors=False, tol=1e-5))
-
- if stepsize is None:
- eta = 1. / (L + 1)
- else:
- eta = stepsize / (L + 1)
-
- D = lambda x: torch.diag(1. / torch.abs(Lambda @ x).view(-1))
- sqrt_hessian = lambda x: Lambda @ D(x)
-
- phi = lambda x: -torch.sum(torch.log(Lambda @ x))
- nabla_phi = lambda x: -torch.einsum('i,ij->j', 1. / (Lambda @ x).view(-1), Lambda)
- hessian_phi = lambda x: Lambda.T @ torch.diag(1. / (Lambda @ x).view(-1) ** 2) @ Lambda
-
- for k in range(steps):
- w = torch.randn(size=(self.get_m(), 1)).double()
- nabla_val = nabla(y)
- H = sqrt_hessian(y.data)
- z = nabla_phi(y.data).view(-1, 1) - eta * nabla_val + np.sqrt(2 * eta) * H @ w
-
- # y.data = newton_solve(lambda s: nabla_phi(s).reshape(-1)-z.data.reshape(-1),y.reshape(-1),
- # verbose = verbose, grad = hessian_phi).view(-1,1)
-
- # # minimization appraoch
- def objective(s):
- return torch.sum((nabla_phi(s).reshape(-1) - z.reshape(-1)) ** 2)
-
- # #
-
- # x0 = y.reshape(-1).clone().detach().numpy()
- # res = minimize(objective, x0, backend='torch', method='Newton-CG', precision='float64', tol=1e-5, hvp_type='vhp')
- # y.data = torch.from_numpy(res.x)
-
- x0 = y.reshape(-1).clone()
- res = minimize_torch(objective, x0, method='newton-cg', tol=1e-5)
- y.data = res.x
-
- if verbose:
- print("Iter:", k)
- print(y.T)
-
- self.sampled_theta = y.data
-
- def sample_mla_prime(self, steps=100, verbose=False, stepsize=None):
- Gamma_half, invGamma_half = self.packing.cov(inverse=True)
- invGamma = invGamma_half.T @ invGamma_half
- l, Lambda, u = self.get_constraints()
- Lambda = torch.from_numpy(Lambda) @ Gamma_half
-
- if self.data is not None:
- if self.feedback == "count-record" and self.dual == False:
- if self.observations is not None:
- observations = self.observations @ invGamma_half
- phis = self.phis @ invGamma_half
- nabla = lambda y: -torch.einsum('i,ij->j', 1. / (observations @ y).view(-1),
- observations).view(-1, 1) + \
- torch.sum(phis, dim=0).view(-1, 1) \
- + self.s * invGamma @ y.view(-1, 1)
- else:
- nabla = lambda theta: torch.sum(phis, dim=0).view(-1, 1) + self.s * invGamma @ theta.view(-1, 1)
-
- else:
- nabla = lambda theta: self.s * invGamma @ theta.view(-1, 1)
-
- y = self.b + 0.05 * torch.rand(size=(self.get_m(), 1), dtype=torch.float64, requires_grad=True).reshape(-1,
- 1) ** 2
- # if self.rate is not None:
- # y.data = Gamma_half @ self.rate.data.view(-1,1) + y.data
- # else:
- y.data = y.data
-
- if verbose == True:
- print("initial point")
- print(y.data)
-
- W = invGamma_half.T @ self.construct_covariance_matrix_laplace() @ invGamma_half
- L = float(scipy.sparse.linalg.eigsh(W.numpy(), k=1, which='LM', return_eigenvectors=False, tol=1e-5))
-
- if stepsize is None:
- eta = 1. / (L + 1)
- else:
- eta = stepsize / (L + 1)
-
- from stpy.approx_inference.sampling_helper import get_increment
- for k in range(steps):
-
- nabla_val = nabla(y)
-
- # cvxpy minimization
- # x = cp.Variable((self.get_m(), 1))
- # objective = cp.Minimize( eta * nabla_val.detach().numpy().T @ x - cp.sum(cp.log(x)) -(-1./y.data).T@x)
- # constraints = [x >= 0.]
- #
- # prob = cp.Problem(objective, constraints)
- # prob.solve(solver = cp.MOSEK)
-
- w0 = (eta * nabla_val.data + 1. / y.data)
- # initial point for the solve
- # w0 = -1./( torch.from_numpy(x.value))
-
- # simulate
- f = lambda w, n: n / torch.abs(w)
- w = get_increment(eta, 1000, f, w0, path=False)
-
- # back mirror map
- y.data = (-1. / w)
-
- if verbose:
- print("Iter:", k)
- print(y.T)
-
- self.sampled_theta = invGamma_half @ y.data
-
- def sample_hessian_positive_langevin_2(self, steps=500, verbose=False, stepsize=None, preconditioner=True):
-
- Gamma_half, invGamma_half = self.packing.cov(inverse=True)
- invGamma = invGamma_half @ invGamma_half
- if self.data is not None:
-
- if self.feedback == "count-record" and self.dual == False:
-
- observations = self.observations @ invGamma_half
- phis = self.phis @ invGamma_half
-
- if self.observations is not None:
- nabla = lambda y: -torch.einsum('i,ij->j', 1. / (observations @ y).view(-1),
- observations).view(-1, 1) + \
- torch.sum(phis, dim=0).view(-1, 1) \
- + self.s * invGamma @ y.view(-1, 1)
- else:
- nabla = lambda theta: torch.sum(phis, dim=0).view(-1, 1) + self.s * invGamma @ theta.view(-1, 1)
-
- else:
- nabla = lambda theta: self.s * invGamma @ theta.view(-1, 1)
-
- y = torch.rand(size=(self.get_m(), 1), dtype=torch.float64, requires_grad=True).view(-1) ** 2
- # if self.rate is not None:
- # y.data = Gamma_half @ self.rate.data + y.data
-
- if verbose == True:
- print("initial point")
- print(y.data)
-
- W = self.construct_covariance_matrix_laplace(minimal=True)
- L = float(scipy.sparse.linalg.eigsh(W.numpy(), k=1, which='LM', return_eigenvectors=False, tol=1e-5))
-
- if stepsize is None:
- eta = 1. / (L + 1)
- else:
- eta = stepsize / (L + 1)
-
- for k in range(steps):
- w = torch.randn(size=(self.get_m(), 1)).double() / torch.abs(y.data).view(-1, 1)
- nabla_val = nabla(y)
- z = -1. / y.data.view(-1, 1) + self.b - eta * Gamma_half @ nabla_val + np.sqrt(2 * eta) * Gamma_half @ w
- y.data = -1. / z + self.b
-
- if verbose:
- print("Iter:", k)
- print(y.T)
-
- self.sampled_theta = invGamma_half @ y.data
-
- def sample_newton_langevin(self, steps=1000, stepsize=None, verbose=False):
- Gamma_half, invGamma_half = self.packing.cov(inverse=True)
- invGamma = invGamma_half @ invGamma_half
- if self.data is not None:
-
- if self.feedback == "count-record" and self.dual == False:
-
- observations = self.observations @ invGamma_half
- phis = self.phis @ invGamma_half
-
- if self.observations is not None:
- nabla = lambda y, bar: -torch.einsum('i,ij->j', 1. / (observations @ y).view(-1),
- observations).view(-1, 1) + \
- torch.sum(phis, dim=0).view(-1, 1) \
- + self.s * invGamma @ y.view(-1, 1) - bar * 1. / y
- else:
- nabla = lambda theta, bar: torch.sum(phis, dim=0).view(-1, 1) + self.s * invGamma @ theta.view(
- -1, 1) - bar * 1. / theta
-
- else:
- nabla = lambda theta, bar: self.s * invGamma @ theta.view(-1, 1) - bar * 1. / theta
-
- y = 0.05 * torch.rand(size=(self.get_m(), 1), dtype=torch.float64, requires_grad=True).view(-1, 1) ** 2
-
- barrier = 10.
- # hessian = lambda theta,bar: torch.einsum('ik,k,kj->ij',observations.T,(observations@theta).view(-1),observations) + invGamma + bar/theta**2
- hessian = lambda theta, bar: observations.T @ torch.diag(
- 1 / (observations @ theta).view(-1) ** 2) @ observations + invGamma + torch.diag(bar / theta.view(-1) ** 2)
- hessian_sqrt = lambda theta, bar: torch.cholesky(hessian(theta, bar))
- eta = 1.
-
- for k in range(steps):
- w = torch.randn(size=(self.get_m(), 1)).double()
- nabla_val = nabla(y, barrier)
- y.data = y.data - torch.linalg.solve(hessian(y.data, barrier), nabla_val) + np.sqrt(
- 2 * eta) * torch.linalg.solve(hessian_sqrt(y.data, barrier), w)
-
- if verbose:
- print("Iter:", k)
- print(y.T)
-
- self.sampled_theta = invGamma_half @ y.data
-
- # self.sampled_theta = y.data
-
- def sample_hmc(self, steps=1000, stepsize=None, verbose=False):
- import hamiltorch
- phis = self.phis
- if self.feedback == "count-record" and self.dual == False:
- if self.observations is not None:
- obs = self.observations
- func = lambda y: torch.sum(torch.log(obs @ y)) \
- - torch.sum((phis @ y)) \
- - self.s * y.T @ y
- else:
- func = lambda y: - torch.sum(phis @ y).view(-1, 1) \
- - self.s * y.T @ y
-
- num_samples = 1
- num_steps_per_sample = steps
- if stepsize is None:
- step_size = 1e-8
- else:
- step_size = stepsize
-
- params_init = self.rate
- self.sample_theta = hamiltorch.sample(log_prob_func=func,
- params_init=params_init,
- num_samples=num_samples,
- step_size=step_size,
- num_steps_per_sample=num_steps_per_sample)
- print(self.sampled_theta)
-
- def sample_variational(self, xtest, accuracy=1e-4, verbose=False, samples=1):
- from stpy.approx_inference.variational_mf import VMF_SGCP
- cov_params = [self.kernel_object.kappa, self.kernel_object.gamma]
- S_borders = np.array([[-1., 1.]])
- num_inducing_points = self.m
- num_integration_points = 256
- X = self.x
-
- var_mf_sgcp = VMF_SGCP(S_borders, X, cov_params, num_inducing_points,
- num_integration_points=num_integration_points,
- update_hyperparams=False, output=0, conv_crit=accuracy)
- var_mf_sgcp.run()
- sample_paths = var_mf_sgcp.sample_posterior(xtest, num_samples=1.)
- return sample_paths
-
- def sample(self, verbose=False, steps=1000, domain=None):
- """
- :return:
- """
- if self.steps is not None:
- steps = self.steps
-
- if self.stepsize is not None:
- stepsize = self.stepsize
- else:
- stepsize = None
-
- l, Lambda, u = self.get_constraints()
- print("Sampling started.")
- if self.rate is None:
- self.fit_gp()
-
- if self.sampling == 'mirror':
- self.sample_mirror_langevin(steps=steps, verbose=verbose)
- elif self.sampling == 'proximal+prox':
- self.sample_proximal_langevin_prox(steps=steps, verbose=verbose)
- elif self.sampling == "proximal+simple_prox":
- self.sample_proximal_langevin_simple_prox(steps=steps, verbose=verbose)
- elif self.sampling == "hessian":
- self.sample_hessian_positive_langevin(steps=steps, verbose=verbose, stepsize=stepsize)
- elif self.sampling == "hessian2":
- self.sample_hessian_positive_langevin_2(steps=steps, verbose=verbose, stepsize=stepsize)
- elif self.sampling == "mla_prime":
- self.sample_mla_prime(steps=steps, verbose=verbose, stepsize=stepsize)
- elif self.sampling == 'hmc':
- self.sample_hmc(steps=steps, verbose=verbose, stepsize=stepsize)
- elif self.sampling == 'polyia_variational':
- self.sample_variational(accuracy=1. / steps, verbose=verbose)
- else:
- raise NotImplementedError("Sampling of such is not supported.")
-
- print("Sampling finished.")
-
- def sampled_lcb_ucb(self, xtest, samples=100, delta=0.1):
- paths = []
- for i in range(samples):
- self.sample()
- path = self.sample_path_points(xtest).view(1, -1)
- paths.append(path)
-
- paths = torch.cat(paths, dim=0)
- lcb = torch.quantile(paths, delta, dim=0)
- ucb = torch.quantile(paths, 1 - delta, dim=0)
- return lcb, ucb
-
- def penalized_likelihood_fast(self, threads=4):
- l, Lambda, u = self.get_constraints()
- Gamma_half, invGamma_half = self.cov(inverse=True)
-
- if self.dual == False:
- # using all points without anchor points
- if self.observations is not None:
- def objective(theta):
- return -torch.sum(torch.log(self.observations @ invGamma_half @ theta)) + torch.sum(
- self.phis @ invGamma_half @ theta) + self.s * 0.5 * torch.sum((invGamma_half @ theta) ** 2)
- else:
- def objective(theta):
- return torch.sum(self.phis @ invGamma_half @ theta) + self.s * 0.5 * torch.sum(
- (invGamma_half @ theta) ** 2)
- else:
- # using anchor points
- mask = self.bucketized_counts > 0
- phis = self.varphis[mask, :]
- tau = self.total_bucketized_time[mask]
-
- if self.observations is not None:
- observations = self.anchor_points_emb
- weights = self.anchor_weights
- mask = weights > 0.
-
- def objective(theta):
- return -torch.einsum('i,i', weights[mask],
- torch.log(observations[mask, :] @ invGamma_half @ theta)) + torch.einsum('i,i',
- tau,
- phis @ invGamma_half @ theta) + self.s * 0.5 * torch.sum(
- (invGamma_half @ theta) ** 2)
- else:
- def objective(theta):
- return torch.einsum('i,i', tau, phis @ invGamma_half @ theta) + self.s * 0.5 * torch.sum(
- (invGamma_half @ theta) ** 2)
-
- if self.rate is not None:
- theta0 = torch.zeros(size=(self.get_m(), 1)).view(-1).double()
- theta0.data = self.rate.data
- else:
- theta0 = torch.zeros(size=(self.get_m(), 1)).view(-1).double()
-
- eps = 1e-4
- res = minimize(objective, theta0.numpy(), backend='torch', method='L-BFGS-B',
- bounds=(l[0] + eps, u[0]), precision='float64', tol=1e-8,
- options={'ftol': 1e-08,
- 'gtol': 1e-08, 'eps': 1e-08,
- 'maxfun': 15000, 'maxiter': 15000,
- 'maxls': 20})
-
- self.rate = invGamma_half @ torch.from_numpy(res.x)
- print(res.message)
- return self.rate
-
- def penalized_likelihood(self, threads=4):
-
- theta = cp.Variable(self.get_m())
- l, Lambda, u = self.get_constraints()
-
- Gamma_half = self.cov(inverse=False)
-
- if self.dual == False:
-
- # using all points without anchor points
- phis = self.phis.numpy()
- if self.observations is not None:
- observations = self.observations.numpy()
- objective = cp.Minimize(-cp.sum(cp.log(observations @ theta)) +
- cp.sum(phis @ theta) + self.s * 0.5 * cp.sum_squares(theta))
- else:
- objective = cp.Minimize(cp.sum(phis @ theta) + self.s * 0.5 * cp.sum_squares(theta))
-
- else:
-
- # using anchor points
- mask = self.bucketized_counts.clone().numpy() > 0
- phis = self.varphis[mask, :].clone().numpy()
- tau = self.total_bucketized_time[mask].clone().numpy()
-
- if self.observations is not None:
- observations = self.anchor_points_emb.numpy()
- weights = self.anchor_weights.numpy()
- mask = weights > 0.
- objective = cp.Minimize(-cp.sum(cp.multiply(weights[mask], cp.log(observations[mask, :] @ theta))) +
- cp.sum(cp.multiply(tau, phis @ theta)) + self.s * 0.5 * cp.sum_squares(theta))
- else:
- objective = cp.Minimize(cp.sum(cp.multiply(tau, phis @ theta)) + self.s * 0.5 * cp.sum_squares(theta))
-
- constraints = []
-
- Lambda = Lambda @ Gamma_half.numpy()
-
- constraints.append(Lambda @ theta >= l)
- constraints.append(Lambda @ theta <= u)
-
- prob = cp.Problem(objective, constraints)
-
- if self.rate is not None:
- theta.value = self.rate.numpy()
-
- try:
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False,
- mosek_params={mosek.iparam.num_threads: threads,
- mosek.iparam.intpnt_solve_form: mosek.solveform.dual,
- mosek.dparam.intpnt_co_tol_pfeas: 1e-4,
- mosek.dparam.intpnt_co_tol_dfeas: 1e-4,
- mosek.dparam.intpnt_co_tol_rel_gap: 1e-4})
-
- self.rate = torch.from_numpy(theta.value)
- return self.rate
- except:
- print("Optimization failed. Using the old value.")
- print(prob.status)
- return self.rate
-
- def penalized_likelihood_integral(self, threads=4):
-
- phis = self.phis.numpy()
- counts = self.counts.numpy()
-
- theta = cp.Variable(self.get_m())
- l, Lambda, u = self.get_constraints()
- Gamma_half = self.cov().numpy()
- objective = cp.Minimize(-cp.sum(counts @ cp.log(phis @ theta)) + cp.sum(phis @ theta)
- + self.s * 0.5 * cp.sum_squares(theta))
-
- constraints = []
- Lambda = Lambda @ Gamma_half
- constraints.append(Lambda @ theta >= l)
- constraints.append(Lambda @ theta <= u)
-
- # if self.rate is not None:
- # theta.value = self.rate.numpy()
- try:
- prob = cp.Problem(objective, constraints)
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False,
- mosek_params={mosek.iparam.num_threads: threads,
- mosek.iparam.intpnt_solve_form: mosek.solveform.primal,
- mosek.dparam.intpnt_co_tol_pfeas: 1e-4,
- mosek.dparam.intpnt_co_tol_dfeas: 1e-4,
- mosek.dparam.intpnt_co_tol_rel_gap: 1e-4})
- self.rate = torch.from_numpy(theta.value)
- except:
- print("Optimization failed. Using the old value.")
- print(prob.status)
-
- return self.rate
-
- def bucketization(self):
-
- phis = []
- observations = []
-
- # project sets to smallest forms, and then sum on those only
- basic_sets = self.basic_sets
-
- data_basic = [[] for _ in range(len(basic_sets))]
- sensing_times = [[] for _ in range(len(basic_sets))]
- counts = torch.zeros(len(basic_sets)).int()
- total_data = 0.
- self.total_bucketized_obs = torch.zeros(size=(len(basic_sets), 1)).double().view(-1)
- self.total_bucketized_time = torch.zeros(size=(len(basic_sets), 1)).double().view(-1)
-
- for sample in self.data:
- S, obs, dt = sample
- if obs is not None:
- total_data = total_data + obs.size()[0] # total counts
- for index, elementary in enumerate(basic_sets): # iterate over basic sets
- mask = elementary.is_inside(obs) # mask which belong to the elementary
- if S.inside(elementary) == True:
- data_basic[index].append(obs[mask])
- counts[index] += 1
- sensing_times[index].append(dt)
- else:
- for index, elementary in enumerate(basic_sets):
- if S.inside(elementary) == True:
- data_basic[index].append(torch.Tensor([]))
- counts[index] += 1
- sensing_times[index].append(dt)
-
- for index, elementary in enumerate(basic_sets):
- arr = np.array([int(elem.size()[0]) for elem in data_basic[index]]) # counts over sensing rounds
- phi = self.packing.integral(elementary) # * counts[index]
-
- self.total_bucketized_obs[index] = float(np.sum(arr))
- self.total_bucketized_time[index] = float(np.sum(sensing_times[index]))
-
- observations.append(arr)
- phis.append(phi.view(1, -1)) # construct varphi_B
-
- self.bucketized_obs = observations.copy() # these are number of counts associated with sensings
- self.bucketized_time = sensing_times.copy() # these are times each basic set has been sensed
- self.bucketized_counts = counts # these are count each basic set has been sensed
-
- def variance_correction(self, variance):
-
- if self.var_cor_on == 1:
-
- g = lambda B, k, mu: -0.5 * (B ** 2) / ((mu ** 2) * k) - B / (mu * k) + (np.exp(B / (k * mu)) - 1)
- gn = lambda k: g(self.U, k, variance)
-
- from scipy import optimize
- k = optimize.bisect(gn, 1, 10000000)
-
- return k
- else:
- return 1.
-
- def least_squares_weighted(self, threads=4):
-
- # if self.approx_fit == False:
- # self.bucketization()
-
- theta = cp.Variable(self.get_m())
- l, Lambda, u = self.get_constraints()
- Gamma_half = self.cov().numpy()
-
- mask = self.bucketized_counts.clone().numpy() > 0
- observations = self.total_bucketized_obs[mask].clone().numpy()
- phis = self.varphis[mask, :].clone().numpy()
- tau = self.total_bucketized_time.clone().numpy()
-
- variances = self.variances.view(-1).clone().numpy()
-
- for i in range(variances.shape[0]):
- if mask[i] > 0:
- variances[i] = variances[i] * tau[i] * self.variance_correction(variances[i] * tau[i])
-
- selected_variances = variances[mask]
- objective = cp.Minimize(
- cp.sum_squares((cp.multiply((phis @ theta), tau[mask]) - observations) / (np.sqrt(selected_variances)))
- + 0.5 * self.s * cp.norm2(theta) ** 2)
-
- constraints = []
- Lambda = Lambda @ Gamma_half
- # constraints.append(Lambda @ theta >= l)
- constraints.append(Lambda @ theta <= u)
-
- prob = cp.Problem(objective, constraints)
-
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False,
- mosek_params={mosek.iparam.num_threads: threads,
- mosek.iparam.intpnt_solve_form: mosek.solveform.primal,
- mosek.dparam.intpnt_co_tol_pfeas: 1e-4,
- mosek.dparam.intpnt_co_tol_dfeas: 1e-4,
- mosek.dparam.intpnt_co_tol_rel_gap: 1e-4})
- print(prob.status)
- self.rate = torch.from_numpy(theta.value)
- return self.rate
-
- def least_sqaures_weighted_fast(self, threads=4):
-
- l, Lambda, u = self.get_constraints()
- Gamma_half, invGamma_half = self.cov(inverse=True)
-
- mask = self.bucketized_counts > 0
- observations = self.total_bucketized_obs[mask]
- phis = self.varphis[mask, :]
- tau = self.total_bucketized_time
-
- variances = self.variances.view(-1)
- for i in range(variances.size()[0]):
- if mask[i] > 0:
- variances[i] = variances[i] * tau[i] * self.variance_correction(variances[i] * tau[i])
- selected_variances = variances[mask]
-
- def objective(theta):
- return torch.sum(
- ((tau[mask] * (phis @ invGamma_half @ theta) - observations) / (np.sqrt(selected_variances))) ** 2) \
- + self.s * 0.5 * torch.sum((invGamma_half @ theta) ** 2)
-
- if self.rate is not None:
- theta0 = torch.zeros(size=(self.get_m(), 1)).view(-1).double()
- theta0.data = Gamma_half @ self.rate.data
- else:
- theta0 = torch.zeros(size=(self.get_m(), 1)).view(-1).double()
-
- eps = 1e-4
- res = minimize(objective, theta0.numpy(), backend='torch', method='L-BFGS-B',
- bounds=(l[0] + eps, u[0]), precision='float64', tol=1e-8,
- options={'ftol': 1e-06,
- 'gtol': 1e-06, 'eps': 1e-08,
- 'maxfun': 15000, 'maxiter': 15000,
- 'maxls': 20})
- self.rate = invGamma_half @ torch.from_numpy(res.x)
-
- return self.rate
-
- def least_squares_weighted_integral(self, threads=4):
-
- # if self.approx_fit == False:
- # self.bucketization()
-
- theta = cp.Variable(self.get_m())
- l, Lambda, u = self.get_constraints()
- Gamma_half = self.cov().numpy()
-
- phis = self.phis.clone().numpy() # integrated actions
- if self.rate is None:
- rate = torch.pinverse(torch.from_numpy(Gamma_half)) @ torch.from_numpy(u)
- else:
- rate = self.rate.clone()
-
- if len(self.variances_histogram) > 0:
- variances = self.variances_histogram.numpy()
-
- for i in range(variances.shape[0]):
- variances[i] = variances[i] * self.variance_correction(variances[i])
- else:
- variances = np.zeros(len(self.data))
- i = 0
- for S, obs, dt in self.data:
- variances[i] = S.volume() * self.B
- variances[i] = variances[i] * self.variance_correction(variances[i])
- i = i + 1
-
- observations = self.counts.clone().numpy()
-
- objective = cp.Minimize(cp.sum_squares((phis @ theta - observations) / np.sqrt(variances))
- + self.s * cp.sum_squares(theta))
- constraints = []
- Lambda = Lambda @ Gamma_half
- constraints.append(Lambda @ theta >= l)
- constraints.append(Lambda @ theta <= u)
- prob = cp.Problem(objective, constraints)
-
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False,
- mosek_params={mosek.iparam.num_threads: threads,
- mosek.iparam.intpnt_solve_form: mosek.solveform.dual,
- mosek.dparam.intpnt_co_tol_pfeas: 1e-6,
- mosek.dparam.intpnt_co_tol_dfeas: 1e-6,
- mosek.dparam.intpnt_co_tol_rel_gap: 1e-6})
-
- self.rate = torch.from_numpy(theta.value)
-
- return self.rate
-
- def penalized_likelihood_bins(self, threads=4):
- theta = cp.Variable(self.get_m())
- l, Lambda, u = self.get_constraints()
- Gamma_half = self.cov().numpy()
-
- mask = self.bucketized_counts.clone().numpy() > 0
- observations = self.total_bucketized_obs[mask].clone().numpy()
- phis = self.varphis[mask, :].clone().numpy()
- tau = self.total_bucketized_time[mask].clone().numpy()
-
- constraints = []
- Lambda = Lambda @ Gamma_half
- constraints.append(Lambda @ theta >= l)
- constraints.append(Lambda @ theta <= u)
-
- objective = cp.Minimize(
- -cp.sum(observations @ cp.log(cp.multiply(tau, phis @ theta))) + cp.sum(cp.multiply(phis @ theta, tau))
- + self.s * 0.5 * cp.sum_squares(theta))
- prob = cp.Problem(objective, constraints)
- try:
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False,
- mosek_params={mosek.iparam.num_threads: threads,
- mosek.iparam.intpnt_solve_form: mosek.solveform.dual,
- mosek.dparam.intpnt_co_tol_pfeas: 1e-8,
- mosek.dparam.intpnt_co_tol_dfeas: 1e-8,
- mosek.dparam.intpnt_co_tol_rel_gap: 1e-8})
-
- self.rate = torch.from_numpy(theta.value)
- except:
- print("optimization failed.")
- return self.rate
-
- def penalized_likelihood_integral_bins(self, threads=4):
- phis = self.phis.numpy()
- counts = self.counts.numpy()
-
- theta = cp.Variable(self.get_m())
- l, Lambda, u = self.get_constraints()
- Gamma_half = self.cov().numpy()
- objective = cp.Minimize(-cp.sum(counts @ cp.log(phis @ theta)) + cp.sum(phis @ theta)
- + self.s * 0.5 * cp.sum_squares(theta))
-
- constraints = []
- Lambda = Lambda @ Gamma_half
- constraints.append(Lambda @ theta >= l)
- constraints.append(Lambda @ theta <= u)
-
- try:
- if constraints:
- prob = cp.Problem(objective, constraints)
- else:
- prob = cp.Problem(objective)
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False,
- mosek_params={mosek.iparam.num_threads: threads,
- mosek.iparam.intpnt_solve_form: mosek.solveform.primal,
- mosek.dparam.intpnt_co_tol_pfeas: 1e-6,
- mosek.dparam.intpnt_co_tol_dfeas: 1e-6,
- mosek.dparam.intpnt_co_tol_rel_gap: 1e-6})
- self.rate = torch.from_numpy(theta.value)
- except:
- print("Optimization failed. Using the old value.")
-
- return self.rate
-
- def update_variances(self, value=False, force=False):
- self.approx_fit = True
- if (self.feedback == "count-record" and self.estimator=="least-sq") or force == True:
- print("updating variance")
- for index, set in enumerate(self.basic_sets):
- if value == False:
- ucb = self.ucb(set)
- self.variances[index] = np.minimum(ucb, self.variances[index])
- else:
- self.variances[index] = self.mean_set(set)
- else:
- if self.data is not None:
- if self.peeking == True:
- new_var = []
- for S, _, dt in self.data:
- new_var.append(float(self.ucb(S)) * dt)
- self.variances_histogram = torch.Tensor(new_var.copy()).double()
- else:
- last = self.data[-1]
- new_var = torch.Tensor([self.ucb(last[0]) * last[2]]).double()
- if len(self.variances_histogram) > 0:
- self.variances_histogram = torch.cat((self.variances_histogram, new_var))
- else:
- self.variances_histogram = new_var
- self.approx_fit = False
-
- def ucb(self, S, dt=1., delta=0.5):
-
- if self.data is None or self.rate is None:
- return self.B * S.volume() * dt
-
- if self.approx == None:
-
- if self.uncertainty == "laplace":
- return self.mean_var_laplace_set(S, dt=dt, beta=self.beta(0))[1]
-
- elif self.uncertainty == "least-sq":
- return self.mean_var_reg_set(S, dt=dt, beta=self.beta(0))[1]
-
- elif self.uncertainty == "bins":
- return self.mean_var_bins_set(S, dt=dt, beta=self.beta(0))[1]
-
- elif self.uncertainty == "likelihood-ratio":
- return self.mean_var_ratio_set(S, dt=dt, beta=self.beta(0))[1]
-
- elif self.uncertainty == "conformal":
- return self.mean_var_conformal_set(S, dt=dt, delta=delta)[2]
-
- else:
- raise AssertionError("Not Implemented.")
-
- elif self.approx == "ellipsoid":
-
- if self.approx_fit == False:
- self.fit_ellipsoid_approx()
- self.beta(0)
- print("Fitting Approximation.")
- self.approx_fit = True
- return self.map_lcb_ucb_approx_action(S, dt=dt, beta=self.beta(0))[2]
- else:
- raise AssertionError("Not implemented.")
-
- def mean_std_per_action(self, S, W, dt, beta):
-
- phi = self.packing.integral(S) * dt
- Gamma_half = self.cov().numpy()
-
- l, Lambda, u = self.get_constraints()
-
- Lambda = Lambda @ Gamma_half
- ucb, _ = maximize_on_elliptical_slice(phi.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta, l, Lambda, u)
- lcb, _ = maximize_on_elliptical_slice(-phi.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta, l, Lambda, u)
- map = phi @ self.rate
-
- return map, float(ucb), -float(lcb)
-
- def mean_var_laplace_set(self, S, dt, beta=2.):
- if self.approx_fit == False:
- self.W = self.construct_covariance_matrix_laplace()
- self.approx_fit = True
- return self.mean_std_per_action(S, self.W, dt, beta)
-
- def mean_var_reg_set(self, S, dt, beta=2.):
- if self.approx_fit == False:
- self.W = self.construct_covariance_matrix_regression()
- self.approx_fit = True
- return self.mean_std_per_action(S, self.W, dt, beta)
-
- def mean_var_bins_set(self, S, dt, beta=2.):
- if self.approx_fit == False:
- self.W = self.construct_covariance_matrix_bins()
- self.approx_fit = True
- return self.mean_std_per_action(S, self.W, dt, beta)
-
- def mean_var_ratio_set(self, S, dt, beta=2.):
- x = self.packing.integral(S) * dt
- map = x @ self.rate
- # v = np.log(1. / 0.1) - torch.sum(self.counts.double() @ torch.log(self.phis.double() @ self.rate)) \
- # + torch.sum(self.phis.double() @ self.rate) + 0.5 * self.s * torch.norm(self.rate) ** 2
- v = np.log(1. / 0.1) + self.likelihood + 0.5 * self.s * torch.norm(self.rate) ** 2
-
- phis = self.phis.numpy()
- counts = self.counts.numpy()
- theta = cp.Variable(self.get_m())
- l, Lambda, u = self.get_constraints()
- Gamma_half = self.cov().numpy()
-
- objective_min = cp.Minimize(x @ theta)
- objective_max = cp.Maximize(x @ theta)
-
- constraints = []
- Lambda = Lambda @ Gamma_half
- constraints.append(Lambda @ theta >= l)
- constraints.append(Lambda @ theta <= u)
-
- constraints.append(
- -cp.sum(counts @ cp.log(phis @ theta)) + cp.sum(phis @ theta) + self.s * 0.5 * cp.sum_squares(
- theta) <= v)
-
- prob = cp.Problem(objective_min, constraints)
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False)
- lcb = np.dot(theta.value, x)
- prob = cp.Problem(objective_max, constraints)
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False)
- ucb = np.dot(theta.value, x)
-
- return map, ucb, lcb
-
- def map_lcb_ucb_approx_action(self, S, dt=1., beta=2.):
- phi = self.packing.integral(S)
- map = dt * phi @ self.rate
-
- ucb = map + beta * np.sqrt(phi @ self.W_inv_approx @ phi.T)
- # ucb = np.minimum(dt * ucb, self.B * S.volume() * dt)
-
- lcb = map - beta * np.sqrt(phi @ self.W_inv_approx @ phi.T)
- # lcb = np.maximum(dt * lcb, self.b * S.volume() * dt)
- return map, lcb, ucb
-
- def fit_ellipsoid_approx(self):
-
- if self.uncertainty == "laplace":
- self.W = self.construct_covariance_matrix_laplace()
- elif self.uncertainty == 'least-sq':
- self.W = self.construct_covariance_matrix_regression()
- elif self.uncertainty == 'bins':
- self.W = self.construct_covariance_matrix_bins()
- else:
- raise AssertionError("Not implemented.")
-
- self.W_inv_approx = torch.pinverse(self.W)
-
- def construct_covariance_matrix(self):
- if self.estimator == "likelihood":
- self.W = self.construct_covariance_matrix_laplace()
- elif self.estimator == "least-sq":
- self.W = self.construct_covariance_matrix_regression()
- elif self.estimator == "bins":
- self.W = self.construct_covariance_matrix_bins()
- else:
- raise NotImplementedError("This estimator is not implemented.")
- return self.W
-
- def construct_covariance_matrix_laplace(self, theta=None):
- W = torch.zeros(size=(self.get_m(), self.get_m())).double()
-
- if self.feedback == "count-record":
-
- if self.observations is not None:
-
- if theta is None:
- D = torch.diag(1. / ((self.observations @ self.rate).view(-1) ** 2))
- W = self.observations.T @ D @ self.observations
- else:
- D = torch.diag(1. / ((self.observations @ theta).view(-1) ** 2))
- W = self.observations.T @ D @ self.observations
-
- elif self.feedback == "histogram":
- # D = torch.diag(self.counts / (self.phis @ self.rate).view(-1) ** 2)
- if len(self.variances_histogram) > 0:
- variances = self.variances_histogram.view(-1).clone()
-
- for i in range(variances.shape[0]):
- variances[i] = variances[i] * self.variance_correction(variances[i])
-
- D = torch.diag(self.counts / variances ** 2)
-
- W = self.phis.T @ D @ self.phis
- else:
- raise AssertionError("Not implemented.")
-
- return W + torch.eye(self.get_m()).double() * self.s
-
- def construct_covariance_matrix_regression(self):
- W = torch.zeros(size=(self.get_m(), self.get_m())).double()
- if self.data is not None:
- variances = self.variances
- if self.feedback == "count-record":
- mask = self.bucketized_counts > 0
- tau = self.total_bucketized_time
- for index_o, o in enumerate(self.bucketized_obs):
- n = mask[index_o]
- if n > 0:
- A = self.varphis[index_o, :].view(-1, 1) @ self.varphis[index_o, :].view(1, -1) * tau[index_o]
- k = self.variance_correction(tau[index_o] * variances[index_o])
- W = W + A / (variances[index_o] * k)
-
- elif self.feedback == "histogram":
-
- if len(self.variances_histogram) > 0:
- variances = self.variances_histogram.view(-1).clone()
-
- for i in range(variances.shape[0]):
- variances[i] = variances[i] * self.variance_correction(variances[i])
-
- D = torch.diag(1. / variances)
- W = self.phis.T @ D @ self.phis
-
- return W + torch.eye(self.get_m()).double() * self.s
-
- def construct_covariance_matrix_bins(self):
- W = torch.zeros(size=(self.get_m(), self.get_m())).double()
-
- if self.feedback == "count-record":
-
- mask = self.bucketized_counts > 0
- tau = self.total_bucketized_time
- varphis = self.varphis[mask, :]
- variances = self.variances.view(-1).clone()
-
- for i in range(variances.size()[0]):
- if mask[i] > 0:
- variances[i] = variances[i] * self.variance_correction(variances[i] * tau[i])
-
- variances = variances[mask]
- tau = tau[mask]
-
- if self.observations is not None:
- D = torch.diag(tau / variances)
- W = varphis.T @ D @ varphis
-
- elif self.feedback == "histogram":
-
- if len(self.variances_histogram) > 0:
- variances = self.variances_histogram.view(-1).clone()
-
- for i in range(variances.shape[0]):
- variances[i] = variances[i] * self.variance_correction(variances[i])
-
- D = torch.diag(1. / variances)
- W = self.phis.T @ D @ self.phis
- else:
- raise AssertionError("Not implemented.")
-
- return W + torch.eye(self.get_m()).double() * self.s
-
- def gap(self, S, actions, w, dt, beta=2.):
- """
- Estimates the gap of an action S,
- :param S:
- :param dt:
- :return:
- """
- phi = self.packing.integral(S) * dt
- Gamma_half = self.packing.cov().numpy()
-
- if self.approx is None:
- l, Lambda, u = self.get_constraints()
- Lambda = Lambda @ Gamma_half
- ucbs = []
- for action in actions:
- phi_a = self.packing.integral(action) * dt
- # ucb, _ = maximize_on_elliptical_slice(phi_a.numpy()-phi.numpy(), self.W.numpy(), self.rate.view(-1).numpy(), beta, l, Lambda, u)
- ucb, _ = maximize_on_elliptical_slice(phi.numpy(), self.W.numpy(),
- self.rate.view(-1).numpy(), beta, l, Lambda, u)
- ucbs.append(float(ucb))
- gap = torch.max(torch.Tensor(ucbs))
-
- else:
- if self.data is None:
- return (self.B - self.b) * S.volume()
-
- if self.ucb_identified == False:
- print("Recomputing UCB.....")
- self.ucb_identified = True
- self.fit_ellipsoid_approx()
- self.max_ucb = -1000
- self.ucb_action = None
-
- for action in actions:
- _, __, ucb = self.map_lcb_ucb_approx_action(action, dt=dt, beta=self.beta(0))
- ucb = ucb / w(action)
-
- if ucb > self.max_ucb:
- self.max_ucb = ucb
- self.ucb_action = action
-
- map, lcb, ucb = self.map_lcb_ucb_approx_action(S, dt=dt, beta=self.beta(0))
- gap = w(S) * self.max_ucb - lcb
- return gap
-
- def information(self, S, dt, precomputed=None):
-
- if self.data is None:
- return 1.
-
- if self.W is None:
- self.construct_covariance_matrix()
-
- if self.feedback == "count-record":
- varphi_UCB = self.packing.integral(self.ucb_action).view(1, -1) * dt
-
- if precomputed is not None:
- Upsilon = precomputed[S] * dt
- else:
- ind = []
- for index, set in enumerate(self.basic_sets):
- if S.inside(set):
- ind.append(index)
- Upsilon = self.varphis[ind, :] * dt
-
- I = torch.eye(Upsilon.size()[0]).double()
- G = self.W_inv_approx - self.W_inv_approx @ Upsilon.T @ torch.inverse(
- I + Upsilon @ Upsilon.T) @ Upsilon @ self.W_inv_approx
- return 10e-4 + torch.logdet(varphi_UCB @ self.W_inv_approx @ varphi_UCB.T) - torch.logdet(
- varphi_UCB @ G @ varphi_UCB.T)
-
- elif self.feedback == "histogram":
-
- return torch.log(1 + self.packing.integral(S) @ self.W_inv_approx @ self.packing.integral(S) * dt ** 2)
-
- def map_lcb_ucb_approx(self, S, n, beta=2.0, delta=0.01):
- xtest = S.return_discretization(n)
- if self.data is None:
- return self.b + 0 * xtest[:, 0].view(-1, 1), \
- self.b + 0 * xtest[:, 0].view(-1, 1), \
- self.B + 0 * xtest[:, 0].view(-1, 1)
-
- self.fit_ellipsoid_approx()
- self.fit_ellipsoid_approx()
-
- Phi = self.packing.embed(xtest).double()
- map = Phi @ self.rate
- N = Phi.size()[0]
-
- ucb = torch.zeros(size=(N, 1)).double()
- lcb = torch.zeros(size=(N, 1)).double()
-
- for i in range(N):
- x = Phi[i, :].view(-1, 1)
- ucb[i, 0] = np.minimum(map[i] + beta * np.sqrt(x.T @ self.W_inv_approx @ x), self.B)
- lcb[i, 0] = np.maximum(map[i] - beta * np.sqrt(x.T @ self.W_inv_approx @ x), self.b)
- return map, lcb, ucb
-
- def map_lcb_ucb(self, S, n, beta=2.0):
- """
- Calculate exact confidence using laplace approximation on a whole set domain
- :param S: set
- :param n: discretization
- :param beta: beta
- :return:
- """
-
- xtest = S.return_discretization(n)
- if self.data is None:
- return self.b + 0 * xtest[:, 0].view(-1, 1), \
- self.b + 0 * xtest[:, 0].view(-1, 1), \
- self.B + 0 * xtest[:, 0].view(-1, 1)
-
- N = xtest.size()[0]
- Phi = self.packing.embed(xtest)
- map = Phi @ self.rate
-
- if self.uncertainty == "laplace":
- W = self.construct_covariance_matrix_laplace()
- elif self.uncertainty == "least-sq":
- W = self.construct_covariance_matrix_regression()
- elif self.uncertainty == "bins":
- W = self.construct_covariance_matrix_bins()
- else:
- raise AssertionError("Not implemented ")
-
- Gamma_half = self.cov().numpy()
- l, Lambda, u = self.get_constraints()
- Lambda = Lambda @ Gamma_half
- ucb = torch.zeros(size=(N, 1)).double()
- lcb = torch.zeros(size=(N, 1)).double()
-
- for i in range(N):
- x = Phi[i, :]
- ucbi, _ = maximize_on_elliptical_slice(x.numpy(), (W).numpy(), self.rate.view(-1).numpy(), np.sqrt(beta), l,
- Lambda,
- u)
- lcbi, _ = maximize_on_elliptical_slice(-x.numpy(), (W).numpy(), self.rate.view(-1).numpy(), np.sqrt(beta),
- l, Lambda,
- u)
- ucb[i, 0] = ucbi
- lcb[i, 0] = -lcbi
-
- return map, lcb, ucb
-
- def map_lcb_ucb_likelihood_ratio(self, S, n, delta=0.1, current=False):
- xtest = S.return_discretization(n)
-
- if self.data is None:
- return self.b + 0 * xtest[:, 0].view(-1, 1), \
- self.b + 0 * xtest[:, 0].view(-1, 1), \
- self.B + 0 * xtest[:, 0].view(-1, 1)
-
- N = xtest.size()[0]
- Phi = self.packing.embed(xtest)
- map = Phi @ self.rate
-
- ucb = torch.zeros(size=(N, 1)).double()
- lcb = torch.zeros(size=(N, 1)).double()
-
- phis = self.phis.numpy()
-
- if current:
- if self.observations is not None:
- v = np.log(1. / delta) - torch.sum(torch.log(self.observations @ self.rate)) + torch.sum(
- self.phis @ self.rate) + self.s * 0.5 * torch.sum(self.rate ** 2)
- else:
- v = np.log(1. / delta) + torch.sum(
- self.phis @ self.rate) + self.s * 0.5 * torch.sum(self.rate ** 2)
- else:
- if self.feedback == 'count-record':
- v = np.log(1. / delta) + self.loglikelihood + 0.5 * self.s * torch.sum(self.rate ** 2)
- elif self.feedback == 'histogram':
- v = np.log(1. / delta) + self.loglikelihood + 0.5 * self.s * torch.sum(self.rate ** 2)
- else:
- raise NotImplementedError("Not compatible with given feedback model ")
-
- l, Lambda, u = self.get_constraints()
- Gamma_half = self.cov().numpy()
- Lambda = Lambda @ Gamma_half
-
- for i in range(N):
- x = Phi[i, :].numpy()
-
- theta = cp.Variable(self.get_m())
-
- objective_min = cp.Minimize(x @ theta)
- objective_max = cp.Maximize(x @ theta)
-
- constraints = []
- constraints.append(Lambda @ theta >= l)
- constraints.append(Lambda @ theta <= u)
-
- if self.feedback == 'count-record':
- if self.observations is not None:
- observations = self.observations.numpy()
-
- constraints.append(
- -cp.sum(cp.log(observations @ theta)) +
- cp.sum(phis @ theta) + self.s * 0.5 * cp.sum_squares(theta)
- <= v)
- else:
- constraints.append(cp.sum(phis @ theta) + self.s * 0.5 * cp.sum_squares(theta)
- <= v)
-
- elif self.feedback == 'histogram':
- constraints.append(
- -cp.sum(cp.log(phis @ theta)) +
- cp.sum(phis @ theta) + self.s * 0.5 * cp.sum_squares(theta)
- <= v)
- else:
- raise NotImplementedError("Does not exist.")
-
- prob = cp.Problem(objective_min, constraints)
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False)
- lcb[i, 0] = float(np.dot(theta.value, x))
-
- prob = cp.Problem(objective_max, constraints)
- prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False)
- ucb[i, 0] = float(np.dot(theta.value, x))
-
- return map, lcb, ucb
-
- def mean_var_conformal_set(self, S, dt, beta=2., max_val=None, delta=0.05):
- # self.bucketize_prepare()
- if max_val is None:
- max_val = int(self.B * self.basic_sets[0].volume() * dt) + 1
- map, lcb, ucb = self.conformal_confidence_set(S, delta=delta, max_val=max_val, dt=dt)
- return map, lcb, ucb
-
- def conformal_score_func(self, theta, new, index):
-
- if new[1] is None:
- n_new = 0
- else:
- n_new = new[1].size()[0]
-
- varphi = self.packing.integral(new[0]) * new[2]
- err_new = abs(float(n_new) - float(varphi @ theta))
- n = len(self.bucketized_obs[index])
-
- if n > 0:
-
- phis = self.varphis[index].repeat(n, 1)
- res = torch.Tensor(self.bucketized_obs[index]).double()
-
- err = torch.abs(res - (phis @ theta.view(-1, 1)).view(-1))
-
- return torch.sum(err < err_new).double() / float(n + 1.) + 1. / (float(n) + 1.)
-
- else:
- return 0.
-
- def conformal_confidence(self, delta=0.05, max_val=20, dt=1, step=1):
- lcb = []
- ucb = []
- map = []
-
- if self.data is not None:
- self.bucketization(time=True)
-
- for S in self.basic_sets:
- m, u, l = self.conformal_confidence_set(S, delta=delta, max_val=max_val, dt=dt, step=step)
-
- map.append(m)
- ucb.append(u)
- lcb.append(l)
-
- return torch.Tensor(map).double(), torch.Tensor(ucb).double(), torch.Tensor(lcb).double()
-
- def conformal_confidence_set(self, S, delta=0.05, max_val=20, dt=1., step=1):
- """
- :return: (lcb,ucb)
- """
-
- if self.data is not None:
- if self.feedback == "count-record":
- self.penalized_likelihood()
- elif self.feedback == "histogram":
- self.penalized_likelihood_integral()
-
- # identify the set in basic sets
- index = 0
- for set in self.basic_sets:
- if set.inside(S):
- break
- index += 1
-
- # calculate map estimate
- map = float(self.rate @ self.packing.integral(S))
- else:
- map = self.b
- return map, self.B, self.b
-
- scores = []
- j = 0
- score = 1.
- lowest = 0
- n = float(len(self.bucketized_obs[index]))
-
- while score > np.ceil((1 - delta) * (n + 1)) / (n + 1) and j <= max_val:
- lowest = j
- if j > 0:
- obs = torch.zeros(size=(j, self.d)).double()
- for i in range(self.d):
- obs[:, i] = torch.from_numpy(np.random.uniform(S.bounds[i, 0], S.bounds[i, 1], size=j))
- else:
- obs = None
-
- # new observation
- new = (S, obs, dt)
-
- old_phis, old_observations, old_counts = self.add_data_point_and_remove(new)
-
- if self.feedback == "count-record":
- theta_new = self.penalized_likelihood()
- elif self.feedback == "histogram":
- theta_new = self.penalized_likelihood_integral()
-
- # restore back the data
- self.phis = old_phis
- self.observations = old_observations
- self.counts = old_counts
-
- # calculate the score
- score = self.conformal_score_func(theta_new, new, index)
- n = float(len(self.bucketized_obs[index]))
-
- print(j, "/", max_val, score, np.ceil((1 - delta) * (n + 1)) / (n + 1))
- j = j + 1
-
- j = max_val
- score = 1.
- largest = max_val
-
- while score > np.ceil((1 - delta) * (n + 1)) / (n + 1) and j > lowest:
- largest = j
- if j > 0:
- obs = torch.zeros(size=(j, self.d)).double()
- for i in range(self.d):
- obs[:, i] = torch.from_numpy(np.random.uniform(S.bounds[i, 0], S.bounds[i, 1], size=j))
- else:
- obs = None
-
- # new observation
- new = (S, obs, dt)
-
- old_phis, old_observations, old_counts = self.add_data_point_and_remove(new)
-
- if self.feedback == "count-record":
- theta_new = self.penalized_likelihood()
- elif self.feedback == "histogram":
- theta_new = self.penalized_likelihood_integral()
-
- # restore back the data
- self.phis = old_phis
- self.observations = old_observations
- self.counts = old_counts
-
- # calculate the score
- score = self.conformal_score_func(theta_new, new, index)
- n = float(len(self.bucketized_obs[index]))
-
- print(j, "/", max_val, score, np.ceil((1 - delta) * (n + 1)) / (n + 1))
- j = j - 1
- # scores = np.array(scores)
- # mask = scores < np.ceil((1-delta)*(n+1))/(n+1)
-
- # if np.sum(mask) == 0:
- # lowest = 0
- # largest = max_val
- # else:
- # lowest = np.min(np.arange(0,max_val,step)[mask])
- # largest = np.max(np.arange(0, max_val, step)[mask])
-
- lcb = lowest / dt / S.volume()
- ucb = largest / dt / S.volume()
-
- return (map, ucb, lcb)
+ def __init__(
+     self,
+     anchor_hierarchy: HierarchicalBorelSets,
+     d: int = 1,
+     basis_size_per_dim: int = 100,
+     kernel: Optional[KernelFunction] = None,
+     max_intensity: float = 1.0,
+     s=1.0,
+     jitter=10e-8,
+     min_intensity: float = 0.0,
+     basis: str = "triangle",
+     estimator: str = "likelihood",
+     feedback_type: str = "count-record",
+     offset=0.1,
+     uncertainty="laplace",
+     approx=None,
+     sampling_stepsize=None,
+     embedding: Optional[Embedding] = None,
+     beta=2.0,
+     sampling="proximal+prox",
+     peeking=True,
+     constraints=True,
+     var_cor_on=True,
+     samples_nystrom=15000,
+     inverted_constraint=False,
+     langevine_sampling_steps=None,
+     use_anchors=True,
+     no_anchor_points=1024,
+     U=1.0,
+     optimization_library="torch",
+     roi: torch.Tensor | BorelSet | None = None,
+     roi_discretization: int = 30,
+     memory_limit=None,
+     device: torch.device | str | None = None,
+     dtype: torch.dtype | None = None,
+ ):
+     """
+     Set up the estimator state, the positive basis and the anchor points.
+
+     :param anchor_hierarchy: hierarchy of sets; its finest level provides the
+         basic sets and its ``top_node`` is discretized into anchor points
+     :param d: dimension of the data
+     :param basis_size_per_dim: number of basis functions per dimension
+     :param kernel: forwarded to the embedding as ``kernel_object``
+     :param max_intensity: maximal value of the intensity function (``B``)
+     :param s: regularization weight; ``W`` starts as ``s * I``
+     :param jitter: ``sqrt(jitter)`` is forwarded to the embedding as ``s``
+     :param min_intensity: minimal value of the intensity function (``b``)
+     :param basis: one of "triangle", "bernstein", "splines", "nystrom",
+         "overlap-splines", "faber", "optimal-positive", "custom"
+     :param estimator: estimator name consulted by ``fit_gp``
+     :param feedback_type: "count-record" or "histogram"
+     :param embedding: embedding used when ``basis == "custom"``
+     :param beta: if negative, ``self.beta(t)`` calls ``beta_theory()``,
+         otherwise it returns this constant
+     :param samples_nystrom: forwarded as ``samples`` to the "nystrom" and
+         "optimal-positive" embeddings
+     :param use_anchors: stored as ``self.dual``; enables anchor points
+     :param no_anchor_points: requested number of anchor points
+     :param roi, roi_discretization, memory_limit: forwarded to the
+         "optimal-positive" embedding as ``data``, ``discretization_size``
+         and ``memory_limit``
+     :param device: defaults to ``torch.get_default_device()`` at call time
+     :param dtype: defaults to ``torch.get_default_dtype()`` at call time
+     :raises NotImplementedError: for an unknown ``basis``, or when anchors
+         are requested for a dimension other than 1 or 2
+
+     The remaining arguments are stored on the instance unchanged.
+     """
+     self.d = d
+     """ Dimension of the data """
+     self.s = s
+     self.b = min_intensity
+     """ Minimal value of the intensity function """
+     self.B = max_intensity
+     """ Maximal value of the intensity function """
+     self.U = U
+     self.stepsize = sampling_stepsize
+     self.sampling = sampling
+     self.steps = langevine_sampling_steps
+     self.optimization_library = optimization_library
+     self.kernel = kernel
+     # set hierarchy
+     self.constraints = constraints
+     self.hierarchy = anchor_hierarchy
+     self.ucb_identified = False
+     self.inverted_constraint = inverted_constraint
+     # approximation
+     self.loglikelihood = 0.0
+     self.dual = use_anchors
+     self.peeking = peeking
+     self.no_anchor_points = no_anchor_points
+     if beta < 0.0:
+         self.beta = lambda t: self.beta_theory()
+     else:
+         self.beta = lambda t: beta
+     self.var_cor_on = var_cor_on
+     # Resolve the torch defaults now rather than in the signature: a default
+     # argument is evaluated once, when the class body is executed, and would
+     # ignore later torch.set_default_device / set_default_dtype calls.
+     self.device = torch.get_default_device() if device is None else device
+     self.dtype = torch.get_default_dtype() if dtype is None else dtype
+
+     # keyword arguments shared by every built-in embedding
+     packing_kwargs = dict(
+         kernel_object=kernel,
+         B=max_intensity,
+         b=min_intensity,
+         offset=offset,
+         s=np.sqrt(jitter),
+     )
+     if basis == "triangle":
+         self.packing = TriangleEmbedding(d, basis_size_per_dim, **packing_kwargs)
+     elif basis == "bernstein":
+         self.packing = BernsteinEmbedding(d, basis_size_per_dim, **packing_kwargs)
+     elif basis == "splines":
+         self.packing = BernsteinSplinesEmbedding(
+             d, basis_size_per_dim, **packing_kwargs
+         )
+     elif basis == "nystrom":
+         self.packing = PositiveNystromEmbeddingBump(
+             d, basis_size_per_dim, samples=samples_nystrom, **packing_kwargs
+         )
+     elif basis == "overlap-splines":
+         self.packing = BernsteinSplinesOverlapping(
+             d, basis_size_per_dim, **packing_kwargs
+         )
+     elif basis == "faber":
+         self.packing = FaberSchauderEmbedding(
+             d, basis_size_per_dim, **packing_kwargs
+         )
+     elif basis == "optimal-positive":
+         self.packing = OptimalPositiveBasis(
+             d,
+             basis_size_per_dim,
+             samples=samples_nystrom,
+             data=roi,
+             discretization_size=roi_discretization,
+             memory_limit=memory_limit,
+             **packing_kwargs,
+         )
+     elif basis == "custom":
+         assert embedding is not None, "basis='custom' requires an embedding"
+         self.packing = embedding
+     else:
+         raise NotImplementedError("The request positive basis is not implemented.")
+     self.m = basis_size_per_dim
+     """ Number of basis functions per dimension """
+     self.data = None
+     self.covariance = False
+
+     # stabilizing the matrix inversion
+     self.jitter = jitter
+
+     # for variance stabilization
+     self.stabilization = None
+     self.approx_fit = False
+
+     # properties of rate estimator
+     self.estimator = estimator
+     self.feedback = feedback_type
+     self.uncertainty = uncertainty
+     self.approx = approx
+
+     # precompute information
+     self.basic_sets = self.hierarchy.get_sets_level(self.hierarchy.levels)
+
+     self.varphis = torch.zeros(size=(len(self.basic_sets), self.get_m())).double()
+     self.variances = torch.ones(size=(len(self.basic_sets), 1)).double().view(-1)
+     self.variances_histogram = []
+     self.observations = None
+     self.rate = None
+     r""" $\hat \theta$ in the paper"""
+     self.W = (s) * torch.eye(self.get_m()).double()
+     self.W_inv_approx = (1.0 / s) * torch.eye(self.get_m()).double()
+     self.beta_value = 2.0
+     self.sampled_theta = None
+     if self.dual == True:
+         if self.d == 1:
+             points_per_dim = no_anchor_points
+         elif self.d == 2:
+             points_per_dim = int(np.sqrt(no_anchor_points))
+         else:
+             # Previously this fell through and failed later with an
+             # AttributeError on the missing ``anchor_points``.
+             raise NotImplementedError(
+                 "Anchor points are only implemented for d = 1 and d = 2."
+             )
+         self.anchor_points = self.hierarchy.top_node.return_discretization(
+             points_per_dim
+         )
+         # One weight per anchor actually generated: the weight mask is used
+         # to index the rows of ``anchor_points_emb``, so the two lengths
+         # must agree even when ``no_anchor_points`` is not a perfect square.
+         self.anchor_weights = torch.zeros(
+             size=(self.anchor_points.size()[0],)
+         ).double()
+         self.global_dt = 0.0
+         self.anchor_points_emb = self.packing.embed(self.anchor_points)
+
+     if feedback_type == "count-record" and self.dual:
+         print("Precomputing phis.")
+         for index_set, basic_set in enumerate(self.basic_sets):
+             self.varphis[index_set, :] = self.packing.integral(basic_set)
+             self.variances[index_set] = basic_set.volume() * self.B
+
+         print("Precomputation finished.")
+
+ def add_data_point(self, new_data, times=True):
+     r"""
+     Register a new observation and update ``self.loglikelihood``.
+
+     ``new_data`` has the format
+     (area: BorelSet, data_points: Tensor or None, time_delta: float),
+     where data_points is a 2d tensor with d columns and one row per point
+     observation.
+
+     With ``I`` the integral of the current rate over the sensing area times
+     ``time_delta``, the value added to ``self.loglikelihood`` is
+
+     - ``I - log(I)`` if the feedback is of type histogram
+     - ``I`` minus the sum of the log of (rate at the data points times
+       ``time_delta``) if the feedback is of type count-record
+
+     When no rate has been fitted yet, ``cov() @ u`` (``u`` being the third
+     element returned by ``get_constraints``) is used in its place.
+
+     :param new_data: the observation triple described above
+     :param times: forwarded to the parent ``add_data_point``
+     :raises NotImplementedError: if ``self.feedback`` is neither
+         "histogram" nor "count-record"
+     """
+
+     super().add_data_point(new_data, times=times)
+
+     if self.feedback not in ("histogram", "count-record"):
+         # Previously this surfaced as an UnboundLocalError on ``v`` below.
+         raise NotImplementedError("Not compatible with given feedback model ")
+
+     if self.rate is not None:
+         rate = self.rate
+     else:
+         _, _, u = self.get_constraints()
+         Gamma_half = self.cov()
+         rate = Gamma_half @ u
+
+     integral_term = self.packing.integral(new_data[0]) @ rate * new_data[2]
+
+     # torch.log keeps the values as tensors; np.log on a tensor relies on an
+     # implicit numpy round trip that fails for tensors requiring grad.
+     if self.feedback == "histogram":
+         v = -torch.log(integral_term) + integral_term
+     else:  # "count-record"
+         v = integral_term
+         if new_data[1] is not None:
+             point_term = self.packing.embed(new_data[1]) @ rate * new_data[2]
+             v = v - torch.sum(torch.log(point_term))
+
+     self.loglikelihood += v
+
+ def beta_theory(self):
+     """
+     Return the confidence parameter, recomputing it first unless
+     ``self.approx_fit`` is set.
+
+     The recomputed value is ``sqrt(norm + vol + constraint)`` and is cached
+     in ``self.beta_value``; the individual terms are printed.
+     """
+     recompute = self.approx_fit == False
+     if not recompute:
+         return self.beta_value
+
+     l, Lambda, u = self.get_constraints()
+     Gamma_half, invGamma_half = self.cov(inverse=True)
+
+     # norm term
+     norm = self.s
+
+     # constraint term: residuals further than eps above ``l`` are zeroed
+     eps = 10e-3
+     res = Gamma_half @ self.rate.view(-1, 1) - torch.tensor(l).view(-1, 1)
+     xi = res.masked_fill(res > eps, 0.0)
+     constraint = xi.T @ Gamma_half @ self.W_inv_approx @ Gamma_half.T @ xi
+
+     # concentration term
+     log_det_W = torch.logdet(self.W)
+     vol = 4 * np.log(1.0 / 0.1) + log_det_W - self.get_m() * np.log(self.s)
+
+     self.beta_value = np.sqrt(norm + vol + constraint)
+
+     separator = "-------------------"
+     print(separator)
+     for label, value in (
+         ("New beta:", self.beta_value),
+         ("norm:", norm),
+         ("constraint:", constraint),
+         ("vol:", vol),
+     ):
+         print(label, value)
+     print(separator)
+     return self.beta_value
+
+ def get_constraints(self):
+     """
+     Return the embedding's constraints as a tuple, with every numpy array
+     converted to a torch tensor and every other element passed through.
+     """
+     return tuple(
+         torch.tensor(item) if isinstance(item, np.ndarray) else item
+         for item in self.packing.get_constraints()
+     )
+
+ def cov(self, inverse=False):
+     """Forward to the embedding's ``cov``, passing ``inverse`` through."""
+     embedding = self.packing
+     return embedding.cov(inverse=inverse)
+
+ def fit(self):
+     """Fit the rate by calling ``fit_gp`` with its default arguments."""
+     self.fit_gp()
+     return None
+
+ def fit_gp(
+     self,
+     threads=4,
+     optimization_library=None,
+ ):
+     """
+     Fit the rate with the routine selected by ``self.feedback`` and
+     ``self.estimator``.
+
+     :param threads: forwarded to ``penalized_likelihood`` (cvxpy path only)
+     :param optimization_library: "cvxpy" or "torch"; falls back to
+         ``self.optimization_library`` when None
+     :raises NotImplementedError: unknown optimization library on the
+         count-record / likelihood path
+     :raises AssertionError: unknown feedback or estimator name
+     """
+     if optimization_library is None:
+         optimization_library = self.optimization_library
+
+     # Without data no estimator runs; the rate is set to the first element
+     # returned by get_constraints().
+     if self.data is None:
+         l, Lambda, u = self.get_constraints()
+         Gamma_half = self.cov()
+         self.rate = l
+         return
+
+     mode = (self.feedback, self.estimator)
+
+     if mode == ("count-record", "likelihood"):
+         if optimization_library == "cvxpy":
+             self.penalized_likelihood(threads=threads)
+         elif optimization_library == "torch":
+             self.penalized_likelihood_fast()
+         else:
+             raise NotImplementedError("The optimization method does not exist")
+     elif mode == ("count-record", "least-sq"):
+         self.least_squares_weighted()
+     elif mode == ("count-record", "bins"):
+         self.penalized_likelihood_bins()
+     elif mode == ("histogram", "likelihood"):
+         self.penalized_likelihood_integral()
+     elif mode == ("histogram", "least-sq"):
+         self.least_squares_weighted_integral()
+     elif mode == ("histogram", "bins"):
+         self.penalized_likelihood_integral_bins()
+     else:
+         raise AssertionError("wrong name.")
+
+ def sample_mirror_langevin(self, steps=500, verbose=False):
+ """
+ Draw one parameter sample with a Langevin iteration run in an unconstrained
+ variable ``y``; the constrained variable is ``S @ tanh(y) + v`` with
+ ``v = (u + l) / 2`` and ``S = diag((u - l) / 2)``.
+
+ The result, ``invGamma_half @ (S @ tanh(y) + v)``, is stored in
+ ``self.sampled_theta``; nothing is returned.
+
+ :param steps: number of Langevin iterations
+ :param verbose: if True, print the iterate at every step
+ """
+
+ l, Lambda, u = self.get_constraints()
+ Gamma_half, invGamma_half = self.cov(inverse=True)
+
+ # centre and half-width of the box [l, u]
+ v = torch.tensor((u + l) / 2.0).view(-1, 1)
+ S = torch.diag(torch.tensor(u - l).view(-1) / 2.0).double()
+
+ phis = self.phis.clone() @ invGamma_half
+
+ if self.observations is not None:
+ obs = self.observations @ invGamma_half
+ else:
+ obs = None
+
+ invGamma = invGamma_half.T @ invGamma_half
+ # maps the unconstrained y into the box [l, u]
+ transform = lambda y: S @ torch.tanh(y) + v
+
+ # ``func`` is the objective differentiated in the loop below; one variant per
+ # feedback type. NOTE(review): no branch handles any other feedback value,
+ # in which case ``func`` stays undefined and the loop fails.
+ if self.feedback == "count-record" and self.dual == False:
+ if obs is not None:
+ func = (
+ lambda y: -torch.sum(torch.log(obs @ transform(y)).view(-1))
+ + torch.sum(phis @ transform(y))
+ + self.s * transform(y).T @ invGamma @ transform(y)
+ + torch.sum(torch.log(1.0 / (1.0 - transform(y) ** 2)))
+ )
+ else:
+ func = (
+ lambda y: torch.sum(phis @ transform(y))
+ + self.s * transform(y).T @ invGamma @ transform(y)
+ + torch.sum(torch.log(1.0 / (1.0 - transform(y) ** 2)))
+ ) # torch.sum(torch.log(0.5*(1.+torch.cosh(2*y))))
+
+ elif self.feedback == "count-record" and self.dual == True:
+ # anchored variant: only basic sets with a positive bucketized count
+ # contribute, weighted by their total bucketized time
+ mask = self.bucketized_counts > 0
+ phis = self.varphis[mask, :] @ invGamma_half
+ tau = self.total_bucketized_time[mask]
+
+ if obs is not None:
+ # point observations are replaced by the weighted anchor points
+ obs = self.anchor_points_emb @ invGamma_half
+ weights = self.anchor_weights
+ mask = weights > 0.0
+
+ func = (
+ lambda y: -torch.sum(
+ weights[mask].view(-1, 1)
+ * torch.log(obs[mask, :] @ transform(y))
+ )
+ + torch.sum(tau.view(-1, 1) * (phis @ transform(y)))
+ + self.s * transform(y).T @ invGamma @ transform(y)
+ + torch.sum(torch.log(1.0 / (1.0 - (transform(y) ** 2))))
+ ) # + torch.sum(torch.log(0.5*(1.+torch.cosh(2*y))))
+ else:
+ func = (
+ lambda y: torch.sum(tau.view(-1, 1) * (phis @ transform(y)))
+ + self.s * transform(y).T @ invGamma @ transform(y)
+ + torch.sum(torch.log(1.0 / (1.0 - transform(y) ** 2)))
+ ) # + torch.sum(torch.log(0.5*(1.+torch.cosh(2*y))))
+
+ elif self.feedback == "histogram":
+ # same objective with ``transform`` written out; this variant has no
+ # extra log term
+ func = (
+ lambda y: -torch.sum(
+ self.counts.clone().view(-1)
+ * torch.log(phis @ (S @ torch.tanh(y) + v)).view(-1)
+ )
+ + torch.sum(phis @ (S @ torch.tanh(y) + v))
+ + self.s
+ * (S @ torch.tanh(y) + v).T
+ @ invGamma
+ @ (S @ torch.tanh(y) + v)
+ )
+
+ # the random values are overwritten on the next statement; the tensor is
+ # created here only as the container whose ``.data`` is updated
+ y = torch.rand(size=(self.get_m(), 1), dtype=self.dtype, requires_grad=True)
+
+ # initialize with the current rate estimate mapped through Gamma_half
+ y.data = Gamma_half @ self.rate.view(-1, 1) # u < theta < l
+
+ # invert the tanh map using a box widened by 0.01 on each side
+ # (presumably so the atanh argument stays strictly inside (-1, 1) -- confirm)
+ u_new = u + 0.01
+ l_new = l - 0.01
+ v2 = torch.tensor((u_new + l_new) / 2.0).view(-1, 1)
+ S2 = torch.diag(torch.tensor(u_new - l_new).view(-1) / 2.0).double()
+ #
+ y.data = torch.inverse(S2) @ (y.data - v2)
+ y.data = torch.atanh(y.data)
+
+ # step size from the largest-magnitude eigenvalue of the transformed
+ # Laplace covariance matrix
+ W = (
+ S.T
+ @ invGamma_half.T
+ @ self.construct_covariance_matrix_laplace()
+ @ invGamma_half
+ @ S
+ )
+ # NOTE(review): eigsh returns a length-1 array here; float() on such an
+ # array is deprecated in recent NumPy releases.
+ L = float(
+ scipy.sparse.linalg.eigsh(
+ W.cpu().numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-8
+ )
+ )
+ eta = 0.05 / (L + 1)
+
+ print("Eta:", eta)
+
+ for k in range(steps):
+
+ w = torch.randn(size=(self.get_m(), 1)).double()
+ # the [0, 0, :, :] slice takes the gradient with respect to y out of the
+ # jacobian; this assumes ``func`` returns a (1, 1) tensor -- confirm
+ nabla_y = torch.autograd.functional.jacobian(func, y).data[0, 0, :, :]
+ y.data = y.data - eta * nabla_y + np.sqrt(2 * eta) * w
+ # only used for the verbose printout below
+ theta = torch.tanh(y).detach()
+
+ if verbose == True:
+ print("Iter:", k, (S @ theta + v).T)
+ print(y.T)
+
+ self.sampled_theta = invGamma_half @ transform(y.data)
+
+ def sample_projected_langevin(self, steps=300, verbose=False, stepsize=None):
+ """
+ Draw one parameter sample with a projected Langevin iteration: a noisy
+ gradient step followed by a projection computed with cvxpy.
+
+ The final iterate is stored in ``self.sampled_theta``; nothing is returned.
+ The iteration starts from ``self.rate``, so a fitted rate is required.
+
+ :param steps: number of Langevin iterations
+ :param verbose: if True, print the iterate at every step
+ :param stepsize: optional multiplier; when given the step is
+ ``min(1, stepsize * 0.5 / L)``, otherwise ``0.5 / (L + 1)``
+ """
+
+ Gamma_half = self.packing.cov()
+
+ # Euclidean projection of x onto {theta : Lambda @ Gamma_half @ theta >= l}.
+ # Only the lower bound is imposed; ``u`` is not used.
+ def prox(x):
+ z = x.cpu().numpy()
+ theta = cp.Variable((self.get_m(), 1))
+ objective = cp.Minimize(cp.sum_squares(z - theta))
+ constraints = []
+ l, Lambda, u = self.get_constraints()
+ Lambda = Lambda @ Gamma_half.cpu().numpy()
+ constraints.append(Lambda @ theta >= l.reshape(-1, 1))
+ prob = cp.Problem(objective, constraints)
+ prob.solve(
+ solver=cp.OSQP,
+ warm_start=False,
+ verbose=False,
+ eps_abs=1e-3,
+ eps_rel=1e-3,
+ )
+ return torch.tensor(theta.value)
+
+ # ``nabla`` is the gradient used in the update; one variant per feedback
+ # type. NOTE(review): any other feedback value leaves it undefined.
+ if self.feedback == "count-record" and self.dual == False:
+ if self.observations is not None:
+ nabla = (
+ lambda y: -torch.einsum(
+ "i,ij->j",
+ 1.0 / (self.observations @ y).view(-1),
+ self.observations,
+ ).view(-1, 1)
+ + torch.sum(self.phis, dim=0).view(-1, 1)
+ + self.s * y.view(-1, 1)
+ )
+ else:
+ nabla = lambda theta: torch.sum(self.phis, dim=0).view(
+ -1, 1
+ ) + self.s * theta.view(-1, 1)
+
+ elif self.feedback == "count-record" and self.dual == True:
+ # anchored variant: basic sets with a positive bucketized count,
+ # weighted by their total bucketized time
+ mask = self.bucketized_counts > 0
+ phis = self.varphis[mask, :]
+ tau = self.total_bucketized_time[mask]
+
+ if self.observations is not None:
+ # point observations are represented by the weighted anchor points
+ obs = self.anchor_points_emb
+ weights = self.anchor_weights
+ mask = weights > 0.0
+ nabla = (
+ lambda y: -torch.einsum(
+ "i,ij->j",
+ weights[mask] / ((obs[mask, :] @ y).view(-1)),
+ obs[mask],
+ ).view(-1, 1)
+ + torch.einsum("i,ij->j", tau, phis).view(-1, 1)
+ + self.s * y.view(-1, 1)
+ )
+ else:
+ nabla = lambda y: torch.einsum("i,ij->j", tau, phis).view(
+ -1, 1
+ ) + self.s * y.view(-1, 1)
+
+ elif self.feedback == "histogram":
+ nabla = (
+ lambda theta: -torch.sum(
+ torch.diag((1.0 / (self.phis @ theta).view(-1)) * self.counts)
+ @ self.phis,
+ dim=0,
+ ).view(-1, 1)
+ + torch.sum(self.phis, dim=0).view(-1, 1)
+ + self.s * theta.view(-1, 1)
+ )
+
+ theta = self.rate.view(-1, 1)
+ # step size from the largest-magnitude eigenvalue of the Laplace matrix
+ W = self.construct_covariance_matrix_laplace(minimal=True)
+ # NOTE(review): eigsh returns a length-1 array here; float() on such an
+ # array is deprecated in recent NumPy releases.
+ L = float(
+ scipy.sparse.linalg.eigsh(
+ W.cpu().numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-5
+ )
+ )
+
+ if stepsize is None:
+ eta = 0.5 / (L + 1)
+ else:
+ eta = np.minimum(1, stepsize * 0.5 / L)
+
+ # printed unconditionally, independent of ``verbose``
+ print(eta)
+ for k in range(steps):
+ w = torch.randn(size=(self.get_m(), 1)).double()
+ # noisy gradient step, then projection back onto the constraint set
+ theta = prox(theta - eta * nabla(theta) + np.sqrt(2 * eta) * w)
+
+ if verbose == True:
+ print("Iter:", k, theta.T)
+
+ self.sampled_theta = theta
+
+ def sample_proximal_langevin_prox(self, steps=300, verbose=False, stepsize=None):
+     """
+     Run a proximal Langevin iteration and collect one projected sample per
+     step.
+
+     Each step averages the current iterate with its projection, takes a
+     gradient step and adds Gaussian noise. The step size is recomputed at
+     every iteration from the largest-magnitude eigenvalue ``L`` of the
+     Laplace matrix at the current iterate.
+
+     :param steps: number of Langevin iterations
+     :param verbose: if true, print the iterate at every step
+     :param stepsize: optional multiplier; the step is ``0.5 * stepsize / L``
+         when given and ``0.5 / L`` otherwise
+     :return: list with the projected iterate of every step; the last one is
+         also stored (as a copy) in ``self.sampled_theta``
+     """
+
+     Gamma_half, _ = self.packing.cov(inverse=True)
+     l, _, _ = self.get_constraints()
+
+     def prox(x):
+         # projection computed as a quadratic program with identity cost
+         res = solve_qp(
+             np.eye(self.get_m()),
+             x.cpu().numpy().reshape(-1),
+             C=Gamma_half.cpu().numpy(),
+             b=np.array(l),
+             factorized=True,
+         )
+         return torch.tensor(res[0]).view(-1, 1)
+
+     samples = []
+
+     # ``nabla`` is the gradient used in the update; one variant per feedback
+     # type, and only the regularizer when there is no data.
+     if self.data is not None:
+         if self.feedback == "count-record" and self.dual == False:
+             if self.observations is not None:
+                 nabla = (
+                     lambda y: -torch.einsum(
+                         "i,ij->j",
+                         1.0 / (self.observations @ y).view(-1),
+                         self.observations,
+                     ).view(-1, 1)
+                     + torch.sum(self.phis, dim=0).view(-1, 1)
+                     + self.s * y.view(-1, 1)
+                 )
+             else:
+                 nabla = lambda theta: torch.sum(self.phis, dim=0).view(
+                     -1, 1
+                 ) + self.s * theta.view(-1, 1)
+
+         elif self.feedback == "count-record" and self.dual == True:
+             mask = self.bucketized_counts > 0
+             phis = self.varphis[mask, :]
+             tau = self.total_bucketized_time[mask]
+
+             if self.observations is not None:
+                 obs = self.anchor_points_emb
+                 weights = self.anchor_weights
+                 mask = weights > 0.0
+                 nabla = (
+                     lambda y: -torch.einsum(
+                         "i,ij->j",
+                         weights[mask] / ((obs[mask, :] @ y).view(-1)),
+                         obs[mask],
+                     ).view(-1, 1)
+                     + torch.einsum("i,ij->j", tau, phis).view(-1, 1)
+                     + self.s * y.view(-1, 1)
+                 )
+             else:
+                 nabla = lambda y: torch.einsum("i,ij->j", tau, phis).view(
+                     -1, 1
+                 ) + self.s * y.view(-1, 1)
+
+         elif self.feedback == "histogram":
+             nabla = (
+                 lambda theta: -torch.sum(
+                     torch.diag((1.0 / (self.phis @ theta).view(-1)) * self.counts)
+                     @ self.phis,
+                     dim=0,
+                 ).view(-1, 1)
+                 + torch.sum(self.phis, dim=0).view(-1, 1)
+                 + self.s * theta.view(-1, 1)
+             )
+     else:
+         nabla = lambda theta: self.s * theta.view(-1, 1)
+
+     if self.rate is not None:
+         theta = self.rate.view(-1, 1)
+     else:
+         theta = (
+             self.b
+             + 0.05
+             * torch.rand(
+                 size=(self.get_m(), 1), dtype=self.dtype, requires_grad=False
+             ).view(-1, 1)
+             ** 2
+         )
+
+     for k in range(steps):
+         w = torch.randn(size=(self.get_m(), 1)).double()
+
+         # calculate proper step-size
+         W = self.construct_covariance_matrix_laplace(theta=theta)
+         # eigsh returns a length-1 array; index it explicitly because
+         # float() on a non-scalar array is deprecated in NumPy
+         L = float(
+             scipy.sparse.linalg.eigsh(
+                 W.cpu().numpy(),
+                 k=1,
+                 which="LM",
+                 return_eigenvectors=False,
+                 tol=1e-3,
+             )[0]
+         )
+         if stepsize is not None:
+             eta = 0.5 * stepsize / L
+         else:
+             eta = 0.5 / L
+
+         # update step
+         theta = (
+             0.5 * theta
+             - eta * nabla(theta)
+             + 0.5 * prox(theta)
+             + np.sqrt(2 * eta) * w
+         )
+         if verbose:
+             print("Iter:", k, theta.T)
+
+         samples.append(prox(theta))
+
+     # Reuse the projection of the last iterate instead of solving the same
+     # quadratic program a second time.
+     self.sampled_theta = samples[-1].clone() if samples else prox(theta)
+
+     return samples
+
+ def sample_proximal_langevin_simple_prox(self, steps=300, verbose=False):
+     """
+     Draw one parameter sample with a proximal Langevin iteration whose
+     projection is an elementwise clamp to ``[l, u]`` applied in the
+     ``Gamma_half`` coordinates.
+
+     The projected final iterate is stored in ``self.sampled_theta``; nothing
+     is returned. The iteration starts from ``self.rate``, so a fitted rate
+     is required.
+
+     :param steps: number of Langevin iterations
+     :param verbose: if true, print the iterate and its gradient every step
+     :raises NotImplementedError: if ``self.feedback`` is not supported
+     """
+
+     Gamma_half, invGamma_half = self.packing.cov(inverse=True)
+     l, Lambda, u = self.get_constraints()
+     lower = torch.as_tensor(l).view(-1)
+     upper = torch.as_tensor(u).view(-1)
+
+     def prox_simple(x):
+         # elementwise clamp to the box [l, u]
+         return torch.minimum(torch.maximum(x.view(-1), lower), upper).view(-1, 1)
+
+     def prox(x):
+         return invGamma_half @ prox_simple(Gamma_half @ x)
+
+     # ``nabla`` is the gradient used in the update; one variant per feedback
+     # type.
+     phis = self.phis
+     if self.feedback == "count-record" and self.dual == False:
+         if self.observations is not None:
+             obs = self.observations
+             nabla = (
+                 lambda y: -torch.einsum(
+                     "i,ij->j", 1.0 / (obs @ y).view(-1), obs
+                 ).view(-1, 1)
+                 + torch.sum(phis, dim=0).view(-1, 1)
+                 + self.s * y.view(-1, 1)
+             )
+         else:
+             nabla = lambda y: torch.sum(phis, dim=0).view(-1, 1) + self.s * y.view(
+                 -1, 1
+             )
+
+     elif self.feedback == "count-record" and self.dual == True:
+         mask = self.bucketized_counts > 0
+         phis = self.varphis[mask, :]
+         tau = self.total_bucketized_time[mask]
+
+         if self.observations is not None:
+             obs = self.anchor_points_emb
+             weights = self.anchor_weights
+             mask = weights > 0.0
+             nabla = (
+                 lambda y: -torch.einsum(
+                     "i,ij->j",
+                     weights[mask] / ((obs[mask, :] @ y).view(-1)),
+                     obs[mask],
+                 ).view(-1, 1)
+                 + torch.einsum("i,ij->j", tau, phis).view(-1, 1)
+                 + self.s * y.view(-1, 1)
+             )
+         else:
+             nabla = lambda y: torch.einsum("i,ij->j", tau, phis).view(
+                 -1, 1
+             ) + self.s * y.view(-1, 1)
+
+     elif self.feedback == "histogram":
+         nabla = (
+             lambda y: -torch.einsum(
+                 "i,ij->j", self.counts.view(-1) / (phis @ y).view(-1), phis
+             ).view(-1, 1)
+             + torch.sum(phis, dim=0).view(-1, 1)
+             + self.s * y
+         )
+     else:
+         # previously ``nabla`` stayed undefined and the loop failed
+         raise NotImplementedError("Not compatible with given feedback model ")
+
+     # The random draw is overwritten right away; it is kept so that the
+     # random number stream consumed by this method does not change.
+     y = prox(
+         torch.randn(size=(self.get_m(), 1), dtype=self.dtype, requires_grad=True)
+     )
+     y.data = self.rate.view(-1, 1)
+
+     # step size from the largest-magnitude eigenvalue of the Laplace matrix;
+     # eigsh returns a length-1 array, so index it before float()
+     W = self.construct_covariance_matrix_laplace()
+     L = float(
+         scipy.sparse.linalg.eigsh(
+             W.cpu().numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-5
+         )[0]
+     )
+
+     eta = 0.5 / (L + 1)
+
+     for k in range(steps):
+         noise = torch.randn(size=(self.get_m(), 1)).double()
+         nabla_y = nabla(y.data)
+         y.data = (
+             (1 - eta) * y.data
+             - eta * nabla_y
+             + eta * prox(y.data)
+             + np.sqrt(2 * eta) * noise
+         )
+         if verbose:
+             print("Iter:", k, y.T)
+             # no backward pass is ever run, so ``y.grad`` is None; report
+             # the gradient that was actually used in this step
+             print("grad:", nabla_y.T)
+
+     self.sampled_theta = prox(y.detach())
+
+ def sample_hessian_positive_langevin(self, steps=500, verbose=False, stepsize=None):
+ """
+ Draw one parameter sample with a Langevin iteration that uses the
+ log-barrier ``phi(x) = -sum(log(Lambda @ x))`` as mirror map: the step is
+ taken on ``nabla_phi`` and mapped back by solving ``nabla_phi(s) = z``
+ numerically.
+
+ The final iterate is stored in ``self.sampled_theta``; nothing is returned.
+
+ :param steps: number of Langevin iterations
+ :param verbose: if true, print the initial point and every iterate
+ :param stepsize: optional multiplier; the step is ``stepsize / (L + 1)``
+ when given and ``1 / (L + 1)`` otherwise
+ """
+
+ # ``nabla`` is the gradient used in the update; one variant per feedback
+ # type, and only the regularizer when there is no data.
+ # NOTE(review): with data and an unhandled feedback value it stays undefined.
+ if self.data is not None:
+ if self.feedback == "count-record" and self.dual == False:
+ if self.observations is not None:
+ nabla = (
+ lambda y: -torch.einsum(
+ "i,ij->j",
+ 1.0 / (self.observations @ y).view(-1),
+ self.observations,
+ ).view(-1, 1)
+ + torch.sum(self.phis, dim=0).view(-1, 1)
+ + self.s * y.view(-1, 1)
+ )
+ else:
+ nabla = lambda theta: torch.sum(self.phis, dim=0).view(
+ -1, 1
+ ) + self.s * theta.view(-1, 1)
+
+ elif self.feedback == "count-record" and self.dual == True:
+
+ # anchored variant: basic sets with a positive bucketized count,
+ # weighted by their total bucketized time
+ mask = self.bucketized_counts > 0
+ phis = self.varphis[mask, :]
+ tau = self.total_bucketized_time[mask]
+
+ if self.observations is not None:
+ obs = self.anchor_points_emb
+ weights = self.anchor_weights
+ mask = weights > 0.0
+ nabla = (
+ lambda y: -torch.einsum(
+ "i,ij->j",
+ weights[mask] / ((obs[mask, :] @ y).view(-1)),
+ obs[mask],
+ ).view(-1, 1)
+ + torch.einsum("i,ij->j", tau, phis).view(-1, 1)
+ + self.s * y.view(-1, 1)
+ )
+ else:
+ nabla = lambda y: torch.einsum("i,ij->j", tau, phis).view(
+ -1, 1
+ ) + self.s * y.view(-1, 1)
+
+ elif self.feedback == "histogram":
+ nabla = (
+ lambda theta: -torch.sum(
+ torch.diag((1.0 / (self.phis @ theta).view(-1)) * self.counts)
+ @ self.phis,
+ dim=0,
+ ).view(-1, 1)
+ + torch.sum(self.phis, dim=0).view(-1, 1)
+ + self.s * theta.view(-1, 1)
+ )
+ else:
+ nabla = lambda theta: self.s * theta.view(-1, 1)
+
+ Gamma_half = self.packing.cov()
+ lz, Lambda, u = self.get_constraints()
+
+ Lambda = torch.tensor(Lambda) @ Gamma_half
+ # small random offset above the minimal intensity; flattened to 1-D
+ y = (
+ self.b
+ + 0.05
+ * torch.rand(
+ size=(self.get_m(), 1), dtype=self.dtype, requires_grad=True
+ ).view(-1)
+ ** 2
+ )
+
+ # NOTE(review): ``Gamma_half @ y.data`` is 1-D here; if ``self.rate`` is stored
+ # as a column the sum below broadcasts to a matrix -- confirm the rate's shape.
+ if self.rate is not None:
+ y.data = self.rate.data + Gamma_half @ y.data
+ else:
+ y.data = Gamma_half @ y.data
+
+ if verbose == True:
+ print("initial point")
+ print(y.data)
+
+ # step size from the largest-magnitude eigenvalue of the Laplace matrix
+ W = self.construct_covariance_matrix_laplace()
+ # NOTE(review): eigsh returns a length-1 array here; float() on such an
+ # array is deprecated in recent NumPy releases.
+ L = float(
+ scipy.sparse.linalg.eigsh(
+ W.cpu().numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-5
+ )
+ )
+
+ if stepsize is None:
+ eta = 1.0 / (L + 1)
+ else:
+ eta = stepsize / (L + 1)
+
+ # matrix that scales the injected noise
+ D = lambda x: torch.diag(1.0 / torch.abs(Lambda @ x).view(-1))
+ sqrt_hessian = lambda x: Lambda @ D(x)
+
+ # log-barrier, its gradient and its Hessian; ``phi`` and ``hessian_phi`` are
+ # not referenced by the active code below
+ phi = lambda x: -torch.sum(torch.log(Lambda @ x))
+ nabla_phi = lambda x: -torch.einsum(
+ "i,ij->j", 1.0 / (Lambda @ x).view(-1), Lambda
+ )
+ hessian_phi = (
+ lambda x: Lambda.T @ torch.diag(1.0 / (Lambda @ x).view(-1) ** 2) @ Lambda
+ )
+
+ for k in range(steps):
+ w = torch.randn(size=(self.get_m(), 1)).double()
+ nabla_val = nabla(y)
+ H = sqrt_hessian(y.data)
+ # Langevin step taken on nabla_phi(y)
+ z = (
+ nabla_phi(y.data).view(-1, 1)
+ - eta * nabla_val
+ + np.sqrt(2 * eta) * H @ w
+ )
+
+ # y.data = newton_solve(lambda s: nabla_phi(s).reshape(-1)-z.data.reshape(-1),y.reshape(-1),
+ # verbose = verbose, grad = hessian_phi).view(-1,1)
+
+ # minimization approach: recover the new iterate as the minimizer of
+ # the squared residual of nabla_phi(s) = z
+ def objective(s):
+ return torch.sum((nabla_phi(s).reshape(-1) - z.reshape(-1)) ** 2)
+
+ # #
+
+ # x0 = y.reshape(-1).clone().detach().numpy()
+ # res = minimize(objective, x0, backend='torch', method='Newton-CG', precision='float64', tol=1e-5, hvp_type='vhp')
+ # y.data = torch.tensor(res.x)
+
+ # warm start from the current iterate
+ x0 = y.reshape(-1).clone()
+ res = minimize_torch(objective, x0, method="newton-cg", tol=1e-5)
+ y.data = res.x
+
+ if verbose:
+ print("Iter:", k)
+ print(y.T)
+
+ self.sampled_theta = y.data
+
+ def sample_mla_prime(self, steps=100, verbose=False, stepsize=None):
+ """
+ Run `steps` iterations of a sampler that maps the iterate through w = -1 / y,
+ advances w with `get_increment`, and maps back with y = -1 / w.
+ The final iterate, multiplied by invGamma_half, is stored in `self.sampled_theta`.
+
+ :param steps: number of outer iterations.
+ :param verbose: when truthy, print the initial point and every iterate.
+ :param stepsize: optional scale; eta = stepsize / (L + 1), or 1 / (L + 1) if None.
+ :return: None (the sample is written to `self.sampled_theta`).
+ """
+ Gamma_half, invGamma_half = self.packing.cov(inverse=True)
+ invGamma = invGamma_half.T @ invGamma_half
+ # `l` and `u` are not used below; `Lambda` is rebuilt but not read again in this method.
+ l, Lambda, u = self.get_constraints()
+ Lambda = torch.tensor(Lambda) @ Gamma_half
+
+ if self.data is not None:
+ if self.feedback == "count-record" and self.dual == False:
+ if self.observations is not None:
+ observations = self.observations @ invGamma_half
+ phis = self.phis @ invGamma_half
+ nabla = (
+ lambda y: -torch.einsum(
+ "i,ij->j", 1.0 / (observations @ y).view(-1), observations
+ ).view(-1, 1)
+ + torch.sum(phis, dim=0).view(-1, 1)
+ + self.s * invGamma @ y.view(-1, 1)
+ )
+ # NOTE(review): `phis` is assigned only in the branch above, so this lambda
+ # would fail with an unbound name when called -- confirm and fix.
+ else:
+ nabla = lambda theta: torch.sum(phis, dim=0).view(
+ -1, 1
+ ) + self.s * invGamma @ theta.view(-1, 1)
+
+ # NOTE(review): indentation was lost in this diff; it is unclear which `if` this
+ # `else` closes, so some data/feedback combinations may leave `nabla` unassigned.
+ else:
+ nabla = lambda theta: self.s * invGamma @ theta.view(-1, 1)
+
+ # Random starting point: self.b plus a small squared uniform perturbation.
+ y = (
+ self.b
+ + 0.05
+ * torch.rand(
+ size=(self.get_m(), 1), dtype=self.dtype, requires_grad=True
+ ).reshape(-1, 1)
+ ** 2
+ )
+ # if self.rate is not None:
+ # y.data = Gamma_half @ self.rate.data.view(-1,1) + y.data
+ # else:
+ # The assignment below is a no-op left over from the commented-out branch above.
+ y.data = y.data
+
+ if verbose == True:
+ print("initial point")
+ print(y.data)
+
+ # L is the largest-magnitude eigenvalue of the transformed matrix W; it scales eta.
+ W = invGamma_half.T @ self.construct_covariance_matrix_laplace() @ invGamma_half
+ L = float(
+ scipy.sparse.linalg.eigsh(
+ W.cpu().numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-5
+ )
+ )
+
+ if stepsize is None:
+ eta = 1.0 / (L + 1)
+ else:
+ eta = stepsize / (L + 1)
+
+ from stpy.approx_inference.sampling_helper import get_increment
+
+ for k in range(steps):
+
+ nabla_val = nabla(y)
+
+ # cvxpy minimization
+ # x = cp.Variable((self.get_m(), 1))
+ # objective = cp.Minimize( eta * nabla_val.detach().numpy().T @ x - cp.sum(cp.log(x)) -(-1./y.data).T@x)
+ # constraints = [x >= 0.]
+ #
+ # prob = cp.Problem(objective, constraints)
+ # prob.solve(solver = cp.MOSEK)
+
+ # Starting value handed to get_increment.
+ w0 = eta * nabla_val.data + 1.0 / y.data
+ # initial point for the solve
+ # w0 = -1./( torch.tensor(x.value))
+
+ # simulate
+ f = lambda w, n: n / torch.abs(w)
+ w = get_increment(eta, 1000, f, w0, path=False)
+
+ # back mirror map
+ y.data = -1.0 / w
+
+ if verbose:
+ print("Iter:", k)
+ print(y.T)
+
+ self.sampled_theta = invGamma_half @ y.data
+
+ def sample_hessian_positive_langevin_2(
+ self, steps=500, verbose=False, stepsize=None, preconditioner=True
+ ):
+ """
+ Run `steps` iterations of a sampler that updates z = -1 / y + self.b with a
+ gradient step and noise divided by |y|, then maps back with y = -1 / z + self.b.
+ The final iterate, multiplied by invGamma_half, is stored in `self.sampled_theta`.
+
+ :param steps: number of iterations.
+ :param verbose: when truthy, print the initial point and every iterate.
+ :param stepsize: optional scale; eta = stepsize / (L + 1), or 1 / (L + 1) if None.
+ :param preconditioner: not read anywhere in this method.
+ :return: None (the sample is written to `self.sampled_theta`).
+ """
+
+ Gamma_half, invGamma_half = self.packing.cov(inverse=True)
+ # NOTE(review): no transpose here, unlike `invGamma_half.T @ invGamma_half` in
+ # sample_mla_prime; the two agree only if invGamma_half is symmetric -- confirm.
+ invGamma = invGamma_half @ invGamma_half
+ if self.data is not None:
+
+ if self.feedback == "count-record" and self.dual == False:
+
+ # NOTE(review): self.observations is used here before the None check below,
+ # so the observation-free branch cannot be reached without an error.
+ observations = self.observations @ invGamma_half
+ phis = self.phis @ invGamma_half
+
+ if self.observations is not None:
+ nabla = (
+ lambda y: -torch.einsum(
+ "i,ij->j", 1.0 / (observations @ y).view(-1), observations
+ ).view(-1, 1)
+ + torch.sum(phis, dim=0).view(-1, 1)
+ + self.s * invGamma @ y.view(-1, 1)
+ )
+ else:
+ nabla = lambda theta: torch.sum(phis, dim=0).view(
+ -1, 1
+ ) + self.s * invGamma @ theta.view(-1, 1)
+
+ # NOTE(review): indentation was lost in this diff; it is unclear which `if` this
+ # `else` closes, so some data/feedback combinations may leave `nabla` unassigned.
+ else:
+ nabla = lambda theta: self.s * invGamma @ theta.view(-1, 1)
+
+ # Random starting point: squared uniform draws.
+ y = (
+ torch.rand(
+ size=(self.get_m(), 1), dtype=self.dtype, requires_grad=True
+ ).view(-1)
+ ** 2
+ )
+ # if self.rate is not None:
+ # y.data = Gamma_half @ self.rate.data + y.data
+
+ if verbose == True:
+ print("initial point")
+ print(y.data)
+
+ # NOTE(review): the definition of construct_covariance_matrix_laplace in this file
+ # accepts only `theta`; passing `minimal=True` would raise TypeError unless an
+ # override elsewhere accepts it -- confirm.
+ W = self.construct_covariance_matrix_laplace(minimal=True)
+ # L is the largest-magnitude eigenvalue of W; it scales the step size eta.
+ L = float(
+ scipy.sparse.linalg.eigsh(
+ W.cpu().numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-5
+ )
+ )
+
+ if stepsize is None:
+ eta = 1.0 / (L + 1)
+ else:
+ eta = stepsize / (L + 1)
+
+ for k in range(steps):
+ # Standard normal noise divided element-wise by |y|.
+ w = torch.randn(size=(self.get_m(), 1)).double() / torch.abs(y.data).view(
+ -1, 1
+ )
+ nabla_val = nabla(y)
+ z = (
+ -1.0 / y.data.view(-1, 1)
+ + self.b
+ - eta * Gamma_half @ nabla_val
+ + np.sqrt(2 * eta) * Gamma_half @ w
+ )
+ # Map back from z to y.
+ y.data = -1.0 / z + self.b
+
+ if verbose:
+ print("Iter:", k)
+ print(y.T)
+
+ self.sampled_theta = invGamma_half @ y.data
+
+ def sample_newton_langevin(self, steps=1000, stepsize=None, verbose=False):
+ """
+ Run `steps` iterations of a Newton-preconditioned Langevin update with a
+ barrier term -bar / y added to the gradient, and store invGamma_half times the
+ final iterate in `self.sampled_theta`.
+
+ :param steps: number of iterations.
+ :param stepsize: not read in this method; eta is fixed to 1.0 below.
+ :param verbose: when truthy, print every iterate.
+ :return: None (the sample is written to `self.sampled_theta`).
+ """
+ Gamma_half, invGamma_half = self.packing.cov(inverse=True)
+ invGamma = invGamma_half @ invGamma_half
+ if self.data is not None:
+
+ if self.feedback == "count-record" and self.dual == False:
+
+ # NOTE(review): self.observations is used here before the None check below.
+ observations = self.observations @ invGamma_half
+ phis = self.phis @ invGamma_half
+
+ if self.observations is not None:
+ nabla = (
+ lambda y, bar: -torch.einsum(
+ "i,ij->j", 1.0 / (observations @ y).view(-1), observations
+ ).view(-1, 1)
+ + torch.sum(phis, dim=0).view(-1, 1)
+ + self.s * invGamma @ y.view(-1, 1)
+ - bar * 1.0 / y
+ )
+ else:
+ nabla = (
+ lambda theta, bar: torch.sum(phis, dim=0).view(-1, 1)
+ + self.s * invGamma @ theta.view(-1, 1)
+ - bar * 1.0 / theta
+ )
+
+ # NOTE(review): indentation was lost in this diff; it is unclear which `if` this
+ # `else` closes, so some data/feedback combinations may leave `nabla` unassigned.
+ else:
+ nabla = (
+ lambda theta, bar: self.s * invGamma @ theta.view(-1, 1)
+ - bar * 1.0 / theta
+ )
+
+ # Random starting point: small squared uniform draws.
+ y = (
+ 0.05
+ * torch.rand(
+ size=(self.get_m(), 1), dtype=self.dtype, requires_grad=True
+ ).view(-1, 1)
+ ** 2
+ )
+
+ # Fixed barrier weight passed to `nabla` and `hessian`.
+ barrier = 10.0
+ # hessian = lambda theta,bar: torch.einsum('ik,k,kj->ij',observations.T,(observations@theta).view(-1),observations) + invGamma + bar/theta**2
+ # NOTE(review): `hessian` always reads `observations`, which is assigned only in
+ # the count-record / non-dual branch above -- confirm the other branches are unused.
+ hessian = (
+ lambda theta, bar: observations.T
+ @ torch.diag(1 / (observations @ theta).view(-1) ** 2)
+ @ observations
+ + invGamma
+ + torch.diag(bar / theta.view(-1) ** 2)
+ )
+ hessian_sqrt = lambda theta, bar: torch.cholesky(hessian(theta, bar))
+ eta = 1.0
+
+ for k in range(steps):
+ w = torch.randn(size=(self.get_m(), 1)).double()
+ nabla_val = nabla(y, barrier)
+ # Newton step on the gradient plus noise solved against the Cholesky factor.
+ y.data = (
+ y.data
+ - torch.linalg.solve(hessian(y.data, barrier), nabla_val)
+ + np.sqrt(2 * eta)
+ * torch.linalg.solve(hessian_sqrt(y.data, barrier), w)
+ )
+
+ if verbose:
+ print("Iter:", k)
+ print(y.T)
+
+ self.sampled_theta = invGamma_half @ y.data
+
+ # self.sampled_theta = y.data
+
+ def sample_hmc(self, steps=1000, stepsize=None, verbose=False):
+     """
+     Draw one sample with Hamiltonian Monte Carlo via `hamiltorch.sample`.
+
+     The log-probability is built only for "count-record" feedback with
+     `self.dual == False`; the chain starts at `self.rate`.
+
+     :param steps: number of leapfrog steps per sample.
+     :param stepsize: integrator step size; 1e-8 is used when None.
+     :param verbose: accepted for signature parity with the other samplers; not used.
+     :return: None; the value returned by `hamiltorch.sample` is stored in
+         `self.sampled_theta`.
+     :raises NotImplementedError: for any other feedback/dual combination.
+     """
+     # Previously an unsupported configuration surfaced as an unbound `func`.
+     if not (self.feedback == "count-record" and self.dual == False):
+         raise NotImplementedError("Sampling of such is not supported.")
+
+     import hamiltorch
+
+     phis = self.phis
+     if self.observations is not None:
+         obs = self.observations
+         func = (
+             lambda y: torch.sum(torch.log(obs @ y))
+             - torch.sum((phis @ y))
+             - self.s * y.T @ y
+         )
+     else:
+         func = lambda y: -torch.sum(phis @ y).view(-1, 1) - self.s * y.T @ y
+
+     step_size = 1e-8 if stepsize is None else stepsize
+
+     # The result used to be written to the misspelled `self.sample_theta`, so
+     # `self.sampled_theta` (read below and by callers) was never updated.
+     self.sampled_theta = hamiltorch.sample(
+         log_prob_func=func,
+         params_init=self.rate,
+         num_samples=1,
+         step_size=step_size,
+         num_steps_per_sample=steps,
+     )
+     # Keep the old attribute name populated for any code that read it.
+     self.sample_theta = self.sampled_theta
+     print(self.sampled_theta)
+
+ def sample_variational(self, xtest, accuracy=1e-4, verbose=False, samples=1):
+     """
+     Fit a `VMF_SGCP` model to `self.x` and draw posterior sample paths at `xtest`.
+
+     :param xtest: points at which the posterior paths are sampled.
+     :param accuracy: passed to VMF_SGCP as `conv_crit`.
+     :param verbose: accepted for signature parity; not used.
+     :param samples: number of posterior paths requested from `sample_posterior`.
+     :return: whatever `VMF_SGCP.sample_posterior` returns.
+     """
+     from stpy.approx_inference.variational_mf import VMF_SGCP
+
+     cov_params = [self.kernel.kappa, self.kernel.gamma]
+     S_borders = np.array([[-1.0, 1.0]])
+
+     var_mf_sgcp = VMF_SGCP(
+         S_borders,
+         self.x,
+         cov_params,
+         self.m,  # number of inducing points
+         num_integration_points=256,
+         update_hyperparams=False,
+         output=0,
+         conv_crit=accuracy,
+     )
+     var_mf_sgcp.run()
+     # The `samples` argument used to be ignored in favour of a hard-coded float 1.0.
+     return var_mf_sgcp.sample_posterior(xtest, num_samples=samples)
+
+ def sample(self, verbose=False, steps=None, domain=None):
+     """
+     Draw one posterior sample with the sampler selected by `self.sampling`.
+
+     :param verbose: forwarded to the selected sampler.
+     :param steps: number of sampler steps; defaults to `self.steps`.
+     :param domain: test points forwarded as `xtest` to `sample_variational`;
+         required only when `self.sampling == "polyia_variational"`.
+     :return: the value returned by the selected sampler.
+     :raises NotImplementedError: if `self.sampling` names no known sampler.
+     :raises ValueError: if the variational sampler is selected without `domain`.
+     """
+     if steps is None:
+         steps = self.steps
+
+     stepsize = self.stepsize
+
+     # The result is unused; the call is kept in case it has side effects.
+     self.get_constraints()
+     if self.rate is None:
+         self.fit_gp()
+
+     if self.sampling == "mirror":
+         r = self.sample_mirror_langevin(steps=steps, verbose=verbose)
+     elif self.sampling == "proximal+prox":
+         r = self.sample_proximal_langevin_prox(steps=steps, verbose=verbose)
+     elif self.sampling == "proximal+simple_prox":
+         r = self.sample_proximal_langevin_simple_prox(steps=steps, verbose=verbose)
+     elif self.sampling == "hessian":
+         r = self.sample_hessian_positive_langevin(
+             steps=steps, verbose=verbose, stepsize=stepsize
+         )
+     elif self.sampling == "hessian2":
+         r = self.sample_hessian_positive_langevin_2(
+             steps=steps, verbose=verbose, stepsize=stepsize
+         )
+     elif self.sampling == "mla_prime":
+         r = self.sample_mla_prime(steps=steps, verbose=verbose, stepsize=stepsize)
+     elif self.sampling == "hmc":
+         r = self.sample_hmc(steps=steps, verbose=verbose, stepsize=stepsize)
+     elif self.sampling == "polyia_variational":
+         # sample_variational requires `xtest`; the call used to omit it and always
+         # raised TypeError. `domain` is forwarded as the test points.
+         if domain is None:
+             raise ValueError(
+                 "polyia_variational sampling requires `domain` (the test points)."
+             )
+         r = self.sample_variational(domain, accuracy=1.0 / steps, verbose=verbose)
+     else:
+         raise NotImplementedError("Sampling of such is not supported.")
+
+     return r
+
+ def sampled_lcb_ucb(self, xtest, samples=100, delta=0.1):
+     """
+     Estimate lower/upper bands from repeated posterior draws.
+
+     Calls `self.sample()` `samples` times, evaluates each draw at `xtest` with
+     `self.sample_path_points`, and returns the `delta` and `1 - delta` quantiles
+     taken across draws.
+
+     :param xtest: evaluation points passed to `sample_path_points`.
+     :param samples: number of posterior draws.
+     :param delta: lower quantile level; the upper level is `1 - delta`.
+     :return: tuple `(lcb, ucb)` of tensors.
+     """
+
+     def draw_row():
+         # Refresh the sampled parameter, then evaluate it as a single row.
+         self.sample()
+         return self.sample_path_points(xtest).view(1, -1)
+
+     rows = []
+     for _ in range(samples):
+         rows.append(draw_row())
+
+     stacked = torch.cat(rows, dim=0)
+     lower = torch.quantile(stacked, delta, dim=0)
+     upper = torch.quantile(stacked, 1 - delta, dim=0)
+     return lower, upper
+
+ def penalized_likelihood_fast(self):
+ """
+ Fit `self.rate` by minimising a penalized objective with L-BFGS-B through
+ `minimize` (torch backend), in a variable that is mapped back with
+ invGamma_half at the end.
+
+ :return: the updated `self.rate`.
+ """
+ # Only l[0] and u[0] are used below (as box bounds); `Lambda` is not read.
+ l, Lambda, u = self.get_constraints()
+ # assert torch.allclose(Lambda, torch.eye(self.m**self.d))
+
+ # `Gamma_half` is not used below; only its inverse factor is.
+ Gamma_half, invGamma_half = self.cov(inverse=True)
+ invGamma_half = invGamma_half.to(self.device)
+
+ # Regularisation weight applied to ||invGamma_half @ theta||^2.
+ s = self.s * 0.5
+
+ if self.dual == False:
+ p = self.phis.to(self.device) @ invGamma_half
+ # using all points without anchor points
+ if self.observations is not None:
+ o = self.observations.to(self.device) @ invGamma_half
+
+ def objective(theta):
+ return (
+ -torch.sum(torch.log(o @ theta))
+ + torch.sum(p @ theta)
+ + s * torch.sum((invGamma_half @ theta) ** 2)
+ )
+
+ else:
+
+ def objective(theta):
+ return torch.sum(p @ theta) + s * torch.sum(
+ (invGamma_half @ theta) ** 2
+ )
+
+ else:
+ # using anchor points
+ # Keep only buckets with a positive count.
+ mask = self.bucketized_counts > 0
+ phis = self.varphis[mask, :]
+ tau = self.total_bucketized_time[mask].to(self.device)
+ p = phis @ invGamma_half
+
+ if self.observations is not None:
+ observations = self.anchor_points_emb.to(self.device)
+ weights = self.anchor_weights.to(self.device)
+ # `mask` is rebound here to select anchors with positive weight.
+ mask = weights > 0.0
+
+ o = observations[mask, :] @ invGamma_half
+
+ def objective(theta):
+ return (
+ -torch.einsum(
+ "i,i",
+ weights[mask],
+ torch.log(o @ theta),
+ )
+ + torch.einsum("i,i", tau, p @ theta)
+ + s * torch.sum((invGamma_half @ theta) ** 2)
+ )
+
+ else:
+
+ def objective(theta):
+ return torch.einsum("i,i", tau, p @ theta) + s * torch.sum(
+ (invGamma_half @ theta) ** 2
+ )
+
+ # Warm start from the previous rate, zero-padded up to get_m() entries.
+ # NOTE(review): the previous rate is used as-is here, whereas the result below is
+ # mapped through invGamma_half -- confirm the starting point is in the right variable.
+ if isinstance(self.rate, torch.Tensor):
+ theta0 = torch.cat(
+ [
+ self.rate.to(self.device),
+ torch.zeros([self.get_m() - len(self.rate)], device=self.device),
+ ]
+ )
+ else:
+ theta0 = torch.zeros(size=(self.get_m(), 1)).view(-1).double()
+
+ # The lower bound is shifted up by eps.
+ eps = 1e-4
+ res = minimize(
+ objective,
+ theta0.cpu().numpy(),
+ backend="torch",
+ method="L-BFGS-B",
+ bounds=(l[0] + eps, u[0]),
+ precision="float64",
+ tol=1e-8,
+ torch_device=str(self.device),
+ options={
+ "ftol": 1e-08,
+ "gtol": 1e-08,
+ "eps": 1e-08,
+ "maxfun": 15000,
+ "maxiter": 15000,
+ "maxls": 20,
+ },
+ )
+
+ # Map the optimiser's solution back through invGamma_half.
+ self.rate = invGamma_half @ torch.tensor(res.x, device=self.device)
+ print(res.message)
+ return self.rate
+
+ def penalized_likelihood(self, threads=None):
+     """
+     Fit `self.rate` by solving the penalized-likelihood convex program with MOSEK.
+
+     With `self.dual == False` the objective uses `self.observations` and
+     `self.phis`; otherwise it uses the bucketized quantities and weighted anchor
+     points. The box constraints come from `self.get_constraints()`.
+
+     :param threads: MOSEK thread count; defaults to max(cpu_count - 2, 1).
+     :return: the new `self.rate`, or the previous one if the solve fails.
+     """
+     if threads is None:
+         cpu_count = os.cpu_count()
+         threads = max(cpu_count - 2, 1) if cpu_count is not None else 1
+
+     theta = cp.Variable(self.get_m())
+     l, Lambda, u = self.get_constraints()
+
+     Gamma_half = self.cov(inverse=False)
+
+     if self.dual == False:
+         # using all points without anchor points
+         phis = self.phis.cpu().numpy()
+         if self.observations is not None:
+             observations = self.observations.cpu().numpy()
+             objective = cp.Minimize(
+                 -cp.sum(cp.log(observations @ theta))
+                 + cp.sum(phis @ theta)
+                 + self.s * 0.5 * cp.sum_squares(theta)
+             )
+         else:
+             objective = cp.Minimize(
+                 cp.sum(phis @ theta) + self.s * 0.5 * cp.sum_squares(theta)
+             )
+     else:
+         # using anchor points; keep only buckets with a positive count
+         mask = self.bucketized_counts.clone().numpy() > 0
+         phis = self.varphis[mask, :].clone().numpy()
+         tau = self.total_bucketized_time[mask].clone().numpy()
+
+         if self.observations is not None:
+             observations = self.anchor_points_emb.cpu().numpy()
+             weights = self.anchor_weights.cpu().numpy()
+             # `mask` is rebound to select anchors with positive weight.
+             mask = weights > 0.0
+             objective = cp.Minimize(
+                 -cp.sum(
+                     cp.multiply(
+                         weights[mask], cp.log(observations[mask, :] @ theta)
+                     )
+                 )
+                 + cp.sum(cp.multiply(tau, phis @ theta))
+                 + self.s * 0.5 * cp.sum_squares(theta)
+             )
+         else:
+             objective = cp.Minimize(
+                 cp.sum(cp.multiply(tau, phis @ theta))
+                 + self.s * 0.5 * cp.sum_squares(theta)
+             )
+
+     Lambda = (Lambda @ Gamma_half).cpu().numpy()
+     constraints = [
+         Lambda @ theta >= l.cpu().numpy(),
+         Lambda @ theta <= u.cpu().numpy(),
+     ]
+
+     prob = cp.Problem(objective, constraints)
+
+     if self.rate is not None:
+         # Initial value: previous rate, zero-padded up to get_m() entries.
+         theta.value = (
+             torch.cat([self.rate, torch.zeros([self.get_m() - len(self.rate)])])
+             .cpu()
+             .numpy()
+         )
+
+     try:
+         prob.solve(
+             solver=cp.MOSEK,
+             warm_start=False,
+             verbose=False,
+             mosek_params={
+                 mosek.iparam.num_threads: threads,
+                 mosek.iparam.intpnt_solve_form: mosek.solveform.dual,
+                 mosek.dparam.intpnt_co_tol_pfeas: 1e-4,
+                 mosek.dparam.intpnt_co_tol_dfeas: 1e-4,
+                 mosek.dparam.intpnt_co_tol_rel_gap: 1e-4,
+             },
+         )
+
+         self.rate = torch.tensor(theta.value)
+         return self.rate
+     except Exception:
+         # Best effort: keep the previous estimate on solver failure. A bare
+         # `except:` here also swallowed KeyboardInterrupt and SystemExit.
+         print("Optimization failed. Using the old value.")
+         print(prob.status)
+         return self.rate
+
+ def penalized_likelihood_integral(self, threads=4):
+     """
+     Fit `self.rate` from histogram counts by solving the penalized-likelihood
+     convex program with MOSEK.
+
+     :param threads: MOSEK thread count.
+     :return: the new `self.rate`, or the previous one if the solve fails.
+     """
+     phis = self.phis.cpu().numpy()
+     counts = self.counts.cpu().numpy()
+
+     theta = cp.Variable(self.get_m())
+     l, Lambda, u = self.get_constraints()
+     Gamma_half = self.cov().numpy()
+     objective = cp.Minimize(
+         -cp.sum(counts @ cp.log(phis @ theta))
+         + cp.sum(phis @ theta)
+         + self.s * 0.5 * cp.sum_squares(theta)
+     )
+
+     Lambda = Lambda @ Gamma_half
+     constraints = [Lambda @ theta >= l, Lambda @ theta <= u]
+
+     # if self.rate is not None:
+     # theta.value = self.rate.cpu().numpy()
+
+     # Build the problem before the try so the handler can always read
+     # `prob.status`; it used to be unbound if construction itself failed.
+     prob = cp.Problem(objective, constraints)
+     try:
+         prob.solve(
+             solver=cp.MOSEK,
+             warm_start=False,
+             verbose=False,
+             mosek_params={
+                 mosek.iparam.num_threads: threads,
+                 mosek.iparam.intpnt_solve_form: mosek.solveform.dual,
+                 mosek.dparam.intpnt_co_tol_pfeas: 1e-4,
+                 mosek.dparam.intpnt_co_tol_dfeas: 1e-4,
+                 mosek.dparam.intpnt_co_tol_rel_gap: 1e-4,
+             },
+         )
+         self.rate = torch.tensor(theta.value)
+     except Exception:
+         # Best effort: keep the previous estimate, but let KeyboardInterrupt
+         # and SystemExit propagate.
+         print("Optimization failed. Using the old value.")
+         print(prob.status)
+
+     return self.rate
+
+ def bucketization(self):
+ """
+ Aggregate `self.data` per basic set.
+
+ Writes `self.total_bucketized_obs`, `self.total_bucketized_time`,
+ `self.bucketized_obs`, `self.bucketized_time` and `self.bucketized_counts`.
+
+ :return: None.
+ """
+
+ # NOTE(review): `phis` is filled below but never stored or returned in this method.
+ phis = []
+ observations = []
+
+ # project sets to smallest forms, and then sum on those only
+ basic_sets = self.basic_sets
+
+ data_basic = [[] for _ in range(len(basic_sets))]
+ sensing_times = [[] for _ in range(len(basic_sets))]
+ counts = torch.zeros(len(basic_sets)).int()
+ # `total_data` is accumulated below but not read afterwards.
+ total_data = 0.0
+ self.total_bucketized_obs = (
+ torch.zeros(size=(len(basic_sets), 1)).double().view(-1)
+ )
+ self.total_bucketized_time = (
+ torch.zeros(size=(len(basic_sets), 1)).double().view(-1)
+ )
+
+ # Each sample is a triple (sensed set S, observed points or None, duration dt).
+ for sample in self.data:
+ S, obs, dt = sample
+ if obs is not None:
+ total_data = total_data + obs.size()[0] # total counts
+ for index, elementary in enumerate(
+ basic_sets
+ ): # iterate over basic sets
+ mask = elementary.is_inside(
+ obs
+ ) # mask which belong to the elementary
+ if S.inside(elementary) == True:
+ data_basic[index].append(obs[mask])
+ counts[index] += 1
+ sensing_times[index].append(dt)
+ else:
+ # No observations: record an empty tensor for every basic set that passes
+ # the S.inside(elementary) test.
+ for index, elementary in enumerate(basic_sets):
+ if S.inside(elementary) == True:
+ data_basic[index].append(torch.tensor([]))
+ counts[index] += 1
+ sensing_times[index].append(dt)
+
+ for index, elementary in enumerate(basic_sets):
+ arr = np.array(
+ [int(elem.size()[0]) for elem in data_basic[index]]
+ ) # counts over sensing rounds
+ phi = self.packing.integral(elementary) # * counts[index]
+
+ self.total_bucketized_obs[index] = float(np.sum(arr))
+ self.total_bucketized_time[index] = float(np.sum(sensing_times[index]))
+
+ observations.append(arr)
+ phis.append(phi.view(1, -1)) # construct varphi_B
+
+ self.bucketized_obs = (
+ observations.copy()
+ ) # these are number of counts associated with sensings
+ self.bucketized_time = (
+ sensing_times.copy()
+ ) # these are times each basic set has been sensed
+ self.bucketized_counts = (
+ counts # these are count each basic set has been sensed
+ )
+
+ def variance_correction(self, variance):
+     """
+     Return the multiplicative variance-correction factor for `variance`.
+
+     When `self.var_cor_on == 1` the factor is the root in [1, 1e7], found by
+     bisection, of the residual defined below with B = self.U and mu = variance.
+     Otherwise the factor is 1.0.
+
+     :param variance: the variance value to correct.
+     :return: the correction factor.
+     """
+     if self.var_cor_on != 1:
+         return 1.0
+
+     from scipy import optimize
+
+     def residual(k):
+         # Same expression as before, with B and mu bound to self.U and variance.
+         B = self.U
+         mu = variance
+         return (
+             -0.5 * (B**2) / ((mu**2) * k)
+             - B / (mu * k)
+             + (np.exp(B / (k * mu)) - 1)
+         )
+
+     return optimize.bisect(residual, 1, 10000000)
+
+ def least_squares_weighted(self, threads=4):
+ """
+ Fit `self.rate` by variance-weighted least squares on the bucketized counts,
+ solved as a convex program with MOSEK.
+
+ :param threads: MOSEK thread count.
+ :return: the updated `self.rate`.
+ """
+
+ # if self.approx_fit == False:
+ # self.bucketization()
+
+ theta = cp.Variable(self.get_m())
+ l, Lambda, u = self.get_constraints()
+ Gamma_half = self.cov().numpy()
+
+ # Keep only buckets with a positive count; `tau` stays unmasked until it is used.
+ mask = self.bucketized_counts.clone().numpy() > 0
+ observations = self.total_bucketized_obs[mask].clone().numpy()
+ phis = self.varphis[mask, :].clone().numpy()
+ tau = self.total_bucketized_time.clone().numpy()
+
+ # Cloned, so the corrections below do not modify self.variances.
+ variances = self.variances.view(-1).clone().numpy()
+
+ # Scale each selected variance by its total time and by the correction factor.
+ for i in range(variances.shape[0]):
+ if mask[i] > 0:
+ variances[i] = (
+ variances[i]
+ * tau[i]
+ * self.variance_correction(variances[i] * tau[i])
+ )
+
+ selected_variances = variances[mask]
+ objective = cp.Minimize(
+ cp.sum_squares(
+ (cp.multiply((phis @ theta), tau[mask]) - observations)
+ / (np.sqrt(selected_variances))
+ )
+ + 0.5 * self.s * cp.norm2(theta) ** 2
+ )
+
+ constraints = []
+ Lambda = Lambda @ Gamma_half
+ # Only the upper bound is enforced; the lower bound is commented out.
+ # constraints.append(Lambda @ theta >= l)
+ constraints.append(Lambda @ theta <= u)
+
+ prob = cp.Problem(objective, constraints)
+
+ prob.solve(
+ solver=cp.MOSEK,
+ warm_start=False,
+ verbose=False,
+ mosek_params={
+ mosek.iparam.num_threads: threads,
+ mosek.iparam.intpnt_solve_form: mosek.solveform.primal,
+ mosek.dparam.intpnt_co_tol_pfeas: 1e-4,
+ mosek.dparam.intpnt_co_tol_dfeas: 1e-4,
+ mosek.dparam.intpnt_co_tol_rel_gap: 1e-4,
+ },
+ )
+ print(prob.status)
+ self.rate = torch.tensor(theta.value)
+ return self.rate
+
+ def least_sqaures_weighted_fast(self, threads=4):
+     """
+     Fit `self.rate` by variance-weighted least squares on the bucketized counts,
+     minimised with L-BFGS-B through `minimize` (torch backend).
+
+     The optimisation variable is mapped back with invGamma_half at the end.
+
+     :param threads: accepted for signature parity with the MOSEK variant; not used.
+     :return: the updated `self.rate`.
+     """
+     l, Lambda, u = self.get_constraints()
+     Gamma_half, invGamma_half = self.cov(inverse=True)
+
+     # Keep only buckets with a positive count; `tau` stays unmasked until used.
+     mask = self.bucketized_counts > 0
+     observations = self.total_bucketized_obs[mask]
+     phis = self.varphis[mask, :]
+     tau = self.total_bucketized_time
+
+     # Clone before correcting: `.view(-1)` shares storage with self.variances, so
+     # the in-place loop used to rescale the stored variances on every call.
+     variances = self.variances.view(-1).clone()
+     for i in range(variances.size()[0]):
+         if mask[i] > 0:
+             variances[i] = (
+                 variances[i]
+                 * tau[i]
+                 * self.variance_correction(variances[i] * tau[i])
+             )
+     selected_variances = variances[mask]
+
+     def objective(theta):
+         return torch.sum(
+             (
+                 (tau[mask] * (phis @ invGamma_half @ theta) - observations)
+                 / (np.sqrt(selected_variances))
+             )
+             ** 2
+         ) + self.s * 0.5 * torch.sum((invGamma_half @ theta) ** 2)
+
+     # Start from zero, or from the previous rate mapped through Gamma_half.
+     theta0 = torch.zeros(size=(self.get_m(), 1)).view(-1).double()
+     if self.rate is not None:
+         theta0.data = Gamma_half @ self.rate.data
+
+     # The lower bound is shifted up by eps.
+     eps = 1e-4
+     res = minimize(
+         objective,
+         theta0.cpu().numpy(),
+         backend="torch",
+         method="L-BFGS-B",
+         bounds=(l[0] + eps, u[0]),
+         precision="float64",
+         tol=1e-8,
+         options={
+             "ftol": 1e-06,
+             "gtol": 1e-06,
+             "eps": 1e-08,
+             "maxfun": 15000,
+             "maxiter": 15000,
+             "maxls": 20,
+         },
+     )
+     self.rate = invGamma_half @ torch.tensor(res.x)
+
+     return self.rate
+
+ def least_squares_weighted_integral(self, threads=4):
+     """
+     Fit `self.rate` by variance-weighted least squares on the histogram counts,
+     solved as a convex program with MOSEK.
+
+     Variances come from `self.variances_histogram` when it is non-empty, and
+     otherwise from `S.volume() * self.B` for each sample in `self.data`; both are
+     multiplied by `self.variance_correction`.
+
+     :param threads: MOSEK thread count.
+     :return: the updated `self.rate`.
+     """
+     # if self.approx_fit == False:
+     # self.bucketization()
+
+     theta = cp.Variable(self.get_m())
+     l, Lambda, u = self.get_constraints()
+     Gamma_half = self.cov().numpy()
+
+     phis = self.phis.clone().numpy()  # integrated actions
+
+     if len(self.variances_histogram) > 0:
+         # Copy first: `.numpy()` shares memory with the tensor, so the in-place
+         # correction used to overwrite self.variances_histogram on every call.
+         variances = self.variances_histogram.cpu().numpy().copy()
+         for i in range(variances.shape[0]):
+             variances[i] = variances[i] * self.variance_correction(variances[i])
+     else:
+         variances = np.zeros(len(self.data))
+         for i, (S, obs, dt) in enumerate(self.data):
+             variances[i] = S.volume() * self.B
+             variances[i] = variances[i] * self.variance_correction(variances[i])
+
+     observations = self.counts.clone().numpy()
+
+     objective = cp.Minimize(
+         cp.sum_squares((phis @ theta - observations) / np.sqrt(variances))
+         + self.s * cp.sum_squares(theta)
+     )
+     Lambda = Lambda @ Gamma_half
+     constraints = [Lambda @ theta >= l, Lambda @ theta <= u]
+     prob = cp.Problem(objective, constraints)
+
+     prob.solve(
+         solver=cp.MOSEK,
+         warm_start=False,
+         verbose=False,
+         mosek_params={
+             mosek.iparam.num_threads: threads,
+             mosek.iparam.intpnt_solve_form: mosek.solveform.dual,
+             mosek.dparam.intpnt_co_tol_pfeas: 1e-6,
+             mosek.dparam.intpnt_co_tol_dfeas: 1e-6,
+             mosek.dparam.intpnt_co_tol_rel_gap: 1e-6,
+         },
+     )
+
+     self.rate = torch.tensor(theta.value)
+
+     return self.rate
+
+ def penalized_likelihood_bins(self, threads=4):
+     """
+     Fit `self.rate` from bucketized counts by solving the penalized-likelihood
+     convex program with CLARABEL.
+
+     :param threads: accepted for signature parity with the MOSEK variants; not used.
+     :return: the new `self.rate`, or the previous one if the solve fails.
+     """
+     theta = cp.Variable(self.get_m())
+     l, Lambda, u = self.get_constraints()
+     Gamma_half = self.cov().numpy()
+
+     # Keep only buckets with a positive count.
+     mask = self.bucketized_counts.clone().numpy() > 0
+     observations = self.total_bucketized_obs[mask].clone().numpy()
+     phis = self.varphis[mask, :].clone().numpy()
+     tau = self.total_bucketized_time[mask].clone().numpy()
+
+     Lambda = Lambda @ Gamma_half
+     constraints = [Lambda @ theta >= l, Lambda @ theta <= u]
+
+     objective = cp.Minimize(
+         -cp.sum(observations @ cp.log(cp.multiply(tau, phis @ theta)))
+         + cp.sum(cp.multiply(phis @ theta, tau))
+         + self.s * 0.5 * cp.sum_squares(theta)
+     )
+     prob = cp.Problem(objective, constraints)
+     try:
+         prob.solve(solver=cp.CLARABEL, warm_start=False, verbose=True)
+
+         self.rate = torch.tensor(theta.value)
+     except Exception:
+         # Best effort: keep the previous estimate, but let KeyboardInterrupt
+         # and SystemExit propagate (a bare `except:` swallowed them).
+         print("optimization failed.")
+     return self.rate
+
+ def penalized_likelihood_integral_bins(self, threads=4):
+     """
+     Fit `self.rate` from histogram counts by solving the penalized-likelihood
+     convex program with CLARABEL.
+
+     :param threads: accepted for signature parity with the MOSEK variants; not used.
+     :return: the new `self.rate`, or the previous one if the solve fails.
+     """
+     phis = self.phis.cpu().numpy()
+     counts = self.counts.cpu().numpy()
+
+     theta = cp.Variable(self.get_m())
+     l, Lambda, u = self.get_constraints()
+     Gamma_half = self.cov().numpy()
+     objective = cp.Minimize(
+         -cp.sum(counts @ cp.log(phis @ theta))
+         + cp.sum(phis @ theta)
+         + self.s * 0.5 * cp.sum_squares(theta)
+     )
+
+     Lambda = Lambda @ Gamma_half
+     # Both bounds are always present, so the former `if constraints:` fallback to
+     # an unconstrained problem was unreachable.
+     constraints = [Lambda @ theta >= l, Lambda @ theta <= u]
+
+     prob = cp.Problem(objective, constraints)
+     try:
+         prob.solve(solver=cp.CLARABEL, warm_start=False, verbose=True)
+         self.rate = torch.tensor(theta.value)
+     except Exception:
+         # Best effort: keep the previous estimate, but let KeyboardInterrupt
+         # and SystemExit propagate (a bare `except:` swallowed them).
+         print("Optimization failed. Using the old value.")
+
+     return self.rate
+
+ def update_variances(self, value=False, force=False):
+ """
+ Refresh the stored variance proxies from the current model.
+
+ For "count-record" feedback with the "least-sq" estimator (or when `force` is
+ True) `self.variances` is updated per basic set; otherwise
+ `self.variances_histogram` is rebuilt or extended from `self.data`.
+
+ :param value: if False, cap each variance by `self.ucb(set)`; otherwise replace
+ it with `self.mean_set(set)`.
+ :param force: take the per-basic-set path regardless of feedback/estimator.
+ :return: None.
+ """
+ self.approx_fit = True
+ if (
+ self.feedback == "count-record" and self.estimator == "least-sq"
+ ) or force == True:
+ print("updating variance")
+ # NOTE: the loop variable `set` shadows the builtin of the same name.
+ for index, set in enumerate(self.basic_sets):
+ if value == False:
+ ucb = self.ucb(set)
+ self.variances[index] = np.minimum(ucb, self.variances[index])
+ else:
+ self.variances[index] = self.mean_set(set)
+ else:
+ if self.data is not None:
+ if self.peeking == True:
+ # Recompute one entry per sample: ucb of the sensed set times its duration.
+ new_var = []
+ for S, _, dt in self.data:
+ new_var.append(float(self.ucb(S)) * dt)
+ self.variances_histogram = torch.tensor(new_var.copy()).double()
+ else:
+ # Append an entry for the most recent sample only.
+ last = self.data[-1]
+ new_var = torch.tensor([self.ucb(last[0]) * last[2]]).double()
+ if len(self.variances_histogram) > 0:
+ self.variances_histogram = torch.cat(
+ (self.variances_histogram, new_var)
+ )
+ else:
+ self.variances_histogram = new_var
+ # NOTE(review): indentation was lost in this diff, so the nesting level of this
+ # reset (method level vs. inside one of the branches above) cannot be determined here.
+ self.approx_fit = False
+
+ def ucb(self, S, dt=1.0, delta=0.5):
+     """
+     Return an upper confidence bound for the set `S` over duration `dt`.
+
+     Without data or a fitted rate the bound is `self.B * S.volume() * dt`.
+     Otherwise the bound comes from the estimator selected by `self.uncertainty`
+     (when `self.approx` is None) or from the ellipsoid approximation.
+
+     :param S: the set (action) to bound.
+     :param dt: sensing duration.
+     :param delta: level passed to the conformal estimator only.
+     :return: the upper bound.
+     :raises AssertionError: for an unknown `self.uncertainty` or `self.approx`.
+     """
+     if self.data is None or self.rate is None:
+         return self.B * S.volume() * dt
+
+     mode = self.approx
+     if mode is None:
+         kind = self.uncertainty
+         if kind == "conformal":
+             return self.mean_var_conformal_set(S, dt=dt, delta=delta)[2]
+
+         # Estimators that share the (S, dt, beta) call and return the bound at index 1.
+         estimator_names = {
+             "laplace": "mean_var_laplace_set",
+             "least-sq": "mean_var_reg_set",
+             "bins": "mean_var_bins_set",
+             "likelihood-ratio": "mean_var_ratio_set",
+         }
+         if kind not in estimator_names:
+             raise AssertionError("Not Implemented.")
+         estimator = getattr(self, estimator_names[kind])
+         return estimator(S, dt=dt, beta=self.beta(0))[1]
+
+     if mode != "ellipsoid":
+         raise AssertionError("Not implemented.")
+
+     if self.approx_fit == False:
+         self.fit_ellipsoid_approx()
+         self.beta(0)
+         print("Fitting Approximation.")
+         self.approx_fit = True
+     return self.map_lcb_ucb_approx_action(S, dt=dt, beta=self.beta(0))[2]
+
+ def mean_std_per_action(self, S, W, dt, beta):
+     """
+     Return the point estimate and the two bounds for action `S`.
+
+     The bounds come from `maximize_on_elliptical_slice`, called once with the
+     integrated feature vector and once with its negation.
+
+     :param S: the set (action).
+     :param W: matrix passed to `maximize_on_elliptical_slice` (as numpy).
+     :param dt: sensing duration; scales the integrated features.
+     :param beta: passed to `maximize_on_elliptical_slice`.
+     :return: tuple `(map, ucb, lcb)` where `map = phi @ self.rate`.
+     """
+     features = self.packing.integral(S) * dt
+     gamma_half = self.cov().numpy()
+
+     lower, constraint_matrix, upper = self.get_constraints()
+     constraint_matrix = constraint_matrix @ gamma_half
+
+     def extreme(direction):
+         # First element of the solver's return value for the given direction.
+         value, _ = maximize_on_elliptical_slice(
+             direction,
+             (W).numpy(),
+             self.rate.view(-1).cpu().numpy(),
+             beta,
+             lower,
+             constraint_matrix,
+             upper,
+         )
+         return value
+
+     upper_value = extreme(features.cpu().numpy())
+     lower_value = extreme(-features.cpu().numpy())
+     estimate = features @ self.rate
+
+     return estimate, float(upper_value), -float(lower_value)
+
+ def mean_var_laplace_set(self, S, dt, beta=2.0):
+     """
+     Return `mean_std_per_action` for `S` using the Laplace covariance matrix.
+
+     The matrix is rebuilt into `self.W` only while `self.approx_fit` is False.
+     """
+     if self.approx_fit == False:
+         # Rebuild the matrix, then mark the cache as fresh.
+         self.W, self.approx_fit = self.construct_covariance_matrix_laplace(), True
+     return self.mean_std_per_action(S, self.W, dt, beta)
+
+ def mean_var_reg_set(self, S, dt, beta=2.0):
+     """
+     Return `mean_std_per_action` for `S` using the regression covariance matrix.
+
+     The matrix is rebuilt into `self.W` only while `self.approx_fit` is False.
+     """
+     if self.approx_fit == False:
+         # Rebuild the matrix, then mark the cache as fresh.
+         self.W, self.approx_fit = self.construct_covariance_matrix_regression(), True
+     return self.mean_std_per_action(S, self.W, dt, beta)
+
+ def mean_var_bins_set(self, S, dt, beta=2.0):
+     """
+     Return `mean_std_per_action` for `S` using the bins covariance matrix.
+
+     The matrix is rebuilt into `self.W` only while `self.approx_fit` is False.
+     """
+     if self.approx_fit == False:
+         # Rebuild the matrix, then mark the cache as fresh.
+         self.W, self.approx_fit = self.construct_covariance_matrix_bins(), True
+     return self.mean_std_per_action(S, self.W, dt, beta)
+
+ def mean_var_ratio_set(self, S, dt, beta=2.0):
+ """
+ Bound the linear functional x @ theta over a penalized-likelihood level set.
+
+ Two convex programs are solved with MOSEK (minimise and maximise x @ theta)
+ under the box constraints and the constraint that the penalized objective
+ is at most `v`.
+
+ :param S: the set (action).
+ :param dt: sensing duration; scales the integrated features.
+ :param beta: not read in this method.
+ :return: tuple `(map, ucb, lcb)`.
+ """
+ x = self.packing.integral(S) * dt
+ map = x @ self.rate
+ # v = np.log(1. / 0.1) - torch.sum(self.counts.double() @ torch.log(self.phis.double() @ self.rate)) \
+ # + torch.sum(self.phis.double() @ self.rate) + 0.5 * self.s * torch.norm(self.rate) ** 2
+ # Level of the constraint: log(1 / 0.1) plus self.likelihood plus the penalty at self.rate.
+ v = (
+ np.log(1.0 / 0.1)
+ + self.likelihood
+ + 0.5 * self.s * torch.norm(self.rate) ** 2
+ )
+
+ phis = self.phis.cpu().numpy()
+ counts = self.counts.cpu().numpy()
+ theta = cp.Variable(self.get_m())
+ l, Lambda, u = self.get_constraints()
+ Gamma_half = self.cov().numpy()
+
+ objective_min = cp.Minimize(x @ theta)
+ objective_max = cp.Maximize(x @ theta)
+
+ constraints = []
+ Lambda = Lambda @ Gamma_half
+ constraints.append(Lambda @ theta >= l)
+ constraints.append(Lambda @ theta <= u)
+
+ # Level-set constraint on the penalized objective.
+ constraints.append(
+ -cp.sum(counts @ cp.log(phis @ theta))
+ + cp.sum(phis @ theta)
+ + self.s * 0.5 * cp.sum_squares(theta)
+ <= v
+ )
+
+ # Lower bound: minimise x @ theta over the feasible set.
+ prob = cp.Problem(objective_min, constraints)
+ prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False)
+ lcb = np.dot(theta.value, x)
+ # Upper bound: maximise x @ theta over the same feasible set.
+ prob = cp.Problem(objective_max, constraints)
+ prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False)
+ ucb = np.dot(theta.value, x)
+
+ return map, ucb, lcb
+
+ def map_lcb_ucb_approx_action(self, S, dt=1.0, beta=2.0):
+     """
+     Return the estimate and symmetric bounds for `S` from the ellipsoid approximation.
+
+     :param S: the set (action).
+     :param dt: sensing duration; scales the estimate only.
+     :param beta: width multiplier.
+     :return: tuple `(map, lcb, ucb)` with bounds `map -/+ beta * sqrt(phi W_inv phi^T)`.
+     """
+     features = self.packing.integral(S)
+     estimate = dt * features @ self.rate
+
+     # Half-width of the interval, shared by both bounds.
+     half_width = beta * np.sqrt(features @ self.W_inv_approx @ features.T)
+
+     upper = estimate + half_width
+     # ucb = np.minimum(dt * ucb, self.B * S.volume() * dt)
+     lower = estimate - half_width
+     # lcb = np.maximum(dt * lcb, self.b * S.volume() * dt)
+     return estimate, lower, upper
+
+ def fit_ellipsoid_approx(self):
+     """
+     Build `self.W` with the constructor matching `self.uncertainty` and store its
+     pseudo-inverse in `self.W_inv_approx`.
+
+     :raises AssertionError: if `self.uncertainty` is not "laplace", "least-sq" or "bins".
+     """
+     builder_names = {
+         "laplace": "construct_covariance_matrix_laplace",
+         "least-sq": "construct_covariance_matrix_regression",
+         "bins": "construct_covariance_matrix_bins",
+     }
+     chosen = builder_names.get(self.uncertainty)
+     if chosen is None:
+         raise AssertionError("Not implemented.")
+
+     self.W = getattr(self, chosen)()
+     self.W_inv_approx = torch.pinverse(self.W)
+
+ def construct_covariance_matrix(self):
+     """
+     Build, store in `self.W`, and return the matrix matching `self.estimator`.
+
+     :raises NotImplementedError: if `self.estimator` is not "likelihood",
+         "least-sq" or "bins".
+     """
+     builder_names = {
+         "likelihood": "construct_covariance_matrix_laplace",
+         "least-sq": "construct_covariance_matrix_regression",
+         "bins": "construct_covariance_matrix_bins",
+     }
+     chosen = builder_names.get(self.estimator)
+     if chosen is None:
+         raise NotImplementedError("This estimator is not implemented.")
+
+     self.W = getattr(self, chosen)()
+     return self.W
+
+ def construct_covariance_matrix_laplace(self, theta=None):
+ """
+ Build the matrix used by the Laplace-type uncertainty estimate.
+
+ :param theta: point at which the "count-record" weights are evaluated;
+ `self.rate` is used when None.
+ :return: W plus `self.s` times the identity.
+ :raises AssertionError: for a feedback mode other than "count-record"/"histogram".
+ """
+ # Stays zero when there are no observations in the "count-record" case.
+ W = torch.zeros(size=(self.get_m(), self.get_m())).double()
+
+ if self.feedback == "count-record":
+
+ if self.observations is not None:
+
+ # Both branches build observations^T diag(1 / (observations @ point)^2) observations;
+ # they differ only in the point used.
+ if theta is None:
+ D = torch.diag(
+ 1.0 / ((self.observations @ self.rate).view(-1) ** 2)
+ )
+ W = self.observations.T @ D @ self.observations
+ else:
+ D = torch.diag(1.0 / ((self.observations @ theta).view(-1) ** 2))
+ W = self.observations.T @ D @ self.observations
+
+ elif self.feedback == "histogram":
+ # D = torch.diag(self.counts / (self.phis @ self.rate).view(-1) ** 2)
+ # NOTE(review): indentation was lost in this diff; `variances` is assigned only when
+ # variances_histogram is non-empty, so confirm that D and W below are guarded too.
+ if len(self.variances_histogram) > 0:
+ variances = self.variances_histogram.view(-1).clone()
+
+ for i in range(variances.shape[0]):
+ variances[i] = variances[i] * self.variance_correction(variances[i])
+
+ D = torch.diag(self.counts / variances**2)
+
+ W = self.phis.T @ D @ self.phis
+ else:
+ raise AssertionError("Not implemented.")
+
+ return W + torch.eye(self.get_m()).double() * self.s
+
+ def construct_covariance_matrix_regression(self):
+ """
+ Build the matrix used by the least-squares uncertainty estimate.
+
+ :return: W plus `self.s` times the identity; W stays zero when `self.data` is None.
+ """
+ W = torch.zeros(size=(self.get_m(), self.get_m())).double()
+ if self.data is not None:
+ variances = self.variances
+ if self.feedback == "count-record":
+ mask = self.bucketized_counts > 0
+ tau = self.total_bucketized_time
+ # Only the index is used; the element `o` itself is not read.
+ for index_o, o in enumerate(self.bucketized_obs):
+ n = mask[index_o]
+ if n > 0:
+ # Outer product of the bucket's feature row, scaled by its total time.
+ A = (
+ self.varphis[index_o, :].view(-1, 1)
+ @ self.varphis[index_o, :].view(1, -1)
+ * tau[index_o]
+ )
+ k = self.variance_correction(tau[index_o] * variances[index_o])
+ W = W + A / (variances[index_o] * k)
+
+ elif self.feedback == "histogram":
+
+ # NOTE(review): indentation was lost in this diff; confirm that D and W below
+ # are inside this guard, since `variances` is rebound only within it.
+ if len(self.variances_histogram) > 0:
+ variances = self.variances_histogram.view(-1).clone()
+
+ for i in range(variances.shape[0]):
+ variances[i] = variances[i] * self.variance_correction(
+ variances[i]
+ )
+
+ D = torch.diag(1.0 / variances)
+ W = self.phis.T @ D @ self.phis
+
+ return W + torch.eye(self.get_m()).double() * self.s
+
+ def construct_covariance_matrix_bins(self):
+ W = torch.zeros(size=(self.get_m(), self.get_m())).double()
+ # W keeps this all-zero value for "count-record" feedback when self.observations is None.
+ if self.feedback == "count-record":
+ # Only buckets whose count is positive are kept (mask).
+ mask = self.bucketized_counts > 0
+ tau = self.total_bucketized_time
+ varphis = self.varphis[mask, :]
+ variances = self.variances.view(-1).clone()
+ # Correct each kept bucket's variance with variance_correction(variance * tau).
+ for i in range(variances.size()[0]):
+ if mask[i] > 0:
+ variances[i] = variances[i] * self.variance_correction(
+ variances[i] * tau[i]
+ )
+
+ variances = variances[mask]
+ tau = tau[mask]
+
+ if self.observations is not None:
+ D = torch.diag(tau / variances)
+ W = varphis.T @ D @ varphis
+
+ elif self.feedback == "histogram":
+
+ if len(self.variances_histogram) > 0:
+ variances = self.variances_histogram.view(-1).clone()
+
+ for i in range(variances.shape[0]):
+ variances[i] = variances[i] * self.variance_correction(variances[i])
+
+ D = torch.diag(1.0 / variances)
+ W = self.phis.T @ D @ self.phis
+ else:
+ raise AssertionError("Not implemented.")
+ # The scaled identity self.s * I is added to W on the way out.
+ return W + torch.eye(self.get_m()).double() * self.s
+
+ def gap(self, S, actions, w, dt, beta=2.0):
+ """
+ Estimates the gap of an action S; w is a callable applied to actions and used to rescale bounds.
+ :param S: action whose gap is estimated; :param actions: iterable of candidate actions
+ :param dt: multiplier applied to integrated features; :param beta: forwarded only when self.approx is None
+ :return: the gap, or (self.B - self.b) * S.volume() when self.approx is set and no data is loaded
+ """
+ phi = self.packing.integral(S) * dt
+ Gamma_half = self.packing.cov().numpy()
+ # Two paths: an optimization per action when self.approx is None, otherwise the cached approximate UCB.
+ if self.approx is None:
+ l, Lambda, u = self.get_constraints()
+ Lambda = Lambda @ Gamma_half
+ ucbs = []
+ for action in actions:
+ phi_a = self.packing.integral(action) * dt  # NOTE(review): unused; the call below passes phi, so every action gives the same ucb
+ # ucb, _ = maximize_on_elliptical_slice(phi_a.cpu().numpy()-phi.cpu().numpy(), self.W.cpu().numpy(), self.rate.view(-1).numpy(), beta, l, Lambda, u)
+ ucb, _ = maximize_on_elliptical_slice(
+ phi.cpu().numpy(),
+ self.W.cpu().numpy(),
+ self.rate.view(-1).numpy(),
+ beta,
+ l,
+ Lambda,
+ u,
+ )
+ ucbs.append(float(ucb))
+ gap = torch.max(torch.tensor(ucbs))
+
+ else:
+ if self.data is None:
+ return (self.B - self.b) * S.volume()
+ # The best rescaled UCB over actions is cached in self.max_ucb / self.ucb_action until ucb_identified is reset.
+ if self.ucb_identified == False:
+ print("Recomputing UCB.....")
+ self.ucb_identified = True
+ self.fit_ellipsoid_approx()
+ self.max_ucb = -1000
+ self.ucb_action = None
+
+ for action in actions:
+ _, __, ucb = self.map_lcb_ucb_approx_action(
+ action, dt=dt, beta=self.beta(0)
+ )
+ ucb = ucb / w(action)
+
+ if ucb > self.max_ucb:
+ self.max_ucb = ucb
+ self.ucb_action = action
+ # This path uses self.beta(0), not the beta argument.
+ map, lcb, ucb = self.map_lcb_ucb_approx_action(S, dt=dt, beta=self.beta(0))
+ gap = w(S) * self.max_ucb - lcb
+ return gap
+
+ def information(self, S, dt, precomputed=None):
+ """Information score of S for duration dt; 1.0 without data, and None implicitly for feedback other than "count-record"/"histogram"."""
+ if self.data is None:
+ return 1.0
+
+ if self.W is None:
+ self.construct_covariance_matrix()
+ # Reads self.ucb_action and self.W_inv_approx, neither of which is assigned in this method.
+ if self.feedback == "count-record":
+ varphi_UCB = self.packing.integral(self.ucb_action).view(1, -1) * dt
+ # Upsilon: dt-scaled rows of self.varphis for the basic sets selected below, unless precomputed[S] is supplied.
+ if precomputed is not None:
+ Upsilon = precomputed[S] * dt
+ else:
+ ind = []
+ for index, set in enumerate(self.basic_sets):  # "set" shadows the builtin inside this loop
+ if S.inside(set):
+ ind.append(index)
+ Upsilon = self.varphis[ind, :] * dt
+ # NOTE(review): the inner inverse uses I + Upsilon @ Upsilon.T without a W_inv_approx factor — confirm this is intended.
+ I = torch.eye(Upsilon.size()[0]).double()
+ G = (
+ self.W_inv_approx
+ - self.W_inv_approx
+ @ Upsilon.T
+ @ torch.inverse(I + Upsilon @ Upsilon.T)
+ @ Upsilon
+ @ self.W_inv_approx
+ )
+ return (
+ 10e-4  # i.e. 1e-3
+ + torch.logdet(varphi_UCB @ self.W_inv_approx @ varphi_UCB.T)
+ - torch.logdet(varphi_UCB @ G @ varphi_UCB.T)
+ )
+
+ elif self.feedback == "histogram":
+
+ return torch.log(
+ 1
+ + self.packing.integral(S)
+ @ self.W_inv_approx
+ @ self.packing.integral(S)
+ * dt**2
+ )
+
+ def map_lcb_ucb_approx(self, S, n, beta=2.0, delta=0.01):
+ xtest = S.return_discretization(n)
+ if self.data is None:
+ return (
+ self.b + 0 * xtest[:, 0].view(-1, 1),
+ self.b + 0 * xtest[:, 0].view(-1, 1),
+ self.B + 0 * xtest[:, 0].view(-1, 1),
+ )
+ # NOTE(review): fit_ellipsoid_approx() is invoked twice in a row below — confirm the second call is needed.
+ self.fit_ellipsoid_approx()
+ self.fit_ellipsoid_approx()
+
+ Phi = self.packing.embed(xtest).double()
+ map = Phi @ self.rate
+ N = Phi.size()[0]
+ # Bounds are map +/- beta * sqrt(x^T W_inv_approx x), clipped at self.B / self.b; delta is not used.
+ ucb = torch.zeros(size=(N, 1)).double()
+ lcb = torch.zeros(size=(N, 1)).double()
+
+ for i in range(N):
+ x = Phi[i, :].view(-1, 1)
+ ucb[i, 0] = np.minimum(
+ map[i] + beta * np.sqrt(x.T @ self.W_inv_approx @ x), self.B
+ )
+ lcb[i, 0] = np.maximum(
+ map[i] - beta * np.sqrt(x.T @ self.W_inv_approx @ x), self.b
+ )
+ return map, lcb, ucb
+
+ def map_lcb_ucb(self, S, n, beta=2.0):
+ """
+ Calculate confidence bounds on a whole set domain using the matrix selected by self.uncertainty
+ :param S: set
+ :param n: discretization
+ :param beta: beta; its square root is what gets passed to maximize_on_elliptical_slice
+ :return: (map, lcb, ucb); without data, (b, b, B) broadcast over the discretization
+ """
+
+ xtest = S.return_discretization(n)
+ if self.data is None:
+ return (
+ self.b + 0 * xtest[:, 0].view(-1, 1),
+ self.b + 0 * xtest[:, 0].view(-1, 1),
+ self.B + 0 * xtest[:, 0].view(-1, 1),
+ )
+
+ N = xtest.size()[0]
+ Phi = self.packing.embed(xtest)
+ map = Phi @ self.rate
+ # Supported values of self.uncertainty: "laplace", "least-sq", "bins".
+ if self.uncertainty == "laplace":
+ W = self.construct_covariance_matrix_laplace()
+ elif self.uncertainty == "least-sq":
+ W = self.construct_covariance_matrix_regression()
+ elif self.uncertainty == "bins":
+ W = self.construct_covariance_matrix_bins()
+ else:
+ raise AssertionError("Not implemented ")
+
+ Gamma_half = self.cov().numpy()
+ l, Lambda, u = self.get_constraints()
+ Lambda = Lambda @ Gamma_half
+ ucb = torch.zeros(size=(N, 1)).double()
+ lcb = torch.zeros(size=(N, 1)).double()
+ # Upper bound: maximize along x. Lower bound: maximize along -x and flip the sign.
+ for i in range(N):
+ x = Phi[i, :]
+ ucbi, _ = maximize_on_elliptical_slice(
+ x.cpu().numpy(),
+ (W).numpy(),
+ self.rate.view(-1).numpy(),
+ np.sqrt(beta),
+ l,
+ Lambda,
+ u,
+ )
+ lcbi, _ = maximize_on_elliptical_slice(
+ -x.cpu().numpy(),
+ (W).numpy(),
+ self.rate.view(-1).numpy(),
+ np.sqrt(beta),
+ l,
+ Lambda,
+ u,
+ )
+ ucb[i, 0] = ucbi
+ lcb[i, 0] = -lcbi
+
+ return map, lcb, ucb
+
+ def map_lcb_ucb_likelihood_ratio(self, S, n, delta=0.1, current=False):
+ xtest = S.return_discretization(n)
+ # Without data, return (b, b, B) broadcast over the discretization.
+ if self.data is None:
+ return (
+ self.b + 0 * xtest[:, 0].view(-1, 1),
+ self.b + 0 * xtest[:, 0].view(-1, 1),
+ self.B + 0 * xtest[:, 0].view(-1, 1),
+ )
+
+ N = xtest.size()[0]
+ Phi = self.packing.embed(xtest)
+ map = Phi @ self.rate
+
+ ucb = torch.zeros(size=(N, 1)).double()
+ lcb = torch.zeros(size=(N, 1)).double()
+
+ phis = self.phis.cpu().numpy()
+ # v is the right-hand side of the constraint added for every test point below.
+ if current:
+ if self.observations is not None:
+ v = (
+ np.log(1.0 / delta)
+ - torch.sum(torch.log(self.observations @ self.rate))
+ + torch.sum(self.phis @ self.rate)
+ + self.s * 0.5 * torch.sum(self.rate**2)
+ )
+ else:
+ v = (
+ np.log(1.0 / delta)
+ + torch.sum(self.phis @ self.rate)
+ + self.s * 0.5 * torch.sum(self.rate**2)
+ )
+ else:
+ if self.feedback == "count-record":
+ v = (
+ np.log(1.0 / delta)
+ + self.loglikelihood
+ + 0.5 * self.s * torch.sum(self.rate**2)
+ )
+ elif self.feedback == "histogram":  # same expression as the "count-record" branch
+ v = (
+ np.log(1.0 / delta)
+ + self.loglikelihood
+ + 0.5 * self.s * torch.sum(self.rate**2)
+ )
+ else:
+ raise NotImplementedError("Not compatible with given feedback model ")
+
+ l, Lambda, u = self.get_constraints()
+ Gamma_half = self.cov().numpy()
+ Lambda = Lambda @ Gamma_half
+ # Per test point: one minimization and one maximization of x @ theta, both solved with MOSEK.
+ for i in range(N):
+ x = Phi[i, :].cpu().numpy()
+
+ theta = cp.Variable(self.get_m())
+
+ objective_min = cp.Minimize(x @ theta)
+ objective_max = cp.Maximize(x @ theta)
+
+ constraints = []
+ constraints.append(Lambda @ theta >= l)
+ constraints.append(Lambda @ theta <= u)
+
+ if self.feedback == "count-record":
+ if self.observations is not None:
+ observations = self.observations.cpu().numpy()  # does not depend on i; recomputed every iteration
+
+ constraints.append(
+ -cp.sum(cp.log(observations @ theta))
+ + cp.sum(phis @ theta)
+ + self.s * 0.5 * cp.sum_squares(theta)
+ <= v
+ )
+ else:
+ constraints.append(
+ cp.sum(phis @ theta) + self.s * 0.5 * cp.sum_squares(theta) <= v
+ )
+
+ elif self.feedback == "histogram":
+ constraints.append(
+ -cp.sum(cp.log(phis @ theta))
+ + cp.sum(phis @ theta)
+ + self.s * 0.5 * cp.sum_squares(theta)
+ <= v
+ )
+ else:
+ raise NotImplementedError("Does not exist.")
+ # theta.value is read straight after each solve; solver status is not checked.
+ prob = cp.Problem(objective_min, constraints)
+ prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False)
+ lcb[i, 0] = float(np.dot(theta.value, x))
+
+ prob = cp.Problem(objective_max, constraints)
+ prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False)
+ ucb[i, 0] = float(np.dot(theta.value, x))
+
+ return map, lcb, ucb
+
+ def mean_var_conformal_set(self, S, dt, beta=2.0, max_val=None, delta=0.05):  # beta is accepted but not used
+ # self.bucketize_prepare()
+ if max_val is None:
+ max_val = int(self.B * self.basic_sets[0].volume() * dt) + 1  # default cap from B, the first basic set's volume and dt
+ map, lcb, ucb = self.conformal_confidence_set(  # NOTE(review): the callee returns (map, ucb, lcb), so these two names are swapped — confirm what callers expect
+ S, delta=delta, max_val=max_val, dt=dt
+ )
+ return map, lcb, ucb
+
+ def conformal_score_func(self, theta, new, index):
+ """Score for candidate `new` = (set, points or None, duration) against bucket `index`; 0.0 when that bucket holds no observations."""
+ if new[1] is None:
+ n_new = 0
+ else:
+ n_new = new[1].size()[0]
+ # Absolute error between the candidate's point count and the count predicted by theta.
+ varphi = self.packing.integral(new[0]) * new[2]
+ err_new = abs(float(n_new) - float(varphi @ theta))
+ n = len(self.bucketized_obs[index])
+
+ if n > 0:
+ # Same absolute error for every value stored in the bucket.
+ phis = self.varphis[index].repeat(n, 1)
+ res = torch.tensor(self.bucketized_obs[index]).double()
+
+ err = torch.abs(res - (phis @ theta.view(-1, 1)).view(-1))
+ # (number of stored errors strictly below err_new + 1) / (n + 1).
+ return torch.sum(err < err_new).double() / float(n + 1.0) + 1.0 / (
+ float(n) + 1.0
+ )
+
+ else:
+ return 0.0
+
+ def conformal_confidence(self, delta=0.05, max_val=20, dt=1, step=1):
+ lcb = []
+ ucb = []
+ map = []
+ # Runs conformal_confidence_set on every basic set and collects the three results per set.
+ if self.data is not None:
+ self.bucketization(time=True)
+
+ for S in self.basic_sets:
+ m, u, l = self.conformal_confidence_set(
+ S, delta=delta, max_val=max_val, dt=dt, step=step
+ )
+
+ map.append(m)
+ ucb.append(u)
+ lcb.append(l)
+ # Returned in the order (map, ucb, lcb), each as a double tensor.
+ return (
+ torch.tensor(map).double(),
+ torch.tensor(ucb).double(),
+ torch.tensor(lcb).double(),
+ )
+
+ def conformal_confidence_set(self, S, delta=0.05, max_val=20, dt=1.0, step=1):
+ """
+ :return: (map, ucb, lcb) -- the upper bound comes before the lower bound; (self.b, self.B, self.b) without data
+ """
+ # Refit on the current data, locate S among the basic sets, and compute the point estimate.
+ if self.data is not None:
+ if self.feedback == "count-record":
+ self.penalized_likelihood()
+ elif self.feedback == "histogram":
+ self.penalized_likelihood_integral()
+
+ # identify the set in basic sets
+ index = 0
+ for set in self.basic_sets:
+ if set.inside(S):
+ break
+ index += 1
+ # If no basic set matches, index ends up equal to len(self.basic_sets).
+ # calculate map estimate
+ map = float(self.rate @ self.packing.integral(S))
+ else:
+ map = self.b
+ return map, self.B, self.b
+
+ scores = []
+ j = 0
+ score = 1.0
+ lowest = 0
+ n = float(len(self.bucketized_obs[index]))
+ # Upward scan: try candidate counts j = 0, 1, ... while the score stays above the threshold; lowest is the last j tried.
+ while score > np.ceil((1 - delta) * (n + 1)) / (n + 1) and j <= max_val:
+ lowest = j
+ if j > 0:
+ obs = torch.zeros(size=(j, self.d)).double()
+ for i in range(self.d):
+ obs[:, i] = torch.tensor(
+ np.random.uniform(S.bounds[i, 0], S.bounds[i, 1], size=j)
+ )
+ else:
+ obs = None
+
+ # new observation
+ new = (S, obs, dt)
+
+ old_phis, old_observations, old_counts = self.add_data_point_and_remove(new)
+ # theta_new is only assigned for "count-record" and "histogram" feedback.
+ if self.feedback == "count-record":
+ theta_new = self.penalized_likelihood()
+ elif self.feedback == "histogram":
+ theta_new = self.penalized_likelihood_integral()
+
+ # restore back the data
+ self.phis = old_phis
+ self.observations = old_observations
+ self.counts = old_counts
+
+ # calculate the score
+ score = self.conformal_score_func(theta_new, new, index)
+ n = float(len(self.bucketized_obs[index]))
+
+ print(j, "/", max_val, score, np.ceil((1 - delta) * (n + 1)) / (n + 1))
+ j = j + 1
+ # Downward scan: same procedure starting from max_val and stopping above lowest; largest is the last j tried.
+ j = max_val
+ score = 1.0
+ largest = max_val
+
+ while score > np.ceil((1 - delta) * (n + 1)) / (n + 1) and j > lowest:
+ largest = j
+ if j > 0:
+ obs = torch.zeros(size=(j, self.d)).double()
+ for i in range(self.d):
+ obs[:, i] = torch.tensor(
+ np.random.uniform(S.bounds[i, 0], S.bounds[i, 1], size=j)
+ )
+ else:
+ obs = None
+
+ # new observation
+ new = (S, obs, dt)
+
+ old_phis, old_observations, old_counts = self.add_data_point_and_remove(new)
+
+ if self.feedback == "count-record":
+ theta_new = self.penalized_likelihood()
+ elif self.feedback == "histogram":
+ theta_new = self.penalized_likelihood_integral()
+
+ # restore back the data
+ self.phis = old_phis
+ self.observations = old_observations
+ self.counts = old_counts
+
+ # calculate the score
+ score = self.conformal_score_func(theta_new, new, index)
+ n = float(len(self.bucketized_obs[index]))
+
+ print(j, "/", max_val, score, np.ceil((1 - delta) * (n + 1)) / (n + 1))
+ j = j - 1
+ # scores = np.array(scores)
+ # mask = scores < np.ceil((1-delta)*(n+1))/(n+1)
+
+ # if np.sum(mask) == 0:
+ # lowest = 0
+ # largest = max_val
+ # else:
+ # lowest = np.min(np.arange(0,max_val,step)[mask])
+ # largest = np.max(np.arange(0, max_val, step)[mask])
+ # Convert the two counts into rates by dividing by dt and the volume of S; scores and step are unused.
+ lcb = lowest / dt / S.volume()
+ ucb = largest / dt / S.volume()
+
+ return (map, ucb, lcb)
diff --git a/stpy/point_processes/positive_basis_estimator.py b/stpy/point_processes/positive_basis_estimator.py
index 3d09bc6..d404af6 100644
--- a/stpy/point_processes/positive_basis_estimator.py
+++ b/stpy/point_processes/positive_basis_estimator.py
@@ -9,125 +9,124 @@
from stpy.borel_set import BorelSet
from stpy.point_processes.poisson import PoissonPointProcess
-class RateEstimator():
- def __init__(self):
- pass
-
-
- def get_min_max(self):
- basic_sets = self.hierarchy.get_sets_level(self.hierarchy.levels)
- volumes = []
- for index, elementary in enumerate(basic_sets):
- volumes.append(elementary.volume())
-
- return (np.min(volumes), np.max(volumes))
-
-
-
- def load_data(self, data, times = True):
- self.approx_fit = False
-
- if len(data) > 0:
- self.approx_fit = False
- phis = []
- observations = []
- self.data = data.copy()
- counts = []
- #times_arr = []
-
- for sample in data:
- S, obs, dt = sample
- count = torch.Tensor([0])
-
- if obs is not None:
- if times == True:
- emb = self.packing.embed(obs) * dt
- else:
- emb = self.packing.embed(obs)
-
- phi = self.packing.integral(S) * dt
- observations.append(emb)
- count = torch.Tensor([emb.size()[0]])
- phis.append(phi.view(1, -1))
-
-
- if self.dual == True:
- self.global_dt = dt
- dist_matrix = torch.cdist(obs, self.anchor_points, p = 2)
- for k in range(obs.size()[0]):
- index = torch.argmin(dist_matrix[k,:])
- self.anchor_weights[index] = self.anchor_weights[index] + 1.
- else:
- phi = self.packing.integral(S) * dt
- phis.append(phi.view(1, -1))
- counts.append(count)
-
- self.counts = torch.cat(counts, dim=0) # n(A_i)
- self.phis = torch.cat(phis, dim=0) # integrals of A_i
-
- if len(observations) > 0:
- self.observations = torch.cat(observations, dim=0) # \{x_i\}_{i=1}^{n(A_i)}
- else:
- self.observations = None
-
- if self.feedback == "count-record":
- self.bucketization()
-
- def add_data_point(self, new_data, times = True):
- self.approx_fit = False
-
- if self.data is None:
- self.load_data([new_data])
- return
-
- self.data.append(new_data)
-
- # update standard form data
- S, obs, dt = new_data
- if obs is not None:
-
- if times == True:
- emb = self.packing.embed(obs) * dt
- else:
- emb = self.packing.embed(obs)
-
- phi = self.packing.integral(S).view(1, -1) * dt
-
- count = torch.Tensor([emb.size()[0]])
-
- if self.observations is not None:
- self.observations = torch.cat((self.observations, emb), dim=0)
- #self.times = torch.cat((self.times, dt * torch.ones(size=(emb.size()[0],1)).view(-1).double() ))
- else:
- self.observations = emb
- #self.times = dt * torch.ones(size=(emb.size()[0],1)).view(-1).double()
-
-
- if self.dual == True:
-
- dist_matrix = torch.cdist(obs, self.anchor_points, p=2)
- for k in range(obs.size()[0]):
- index = torch.argmin(dist_matrix[k, :])
- self.anchor_weights[index] += 1.
- else:
- count = torch.Tensor([0])
- phi = self.packing.integral(S).view(1, -1) * dt
-
-
- self.phis = torch.cat((self.phis, phi), dim=0)
- self.counts = torch.cat((self.counts, count))
-
- if self.feedback == "count-record":
-
- for index, elementary in enumerate(self.basic_sets):
-
- if S.inside(elementary) == True:
- if obs is not None:
- mask = elementary.is_inside(obs)
- self.total_bucketized_obs[index] += float(obs[mask].size()[0])
- else:
- self.total_bucketized_obs[index] += 0.0
-
- self.bucketized_counts[index] += 1
- self.total_bucketized_time[index] += dt
+class RateEstimator:
+ # Data handling shared by rate estimators: turns (set, points or None, duration) samples into phis / observations / counts.
+ def __init__(self):
+ pass
+
+ def get_min_max(self):
+ basic_sets = self.hierarchy.get_sets_level(self.hierarchy.levels)
+ volumes = []
+ for index, elementary in enumerate(basic_sets):
+ volumes.append(elementary.volume())
+
+ return (np.min(volumes), np.max(volumes))
+
+ def load_data(self, data, times=True):
+ self.approx_fit = False
+ # Nothing besides approx_fit is touched when data is empty.
+ if len(data) > 0:
+ self.approx_fit = False
+ phis = []
+ observations = []
+ self.data = data.copy()
+ counts = []
+ # times_arr = []
+
+ for sample in data:
+ S, obs, dt = sample
+ count = torch.tensor([0.0])
+ # Counts are kept as floats, as they were with torch.Tensor([...]) before; torch.tensor([0]) would infer int64.
+ if obs is not None:
+ if times == True:
+ emb = self.packing.embed(obs) * dt
+ else:
+ emb = self.packing.embed(obs)
+
+ phi = self.packing.integral(S) * dt
+ observations.append(emb)
+ count = torch.tensor([float(emb.size()[0])])
+ phis.append(phi.view(1, -1))
+
+ if self.dual == True:
+ self.global_dt = dt
+ dist_matrix = torch.cdist(obs, self.anchor_points, p=2)
+ for k in range(obs.size()[0]):
+ index = torch.argmin(dist_matrix[k, :])
+ self.anchor_weights[index] = (
+ self.anchor_weights[index] + 1.0
+ )
+ else:
+ phi = self.packing.integral(S) * dt
+ phis.append(phi.view(1, -1))
+ counts.append(count)
+
+ self.counts = torch.cat(counts, dim=0) # n(A_i)
+ self.phis = torch.cat(phis, dim=0) # integrals of A_i
+
+ if len(observations) > 0:
+ self.observations = torch.cat(
+ observations, dim=0
+ ) # \{x_i\}_{i=1}^{n(A_i)}
+ else:
+ self.observations = None
+
+ if self.feedback == "count-record":
+ self.bucketization()
+
+ def add_data_point(self, new_data, times=True):
+ self.approx_fit = False
+ # First data point: delegate to load_data, forwarding `times` so the caller's choice is not lost.
+ if self.data is None:
+ self.load_data([new_data], times=times)
+ return
+
+ self.data.append(new_data)
+
+ # update standard form data
+ S, obs, dt = new_data
+ if obs is not None:
+
+ if times == True:
+ emb = self.packing.embed(obs) * dt
+ else:
+ emb = self.packing.embed(obs)
+
+ phi = self.packing.integral(S).view(1, -1) * dt
+ # Float count, matching the dtype load_data produces.
+ count = torch.tensor([float(emb.size()[0])])
+
+ if self.observations is not None:
+ self.observations = torch.cat((self.observations, emb), dim=0)
+ # self.times = torch.cat((self.times, dt * torch.ones(size=(emb.size()[0],1)).view(-1).double() ))
+ else:
+ self.observations = emb
+ # self.times = dt * torch.ones(size=(emb.size()[0],1)).view(-1).double()
+
+ if self.dual == True:
+
+ dist_matrix = torch.cdist(obs, self.anchor_points, p=2)
+ for k in range(obs.size()[0]):
+ index = torch.argmin(dist_matrix[k, :])
+ self.anchor_weights[index] += 1.0
+ else:
+ count = torch.tensor([0.0])
+ phi = self.packing.integral(S).view(1, -1) * dt
+
+ self.phis = torch.cat((self.phis, phi), dim=0)
+ self.counts = torch.cat((self.counts, count))
+
+ if self.feedback == "count-record":
+ # Incrementally update the per-basic-set totals instead of re-running bucketization.
+ for index, elementary in enumerate(self.basic_sets):
+
+ if S.inside(elementary) == True:
+ if obs is not None:
+ mask = elementary.is_inside(obs)
+ self.total_bucketized_obs[index] += float(obs[mask].size()[0])
+ else:
+ self.total_bucketized_obs[index] += 0.0
+
+ self.bucketized_counts[index] += 1
+ self.total_bucketized_time[index] += dt
diff --git a/stpy/point_processes/rate_estimator.py b/stpy/point_processes/rate_estimator.py
index 016661f..ed6dbcc 100644
--- a/stpy/point_processes/rate_estimator.py
+++ b/stpy/point_processes/rate_estimator.py
@@ -1,191 +1,214 @@
+from typing import List
import numpy as np
import torch
-class RateEstimator():
-
- def __init__(self):
- pass
-
- def get_min_max(self):
- basic_sets = self.hierarchy.get_sets_level(self.hierarchy.levels)
- volumes = []
- for index, elementary in enumerate(basic_sets):
- volumes.append(elementary.volume())
-
- return (np.min(volumes), np.max(volumes))
-
- def load_data(self, data, times=True):
- self.approx_fit = False
-
- if len(data) > 0:
- self.approx_fit = False
- phis = []
- observations = []
- self.data = data.copy()
- counts = []
- # times_arr = []
- x = []
- for sample in data:
- S, obs, dt = sample
- count = torch.Tensor([0])
-
- if obs is not None:
- x.append(obs)
-
- if obs is not None:
- obs, _, duplicates = torch.unique(obs, dim=0, return_inverse=True, return_counts=True)
- #obs = torch.diag(torch.exp(duplicates.double()))@obs\
- obs = torch.einsum('ij,i->ij', obs, duplicates)
-
- if times == True:
- emb = self.packing.embed(obs) * dt
- else:
- emb = self.packing.embed(obs)
-
- phi = self.packing.integral(S) * dt
- observations.append(emb)
- count = torch.Tensor([emb.size()[0]])
- phis.append(phi.view(1, -1))
-
- if self.dual == True:
- self.global_dt = dt
- dist_matrix = torch.cdist(obs, self.anchor_points, p=2)
- for k in range(obs.size()[0]):
- index = torch.argmin(dist_matrix[k, :])
- self.anchor_weights[index] = self.anchor_weights[index] + 1.
- else:
- phi = self.packing.integral(S) * dt
- phis.append(phi.view(1, -1))
- counts.append(count)
-
- self.counts = torch.cat(counts, dim=0) # n(A_i)
- self.phis = torch.cat(phis, dim=0) # integrals of A_i
- if len(x) > 0:
- self.x = torch.cat(x, dim=0)
- else:
- self.x = None
-
- if len(observations) > 0:
- self.observations = torch.cat(observations, dim=0) # \{x_i\}_{i=1}^{n(A_i)}
- else:
- self.observations = None
-
- if self.feedback == "count-record":
- self.bucketization()
-
- def add_data_point(self, new_data, times=True):
- self.approx_fit = False
-
- if self.data is None:
- self.load_data([new_data])
- return
-
- self.data.append(new_data)
-
- # update standard form data
- S, obs, dt = new_data
- if obs is not None:
-
- if times == True:
- emb = self.packing.embed(obs) * dt
- else:
- emb = self.packing.embed(obs)
-
- phi = self.packing.integral(S).view(1, -1) * dt
-
- count = torch.Tensor([emb.size()[0]])
-
- if self.observations is not None:
- self.observations = torch.cat((self.observations, emb), dim=0)
- # self.times = torch.cat((self.times, dt * torch.ones(size=(emb.size()[0],1)).view(-1).double() ))
- else:
- self.observations = emb
- # self.times = dt * torch.ones(size=(emb.size()[0],1)).view(-1).double()
-
- if self.dual == True:
-
- dist_matrix = torch.cdist(obs, self.anchor_points, p=2)
- for k in range(obs.size()[0]):
- index = torch.argmin(dist_matrix[k, :])
- self.anchor_weights[index] += 1.
- else:
- count = torch.Tensor([0])
- phi = self.packing.integral(S).view(1, -1) * dt
-
- self.phis = torch.cat((self.phis, phi), dim=0)
- self.counts = torch.cat((self.counts, count))
-
- if self.feedback == "count-record":
-
- for index, elementary in enumerate(self.basic_sets):
-
- if S.inside(elementary) == True:
- if obs is not None:
- mask = elementary.is_inside(obs)
- self.total_bucketized_obs[index] += float(obs[mask].size()[0])
- else:
- self.total_bucketized_obs[index] += 0.0
-
- self.bucketized_counts[index] += 1
- self.total_bucketized_time[index] += dt
-
- def get_m(self):
- return self.packing.get_m()
-
- def mean_rate(self, S, n=128):
- xtest = S.return_discretization(n)
- if self.rate is not None:
- return self.packing.embed(xtest) @ self.rate.view(-1, 1)
- else:
- return self.packing.embed(xtest)[:, 0].view(-1, 1) * 0 + self.b
-
- def mean_rate_points(self, xtest):
- if self.rate is not None:
- return self.packing.embed(xtest) @ self.rate.view(-1, 1)
- else:
- return self.packing.embed(xtest)[:, 0].view(-1, 1) * 0 + self.b
-
- def mean_set(self, S, dt=1):
- phi = self.packing.integral(S) * dt
- map = phi @ self.rate.view(-1, 1)
- return map
-
- def rate_value(self, x, dt=1):
- phi = self.packing.embed(x) * dt
-
- if self.rate is not None:
- map = phi @ self.rate.view(-1, 1)
- else:
- print("Rate function not fitted!")
- map = 0 * phi[:, 0].view(-1, 1) + self.b
-
- return map
-
- def sample_value(self, S):
- """
- Given a pre-sampled value evaluate certain portions of the domain S
- :param S:
- :return:
- """
- return self.packing.integral(S) @ self.sampled_theta
-
- def sample_path(self, S, n=128):
- xtest = S.return_discretization(n)
- return self.packing.embed(xtest) @ self.sampled_theta
-
- def sample_path_points(self, xtest):
- return self.packing.embed(xtest) @ self.sampled_theta.view(-1, 1)
-
- def get_observations(self):
- if self.data is not None:
- points = []
- for datapoint in self.data:
- if datapoint[1] is not None:
- points.append(datapoint[1])
- if len(points) > 0:
- return torch.vstack(points)
- else:
- return None
- else:
- return None
+class RateEstimator:
+ # Data handling shared by rate estimators: turns (set, points or None, duration) samples into phis / observations / counts.
+ def __init__(self):
+ pass
+
+ def get_min_max(self):
+ basic_sets = self.hierarchy.get_sets_level(self.hierarchy.levels)
+ volumes = []
+ for index, elementary in enumerate(basic_sets):
+ volumes.append(elementary.volume())
+
+ return (np.min(volumes), np.max(volumes))
+
+ def load_data(self, data: List, times=True):
+ r"""Load the data and save $\phi(x)$ into `self.observations`, $n(A_i)$ in
+ `self.counts` and $\int_{A_i} \phi_j(x) dx$ into `self.phis`
+
+ The raw data points are additionally concatenated into `self.x`.
+ Parameters
+ ----------
+ data
+
+ List of samples, where each sample is a tuple of
+
+ * The Borel set on which the data lies
+ * A tensor of the datapoints themselves i.e. of shape
+ [num_data_points, self.d...], or None when the sample has no points
+ * The amount of time in minutes that the data spans
+ i.e. max time - min time of all data points
+
+ times, optional
+ by default True; when True the embedded observations are multiplied by the sample's time span
+ """
+ self.approx_fit = False
+
+ if len(data) > 0:
+ self.approx_fit = False
+ phis = []
+ observations = []
+ self.data = data.copy()
+ counts = []
+ # times_arr = []
+ x = []
+ for sample in data:
+ S, obs, dt = sample
+ count = torch.tensor([0.0])
+ # Counts are kept as floats, as they were with torch.Tensor([...]) before; torch.tensor([0]) would infer int64.
+ if obs is not None:
+ x.append(obs)
+
+ if obs is not None:
+ if times == True:
+ emb = self.packing.embed(obs) * dt
+ else:
+ emb = self.packing.embed(obs)
+
+ phi = self.packing.integral(S) * dt
+ observations.append(emb)
+ count = torch.tensor([float(emb.size()[0])])
+ phis.append(phi.view(1, -1))
+
+ if self.dual == True:
+ self.global_dt = dt
+ dist_matrix = torch.cdist(obs, self.anchor_points, p=2)
+ for k in range(obs.size()[0]):
+ index = torch.argmin(dist_matrix[k, :])
+ self.anchor_weights[index] = (
+ self.anchor_weights[index] + 1.0
+ )
+ else:
+ phi = self.packing.integral(S) * dt
+ phis.append(phi.view(1, -1))
+ counts.append(count)
+
+ self.counts = torch.cat(counts, dim=0) # n(A_i)
+ self.phis = torch.cat(phis, dim=0) # integrals of A_i
+ if len(x) > 0:
+ self.x = torch.cat(x, dim=0)
+ else:
+ self.x = None
+
+ if len(observations) > 0:
+ self.observations = torch.cat(
+ observations, dim=0
+ ) # \{x_i\}_{i=1}^{n(A_i)}
+ else:
+ self.observations = None
+ # NOTE(review): bucketization also requires self.dual here, yet add_data_point updates the bucket totals for any "count-record" estimator — confirm they exist when dual is False.
+ if self.feedback == "count-record" and self.dual:
+ self.bucketization()
+
+ def add_data_point(self, new_data, times=True):
+ self.approx_fit = False
+ # First data point: delegate to load_data, forwarding `times` so the caller's choice is not lost.
+ if self.data is None:
+ self.load_data([new_data], times=times)
+ return
+
+ self.data.append(new_data)
+
+ # update standard form data
+ S, obs, dt = new_data
+ if obs is not None:
+
+ if times == True:
+ emb = self.packing.embed(obs) * dt
+ else:
+ emb = self.packing.embed(obs)
+
+ phi = self.packing.integral(S).view(1, -1) * dt
+ # Float count, matching the dtype load_data produces.
+ count = torch.tensor([float(emb.size()[0])])
+
+ if self.observations is not None:
+ self.observations = torch.cat((self.observations, emb), dim=0)
+ # self.times = torch.cat((self.times, dt * torch.ones(size=(emb.size()[0],1)).view(-1).double() ))
+ else:
+ self.observations = emb
+ # self.times = dt * torch.ones(size=(emb.size()[0],1)).view(-1).double()
+
+ if self.dual == True:
+
+ dist_matrix = torch.cdist(obs, self.anchor_points, p=2)
+ for k in range(obs.size()[0]):
+ index = torch.argmin(dist_matrix[k, :])
+ self.anchor_weights[index] += 1.0
+ else:
+ count = torch.tensor([0.0])
+ phi = self.packing.integral(S).view(1, -1) * dt
+ # NOTE(review): self.x (assigned by load_data) is not extended by this method — confirm that is intended.
+ self.phis = torch.cat((self.phis, phi), dim=0)
+ self.counts = torch.cat((self.counts, count))
+
+ if self.feedback == "count-record":
+ # Incrementally update the per-basic-set totals instead of re-running bucketization.
+ for index, elementary in enumerate(self.basic_sets):
+
+ if S.inside(elementary) == True:
+ if obs is not None:
+ mask = elementary.is_inside(obs)
+ self.total_bucketized_obs[index] += float(obs[mask].size()[0])
+ else:
+ self.total_bucketized_obs[index] += 0.0
+
+ self.bucketized_counts[index] += 1
+ self.total_bucketized_time[index] += dt
+
+ def get_m(self):
+ return self.packing.get_m()
+
+ def mean_rate(self, S, n=128):
+ xtest = S.return_discretization(n)
+ if self.rate is not None:
+ return self.packing.embed(xtest) @ self.rate.view(-1, 1)
+ else:
+ return self.packing.embed(xtest)[:, 0].view(-1, 1) * 0 + self.b  # constant self.b column when nothing is fitted
+
+ def mean_rate_points(self, xtest):
+ if self.rate is not None:
+ return self.packing.embed(xtest) @ self.rate.view(-1, 1)
+ else:
+ return self.packing.embed(xtest)[:, 0].view(-1, 1) * 0 + self.b  # constant self.b column when nothing is fitted
+
+ def mean_set(self, S, dt=1):
+ phi = self.packing.integral(S) * dt
+ map = phi @ self.rate.view(-1, 1)  # unlike mean_rate, there is no fallback for self.rate being None
+ return map
+
+ def rate_value(self, x, dt=1):
+ phi = self.packing.embed(x) * dt
+
+ if self.rate is not None:
+ map = phi @ self.rate.view(-1, 1)
+ else:
+ print("Rate function not fitted!")
+ map = 0 * phi[:, 0].view(-1, 1) + self.b
+
+ return map
+
+ def sample_value(self, S):
+ """
+ Given a pre-sampled value evaluate certain portions of the domain S
+ :param S: set passed to self.packing.integral
+ :return: self.packing.integral(S) @ self.sampled_theta
+ """
+ return self.packing.integral(S) @ self.sampled_theta
+
+ def sample_path(self, S, n=128):
+ xtest = S.return_discretization(n)
+ return self.packing.embed(xtest) @ self.sampled_theta
+
+ def sample_path_points(self, xtest):
+ return self.packing.embed(xtest) @ self.sampled_theta.view(-1, 1)
+
+ def get_observations(self):
+ if self.data is not None:
+ points = []
+ for datapoint in self.data:
+ if datapoint[1] is not None:
+ points.append(datapoint[1])
+ if len(points) > 0:
+ return torch.vstack(points)  # raw points of all samples, stacked row-wise
+ else:
+ return None
+ else:
+ return None
+
+ def fit(self):
+ raise NotImplementedError()
diff --git a/stpy/point_processes/seasonal_point_process.py b/stpy/point_processes/seasonal_point_process.py
index 3590e71..c75cf55 100644
--- a/stpy/point_processes/seasonal_point_process.py
+++ b/stpy/point_processes/seasonal_point_process.py
@@ -6,77 +6,98 @@
class SeasonalPoissonPointProcess(PoissonPointProcess):
- def __init__(self, *args, seasonality=lambda t: 1., **kwargs):
- self.seasonality = seasonality
-
- def rate_default(self, x, t, dt=1.):
- return (self.B * torch.sum(torch.exp(-(x + 1)) * torch.sin(2 * x * np.pi) ** 2, dim=1).view(-1,
- 1) + self.b) * dt
-
- def rate_volume(self, S, t, dt=1, rate=None):
- if self.rate_volume_f is None:
- # integrate rate numerically over S
- import scipy.integrate as integrate
- if rate is None:
- rate = self.rate
- else:
- rate = rate
- integral = 0
- if self.d == 1:
- # integrate = S.volume()* self.rate(torch.from_numpy(S.bounds[0,1]).view(1))
- integral, _ = integrate.quad(lambda x: rate(torch.Tensor([x]).view(1, 1), t).numpy(),
- float(S.bounds[0, 0]), float(S.bounds[0, 1]))
- elif self.d == 2:
- integrand = lambda x, y: rate(torch.Tensor([x, y], t).view(1, 2).double()).numpy()
- integral, _ = integrate.dblquad(integrand, float(S.bounds[0, 0]), float(S.bounds[0, 1]),
- lambda x: float(S.bounds[1, 0]), lambda x: float(S.bounds[1, 1]))
-
- return integral * dt
- else:
- return self.rate_volume_f(S) * dt
-
- def sample(self, S, t, dt=1., verbose=False, rate=None):
- """
-
- :param S: set where it should be sampled
- :return:
- """
- if self.exact == True:
- return self.sample_discretized(S, t, dt=dt)
- else:
-
- lam = self.rate_volume(S, t, dt)
- n = np.random.poisson(lam=lam)
- new_sample = []
- vol = S.volume()
- size = 0
-
- alpha = 1. / lam
-
- while size < n:
- # uniform sample g(s) = 1/vol(S)
- sample = S.uniform_sample(1)
-
- t = self.rate(sample, t) / (alpha * lam)
- p = np.random.uniform(0, 1)
- if p < t:
- new_sample.append(sample.view(1, -1))
- size = size + 1
-
- if len(new_sample) > 1:
- x = torch.cat(new_sample, dim=0)
- else:
- return None
- return x
-
- def sample_discretized(self, S, t, dt, n=50):
- lam = float(self.rate_volume(S, t, dt))
- count = np.random.poisson(lam=lam)
- if count > 0:
- x = S.return_discretization(n)
- r = self.rate(x, t) * dt
- sample = torch.from_numpy(
- np.random.choice(np.arange(0, x.size()[0], 1), size=count, p=(r / torch.sum(r)).numpy().reshape(-1)))
- return x[sample, :]
- else:
- return None
+ def __init__(self, *args, seasonality=lambda t: 1.0, **kwargs):
+ # Forward the remaining arguments to the base class: methods here read self.B, self.b, self.d, self.exact, self.rate and self.rate_volume_f, none of which this class assigns itself.
+ super().__init__(*args, **kwargs)
+ self.seasonality = seasonality
+
+ def rate_default(self, x, t, dt=1.0):  # t is accepted but not used by this default rate
+ return (
+ self.B
+ * torch.sum(
+ torch.exp(-(x + 1)) * torch.sin(2 * x * np.pi) ** 2, dim=1  # summed over dim=1: one value per row of x
+ ).view(-1, 1)
+ + self.b
+ ) * dt
+
+ def rate_volume(self, S, t, dt=1, rate=None):
+ if self.rate_volume_f is None:
+ # integrate rate numerically over S
+ import scipy.integrate as integrate
+ # `rate`, when given, replaces self.rate; it is called as rate(points, t).
+ if rate is None:
+ rate = self.rate
+ else:
+ rate = rate
+ integral = 0
+ if self.d == 1:
+ # integrate = S.volume()* self.rate(torch.from_numpy(S.bounds[0,1]).view(1))
+ integral, _ = integrate.quad(
+ lambda x: rate(torch.tensor([x]).view(1, 1), t).numpy(),
+ float(S.bounds[0, 0]),
+ float(S.bounds[0, 1]),
+ )
+ elif self.d == 2:
+ integrand = lambda y, x: rate(  # dblquad calls func(y, x): inner variable first
+ torch.tensor([x, y]).view(1, 2).double(), t
+ ).numpy()
+ integral, _ = integrate.dblquad(
+ integrand,
+ float(S.bounds[0, 0]),
+ float(S.bounds[0, 1]),
+ lambda x: float(S.bounds[1, 0]),
+ lambda x: float(S.bounds[1, 1]),
+ )
+ # For any other self.d the integral stays 0.
+ return integral * dt
+ else:
+ return self.rate_volume_f(S) * dt  # NOTE(review): t is not passed to rate_volume_f
+
+ def sample(self, S, t, dt=1.0, verbose=False, rate=None):
+ """
+ Draw points of the process on S at time t: on a grid when self.exact, otherwise by accept/reject on uniform proposals.
+ :param S: set where it should be sampled; :param t: time passed to the rate; :param dt: duration (verbose and rate are unused)
+ :return: tensor with one accepted point per row, or None when no point was drawn
+ """
+ if self.exact == True:
+ return self.sample_discretized(S, t, dt=dt)
+ else:
+
+ lam = self.rate_volume(S, t, dt)
+ n = np.random.poisson(lam=lam)
+ new_sample = []
+ vol = S.volume()
+ size = 0
+ # NOTE(review): alpha * lam cancels to 1, so the acceptance threshold is the raw rate value — confirm the intended normalization.
+ alpha = 1.0 / lam  # lam == 0 would divide by zero here
+
+ while size < n:
+ # uniform sample g(s) = 1/vol(S)
+ sample = S.uniform_sample(1)
+ # Keep t untouched: it is the time argument of every later self.rate call.
+ accept = self.rate(sample, t) / (alpha * lam)
+ p = np.random.uniform(0, 1)
+ if p < accept:
+ new_sample.append(sample.view(1, -1))
+ size = size + 1
+ # A single accepted point is a valid draw too, so only the empty case returns None.
+ if len(new_sample) > 0:
+ x = torch.cat(new_sample, dim=0)
+ else:
+ return None
+ return x
+
+ def sample_discretized(self, S, t, dt, n=50):
+ lam = float(self.rate_volume(S, t, dt))
+ count = np.random.poisson(lam=lam)  # number of points to draw
+ if count > 0:
+ x = S.return_discretization(n)  # candidate locations
+ r = self.rate(x, t) * dt
+ sample = torch.from_numpy(
+ np.random.choice(  # draws indices with replacement (np.random.choice default)
+ np.arange(0, x.size()[0], 1),
+ size=count,
+ p=(r / torch.sum(r)).numpy().reshape(-1),  # probabilities proportional to the rate at each candidate
+ )
+ )
+ return x[sample, :]
+ else:
+ return None
diff --git a/stpy/probability/bernoulli_likelihood.py b/stpy/probability/bernoulli_likelihood.py
index 2630337..14148e4 100644
--- a/stpy/probability/bernoulli_likelihood.py
+++ b/stpy/probability/bernoulli_likelihood.py
@@ -6,79 +6,103 @@
from stpy.probability.gaussian_likelihood import GaussianLikelihood
import scipy
+
class BernoulliLikelihoodCanonical(GaussianLikelihood):
def __init__(self):
super().__init__()
- def evaluate_datapoint(self, theta, d, mask = None):
+ def evaluate_datapoint(self, theta, d, mask=None):
if mask is None:
- mask = 1.
+ mask = 1.0
x, y = d
- r = -y*(x@theta) + torch.log(1+torch.exp(x@theta))
+ r = -y * (x @ theta) + torch.log(1 + torch.exp(x @ theta))
r = r * mask
return r
def link(self, s):
- return 1./(1.+ torch.exp(-s))
+ return 1.0 / (1.0 + torch.exp(-s))
- def scale(self, mask = None):
- return 1.
+ def scale(self, mask=None):
+ return 1.0
- def get_objective_cvxpy(self, mask = None):
+ def get_objective_cvxpy(self, mask=None):
if mask is None:
+
def likelihood(theta):
- return -self.y.T@(self.x @ theta) + cp.sum(cp.logistic(self.x @ theta))
+ return -self.y.T @ (self.x @ theta) + cp.sum(
+ cp.logistic(self.x @ theta)
+ )
+
else:
+
def likelihood(theta):
- if torch.sum(mask.double())>1e-8:
- return -(mask*self.y)@(self.x @ theta) + mask @ cp.logistic(self.x @ theta)
+ if torch.sum(mask.double()) > 1e-8:
+ return -(mask * self.y) @ (self.x @ theta) + mask @ cp.logistic(
+ self.x @ theta
+ )
else:
- return cp.sum(theta*0)
+ return cp.sum(theta * 0)
+
return likelihood
def lipschitz_constant(self, b):
return np.exp(b)
- def get_confidence_set_cvxpy(self,
- theta: cp.Variable,
- type: Union[str, None] = None,
- params: Dict = {},
- delta: float = 0.1):
+ def get_confidence_set_cvxpy(
+ self,
+ theta: cp.Variable,
+ type: Union[str, None] = None,
+ params: Dict = {},
+ delta: float = 0.1,
+ ):
if self.fitted == True:
return self.set_fn(theta)
- theta_fit = params['estimate']
- H = params['regularizer_hessian']
+ theta_fit = params["estimate"]
+ H = params["regularizer_hessian"]
lam = torch.max(torch.linalg.eigvalsh(H))
- B = params['bound']
- d_eff = params['d_eff']
+ B = params["bound"]
+ d_eff = params["d_eff"]
- if type in ['faubry']:
- D = torch.diag(1./(self.x @ theta_fit).view(-1))
+ if type in ["faubry"]:
+ D = torch.diag(1.0 / (self.x @ theta_fit).view(-1))
V = self.x.T @ D @ self.x + H
- beta = np.sqrt(lam*B) / 2. + 2. / np.sqrt(lam*B) * (torch.logdet(V) - torch.logdet(H)) + 2 / np.sqrt(
- lam*B) * np.log(1 / delta) * d_eff
+ beta = (
+ np.sqrt(lam * B) / 2.0
+ + 2.0 / np.sqrt(lam * B) * (torch.logdet(V) - torch.logdet(H))
+ + 2 / np.sqrt(lam * B) * np.log(1 / delta) * d_eff
+ )
L = torch.from_numpy(scipy.linalg.sqrtm(V.numpy()))
- self.set_fn = lambda theta: [cp.sum_squares(L @ (theta - theta_fit)) <= beta]
+ self.set_fn = lambda theta: [
+ cp.sum_squares(L @ (theta - theta_fit)) <= beta
+ ]
set = self.set_fn(theta)
- elif type in ['laplace']:
- sigma = 1./4.
+ elif type in ["laplace"]:
+ sigma = 1.0 / 4.0
V = self.x.T @ self.x / sigma**2 + H
L = torch.from_numpy(scipy.linalg.sqrtm(V.numpy()))
- beta = 2. * self.lipschitz_constant(B)
- self.set_fn = lambda theta: [cp.sum_squares(L @ (theta - theta_fit)) <= beta]
+ beta = 2.0 * self.lipschitz_constant(B)
+ self.set_fn = lambda theta: [
+ cp.sum_squares(L @ (theta - theta_fit)) <= beta
+ ]
set = self.set_fn(theta)
elif type in ["adaptive-AB"]:
- sigma = 1./4.
+ sigma = 1.0 / 4.0
V = self.x.T @ self.x / sigma**2 + H
L = torch.from_numpy(scipy.linalg.sqrtm(V.numpy()))
- beta = 2 * np.log(1. / delta) + (torch.logdet(V + H) - torch.logdet(H)) + lam * B
- self.set_fn = lambda theta: [cp.sum_squares(L@(theta - theta_fit)) <= beta]
+ beta = (
+ 2 * np.log(1.0 / delta)
+ + (torch.logdet(V + H) - torch.logdet(H))
+ + lam * B
+ )
+ self.set_fn = lambda theta: [
+ cp.sum_squares(L @ (theta - theta_fit)) <= beta
+ ]
set = self.set_fn(theta)
elif type == "LR":
@@ -86,7 +110,9 @@ def get_confidence_set_cvxpy(self,
set = self.lr_confidence_set_cvxpy(theta, beta, params)
else:
- raise NotImplementedError("The desired confidence set type is not supported.")
+ raise NotImplementedError(
+ "The desired confidence set type is not supported."
+ )
self.set = set
self.fitted = True
@@ -94,35 +120,43 @@ def get_confidence_set_cvxpy(self,
return set
def information_matrix(self):
- V = self.x.T@self.x/self.sigma
+ V = self.x.T @ self.x / self.sigma
return V
- def confidence_parameter(self, delta, params, type = None):
- H = params['regularizer_hessian']
+ def confidence_parameter(self, delta, params, type=None):
+ H = params["regularizer_hessian"]
lam = torch.max(torch.linalg.eigvalsh(H))
- B = params['bound']
- d_eff = params['d_eff']
+ B = params["bound"]
+ d_eff = params["d_eff"]
if type is None or type == "none" or type == "laplace":
# this is a common heuristic
- beta = 2.0
+ beta = 2.0
elif type == "adaptive-AB":
- sigma = 1./4.
- V = self.x.T @ self.x / sigma ** 2 + H
- beta = 2 * np.log(1. / delta) + (torch.logdet(V + H) - torch.logdet(H)) + lam * B
+ sigma = 1.0 / 4.0
+ V = self.x.T @ self.x / sigma**2 + H
+ beta = (
+ 2 * np.log(1.0 / delta)
+ + (torch.logdet(V + H) - torch.logdet(H))
+ + lam * B
+ )
elif type == "LR":
# this is based on sequential LR test
beta = self.confidence_parameter_likelihood_ratio(delta, params)
elif type == "Faubry":
- H = params['regularizer_hessian']
- lam = H[0., 0]
- theta_fit = params['estimate']
- D = torch.diag(1./(self.x @ theta_fit).view(-1))
+ H = params["regularizer_hessian"]
+ lam = H[0, 0]  # integer index: a float index raises IndexError on a tensor
+ theta_fit = params["estimate"]
+ D = torch.diag(1.0 / (self.x @ theta_fit).view(-1))
V = self.x.T @ D @ self.x + H
- beta = np.sqrt(lam)/2. + 2./np.sqrt(lam)*(torch.logdet(V) - torch.logdet(H)) + 2/np.sqrt(lam)* np.log(1/delta)*d_eff
+ beta = (
+ np.sqrt(lam) / 2.0
+ + 2.0 / np.sqrt(lam) * (torch.logdet(V) - torch.logdet(H))
+ + 2 / np.sqrt(lam) * np.log(1 / delta) * d_eff
+ )
else:
raise NotImplementedError("Not implemented")
return beta
diff --git a/stpy/probability/gaussian_likelihood.py b/stpy/probability/gaussian_likelihood.py
index bdbf2fe..53a66e7 100644
--- a/stpy/probability/gaussian_likelihood.py
+++ b/stpy/probability/gaussian_likelihood.py
@@ -5,24 +5,27 @@
from stpy.probability.likelihood import Likelihood
import scipy
+
class GaussianLikelihood(Likelihood):
- def __init__(self, sigma = 0.1, Sigma=None):
+ def __init__(self, sigma=0.1, Sigma=None):
super().__init__()
self.sigma = sigma
self.Sigma = Sigma
- def scale(self, err = None, bound = None):
+ def scale(self, err=None, bound=None):
if self.Sigma is None:
return self.sigma**2
else:
- return torch.max(self.Sigma.T@self.Sigma)
+ return torch.max(self.Sigma.T @ self.Sigma)
def evaluate_log(self, f):
if self.Sigma is None:
- res = torch.sum((f - self.y)**2)/self.sigma**2
+ res = torch.sum((f - self.y) ** 2) / self.sigma**2
else:
- res = ((f - self.y).T @ torch.inverse(self.Sigma.T@self.Sigma) @ (f - self.y) )
+ res = (
+ (f - self.y).T @ torch.inverse(self.Sigma.T @ self.Sigma) @ (f - self.y)
+ )
return res
def load_data(self, D):
@@ -30,81 +33,119 @@ def load_data(self, D):
self.fitted = False
def add_data_point(self, d):
- x,y = d
- self.x = torch.vstack(self.x,x)
- self.y = torch.vstack(self.y,y)
+ x, y = d
+ self.x = torch.vstack((self.x, x))  # vstack takes one sequence of tensors
+ self.y = torch.vstack((self.y, y))
self.fitted = False
- def evaluate_datapoint(self, theta, d, mask = None):
- x,y = d
+ def evaluate_datapoint(self, theta, d, mask=None):
+ x, y = d
if mask is None:
- mask = 1.
+ mask = 1.0
if self.Sigma is None:
- return mask*((x @ theta - y) ** 2)/ (2*self.sigma ** 2)
+ return mask * ((x @ theta - y) ** 2) / (2 * self.sigma**2)
else:
- return mask*(x @ theta - y).T @ torch.linalg.inv(self.Sigma.T @ self.Sigma) @ (
- x @ theta - y)
+ return (
+ mask
+ * (x @ theta - y).T
+ @ torch.linalg.inv(self.Sigma.T @ self.Sigma)
+ @ (x @ theta - y)
+ )
def normalization(self, d):
- return 1./np.sqrt(2.*np.pi*self.sigma**2)
+ return 1.0 / np.sqrt(2.0 * np.pi * self.sigma**2)
def get_objective_torch(self):
if self.Sigma is None:
- def likelihood(theta): return torch.sum((self.x@theta - self.y)**2)/(2*self.sigma**2)
+
+ def likelihood(theta):
+ return torch.sum((self.x @ theta - self.y) ** 2) / (2 * self.sigma**2)
else:
- def likelihood(theta): return (self.x@theta - self.y).T@torch.linalg.inv(self.Sigma.T@self.Sigma*2)@(self.x@theta - self.y)
+
+ def likelihood(theta):
+ return (
+ (self.x @ theta - self.y).T
+ @ torch.linalg.inv(self.Sigma.T @ self.Sigma * 2)
+ @ (self.x @ theta - self.y)
+ )
+
return likelihood
- def get_objective_cvxpy(self, mask = None):
+ def get_objective_cvxpy(self, mask=None):
if mask is None:
if self.Sigma is None:
- def likelihood(theta): return cp.sum_squares(self.x@theta - self.y)/(2*self.sigma**2)
+
+ def likelihood(theta):
+ return cp.sum_squares(self.x @ theta - self.y) / (2 * self.sigma**2)
else:
- def likelihood(theta): return cp.matrix_frac(self.x@theta - self.y,2*self.Sigma.T@self.Sigma)
+
+ def likelihood(theta):
+ return cp.matrix_frac(
+ self.x @ theta - self.y, 2 * self.Sigma.T @ self.Sigma
+ )
+
else:
if self.Sigma is None:
+
def likelihood(theta):
- if torch.sum(mask.int())>1e-8:
- return cp.sum_squares(cp.multiply(mask.double().view(-1,1),(self.x @ theta - self.y)) )/ (2*self.sigma ** 2)
+ if torch.sum(mask.int()) > 1e-8:
+ return cp.sum_squares(
+ cp.multiply(
+ mask.double().view(-1, 1), (self.x @ theta - self.y)
+ )
+ ) / (2 * self.sigma**2)
else:
- return cp.sum(theta*0)
+ return cp.sum(theta * 0)
else:
+
def likelihood(theta):
- if torch.sum(mask.int())>1e-8:
- return cp.matrix_frac(cp.multiply(mask.double().view(-1,1),(self.x @ theta - self.y)), 2*self.Sigma.T @ self.Sigma)
+ if torch.sum(mask.int()) > 1e-8:
+ return cp.matrix_frac(
+ cp.multiply(
+ mask.double().view(-1, 1), (self.x @ theta - self.y)
+ ),
+ 2 * self.Sigma.T @ self.Sigma,
+ )
else:
- return cp.sum(theta*0)
+ return cp.sum(theta * 0)
+
return likelihood
- def information_matrix(self, mask = None):
+ def information_matrix(self, mask=None):
if mask is None:
if self.Sigma is None:
- V = self.x.T@self.x/(2*self.sigma**2)
+ V = self.x.T @ self.x / (2 * self.sigma**2)
else:
- V = self.x.T@torch.linalg.inv(self.Sigma.T@self.Sigma*2)@self.x
+ V = self.x.T @ torch.linalg.inv(self.Sigma.T @ self.Sigma * 2) @ self.x
return V
else:
if self.Sigma is None:
- V = self.x[mask,:].T@self.x[mask,:]/(2*self.sigma**2)
+ V = self.x[mask, :].T @ self.x[mask, :] / (2 * self.sigma**2)
else:
- V = self.x[mask,:].T@torch.linalg.inv(self.Sigma.T@self.Sigma*2)@self.x[mask,:]
+ V = (
+ self.x[mask, :].T
+ @ torch.linalg.inv(self.Sigma.T @ self.Sigma * 2)
+ @ self.x[mask, :]
+ )
return V
- def get_confidence_set_cvxpy(self,
- theta: cp.Variable,
- type: Union[str,None] = None,
- params: Dict = {},
- delta: float = 0.1):
+ def get_confidence_set_cvxpy(
+ self,
+ theta: cp.Variable,
+ type: Union[str, None] = None,
+ params: Dict = {},
+ delta: float = 0.1,
+ ):
if self.fitted == True:
return self.set_fn(theta)
- theta_fit = params['estimate']
- H = params['regularizer_hessian']
+ theta_fit = params["estimate"]
+ H = params["regularizer_hessian"]
if H is not None:
V = self.information_matrix() + H
@@ -112,23 +153,29 @@ def get_confidence_set_cvxpy(self,
V = self.information_matrix()
if type in ["none", None, "fixed"]:
-# L = torch.linalg.cholesky(V).double()
+ # L = torch.linalg.cholesky(V).double()
L = torch.from_numpy(scipy.linalg.sqrtm(V.numpy()))
beta = self.confidence_parameter(delta, params, type=type)
- self.set_fn = lambda theta: [cp.sum_squares(L@(theta - theta_fit)) <= beta]
+ self.set_fn = lambda theta: [
+ cp.sum_squares(L @ (theta - theta_fit)) <= beta
+ ]
set = self.set_fn(theta)
elif type in ["adaptive-AB"]:
L = torch.from_numpy(scipy.linalg.sqrtm(V.numpy()))
beta = self.confidence_parameter(delta, params, type=type)
- self.set_fn = lambda theta: [cp.sum_squares(L@(theta - theta_fit)) <= beta]
+ self.set_fn = lambda theta: [
+ cp.sum_squares(L @ (theta - theta_fit)) <= beta
+ ]
set = self.set_fn(theta)
elif type in ["adaptive-optimized"]:
beta = self.confidence_parameter(delta, params, type=type)
sqrtV = scipy.linalg.sqrtm(V)
- L = torch.linalg.cholesky(V+sqrtV).double()
- self.set_fn = lambda theta: [cp.sum_squares(L @ (theta - theta_fit)) <= beta]
+ L = torch.linalg.cholesky(V + sqrtV).double()
+ self.set_fn = lambda theta: [
+ cp.sum_squares(L @ (theta - theta_fit)) <= beta
+ ]
elif type == "LR":
beta = self.confidence_parameter_likelihood_ratio(delta, params)
@@ -139,20 +186,22 @@ def get_confidence_set_cvxpy(self,
beta = self.confidence_parameter_prior_posterior(delta, params)
set = self.prior_posterior_lr_confidence_set_cvxpy(theta, beta, params)
else:
- raise NotImplementedError("The desired confidence set type is not supported.")
- print (type, "USING BETA: ", beta)
+ raise NotImplementedError(
+ "The desired confidence set type is not supported."
+ )
+ print(type, "USING BETA: ", beta)
self.set = set
self.fitted = True
return set
- def confidence_parameter(self, delta, params, type = None):
- print (type)
+ def confidence_parameter(self, delta, params, type=None):
+ print(type)
if type is None or type == "none":
# this is a common heuristic
- beta = 2.0 * np.log(1/delta)
+ beta = 2.0 * np.log(1 / delta)
# elif type == "LR" or type == "LR-vovk":
# # this is based on sequential LR test
@@ -160,26 +209,37 @@ def confidence_parameter(self, delta, params, type = None):
# beta = self.confidence_parameter_likelihood_ratio(delta, params)
else:
- if 'd_eff' in params.keys():
+ if "d_eff" in params.keys():
n = self.x.size()[0]
- d = params['d_eff']
+ d = params["d_eff"]
else:
- d = params['m']
+ d = params["m"]
- B = params['bound']
- H = params['regularizer_hessian']
+ B = params["bound"]
+ H = params["regularizer_hessian"]
lam = torch.max(torch.linalg.eigvalsh(H))
if type == "fixed":
# this is fixed design
- beta = d + 2 * np.log(1 / delta) + 2 * np.sqrt(d * np.log(1 / delta)) + lam*B
+ beta = (
+ d
+ + 2 * np.log(1 / delta)
+ + 2 * np.sqrt(d * np.log(1 / delta))
+ + lam * B
+ )
elif type == "adaptive-AB":
- print ("calculating: adaptive-AB")
+ print("calculating: adaptive-AB")
# this takes the pseudo-maximization with a fixed mixture
V = self.information_matrix()
- beta = 2*np.log(1./delta) + (torch.logdet(V+H) - torch.logdet(H)) + lam*B
+ beta = (
+ 2 * np.log(1.0 / delta)
+ + (torch.logdet(V + H) - torch.logdet(H))
+ + lam * B
+ )
else:
- raise NotImplementedError("The desired confidence set type is not supported.")
+ raise NotImplementedError(
+ "The desired confidence set type is not supported."
+ )
- return beta
\ No newline at end of file
+ return beta
diff --git a/stpy/probability/huber_likelihood.py b/stpy/probability/huber_likelihood.py
index f9321c7..66f7180 100644
--- a/stpy/probability/huber_likelihood.py
+++ b/stpy/probability/huber_likelihood.py
@@ -8,7 +8,7 @@
class HuberLikelihood(GaussianLikelihood):
- def __init__(self, sigma=0.1, M=1.):
+ def __init__(self, sigma=0.1, M=1.0):
super().__init__()
self.sigma = sigma
self.M = M
@@ -16,23 +16,23 @@ def __init__(self, sigma=0.1, M=1.):
def evaluate_log(self, f):
pass
- def scale(self, err = None):
+ def scale(self, err=None):
if self.Sigma is None:
return self.sigma**2
else:
- return torch.max(self.Sigma.T@self.Sigma)
+ return torch.max(self.Sigma.T @ self.Sigma)
- def evaluate_datapoint(self, theta, d, mask = None):
+ def evaluate_datapoint(self, theta, d, mask=None):
if mask is None:
- mask = 1.
+ mask = 1.0
x, y = d
res = (x @ theta - y) / self.sigma
mask1 = torch.abs(res) < self.M
mask2 = torch.abs(res) >= self.M
v = res
v[mask1] = res[mask1] ** 2
- v[mask2] = 2 * self.M * torch.abs(res[mask2]) - self.M ** 2
- return torch.sum(v)*mask
+ v[mask2] = 2 * self.M * torch.abs(res[mask2]) - self.M**2
+ return torch.sum(v) * mask
def add_data_point(self, d):
x, y = d
@@ -46,20 +46,27 @@ def load_data(self, D):
def get_objective_cvxpy(self, mask=None):
if mask is None:
+
def likelihood(theta):
return cp.sum(cp.huber((self.x @ theta - self.y) / self.sigma))
+
else:
+
def likelihood(theta):
if torch.sum(mask.int()) > 0:
- return cp.sum(cp.huber((self.x[mask, :] @ theta - self.y[mask, :]) / self.sigma))
+ return cp.sum(
+ cp.huber(
+ (self.x[mask, :] @ theta - self.y[mask, :]) / self.sigma
+ )
+ )
else:
return cp.sum(theta * 0)
+
return likelihood
def information_matrix(self):
V = self.x.T @ self.x / self.sigma
return V
-
def get_objective_torch(self):
raise NotImplementedError("Implement me please.")
diff --git a/stpy/probability/laplace_likelihood.py b/stpy/probability/laplace_likelihood.py
index 732c82c..afb4912 100644
--- a/stpy/probability/laplace_likelihood.py
+++ b/stpy/probability/laplace_likelihood.py
@@ -6,62 +6,76 @@
from stpy.probability.likelihood import Likelihood
from stpy.probability.gaussian_likelihood import GaussianLikelihood
+
class LaplaceLikelihood(GaussianLikelihood):
- def __init__(self, b = 0.1):
+ def __init__(self, b=0.1):
super().__init__()
self.b = b
- def scale(self, err = None, bound = None):
+ def scale(self, err=None, bound=None):
return self.b
def evaluate_log(self, f):
- res = torch.sum(torch.abs(f - self.y))/self.b
+ res = torch.sum(torch.abs(f - self.y)) / self.b
return res
- def evaluate_datapoint(self, theta, d, mask = None):
+ def evaluate_datapoint(self, theta, d, mask=None):
if mask is None:
- mask = 1.
+ mask = 1.0
x, y = d
- return mask* (torch.abs(x @ theta - y)) / self.b
+ return mask * (torch.abs(x @ theta - y)) / self.b
- def get_objective_cvxpy(self, mask = None):
+ def get_objective_cvxpy(self, mask=None):
if mask is None:
- def likelihood(theta): return cp.sum(cp.abs(self.x@theta - self.y)/self.b)
+
+ def likelihood(theta):
+ return cp.sum(cp.abs(self.x @ theta - self.y) / self.b)
+
else:
+
def likelihood(theta):
- if torch.sum(mask.int())>0:
- return cp.sum(cp.abs(self.x[mask,:]@theta - self.y[mask,:])/self.b)
+ if torch.sum(mask.int()) > 0:
+ return cp.sum(
+ cp.abs(self.x[mask, :] @ theta - self.y[mask, :]) / self.b
+ )
else:
- return cp.sum(theta*0)
+ return cp.sum(theta * 0)
+
return likelihood
- def get_confidence_set_cvxpy(self,
- theta: cp.Variable,
- type: Union[str, None] = None,
- params: Dict = {},
- delta: float = 0.1):
+ def get_confidence_set_cvxpy(
+ self,
+ theta: cp.Variable,
+ type: Union[str, None] = None,
+ params: Dict = {},
+ delta: float = 0.1,
+ ):
if self.fitted == True:
return self.set_fn(theta)
- theta_fit = params['estimate']
- H = params['regularizer_hessian']
+ theta_fit = params["estimate"]
+ H = params["regularizer_hessian"]
if H is not None:
V = self.information_matrix() + H
else:
V = self.information_matrix()
- if type in ["none","sub-exp"]:
+ if type in ["none", "sub-exp"]:
L = torch.from_numpy(scipy.linalg.sqrtm(V.numpy()))
beta = self.confidence_parameter(delta, params, type=type)
- self.set_fn = lambda theta: [cp.sum_squares(L @ (theta - theta_fit)) <= beta]
+ self.set_fn = lambda theta: [
+ cp.sum_squares(L @ (theta - theta_fit)) <= beta
+ ]
set = self.set_fn(theta)
elif type == "adaptive-AB":
L = torch.from_numpy(scipy.linalg.sqrtm(V.numpy()))
beta = self.confidence_parameter(delta, params, type=type)
- self.set_fn = lambda theta: [cp.sum_squares(L @ (theta - theta_fit)) <= beta]
+ self.set_fn = lambda theta: [
+ cp.sum_squares(L @ (theta - theta_fit)) <= beta
+ ]
set = self.set_fn(theta)
elif type == "LR":
@@ -69,7 +83,9 @@ def get_confidence_set_cvxpy(self,
set = self.lr_confidence_set_cvxpy(theta, beta, params)
else:
- raise NotImplementedError("The desired confidence set type is not supported.")
+ raise NotImplementedError(
+ "The desired confidence set type is not supported."
+ )
print(type, "USING BETA: ", beta)
self.set = set
@@ -78,49 +94,62 @@ def get_confidence_set_cvxpy(self,
return set
def information_matrix(self):
- V = self.x.T@self.x/(2*self.b)**2
+ V = self.x.T @ self.x / (2 * self.b) ** 2
return V
-
- def get_objective_torch(self, mask = None):
+ def get_objective_torch(self, mask=None):
if mask is None:
- def likelihood(theta): return torch.sum(torch.abs(self.x@theta - self.y)/self.sigma)
+
+ def likelihood(theta):
+ return torch.sum(torch.abs(self.x @ theta - self.y) / self.b)  # Laplace scale is b, as in the cvxpy objective
+
else:
+
def likelihood(theta):
- if torch.sum(mask.int())>0:
- return torch.sum(torch.abs(self.x[mask,:]@theta - self.y[mask,:])/self.sigma)
+ if torch.sum(mask.int()) > 0:
+ return torch.sum(
+ torch.abs(self.x[mask, :] @ theta - self.y[mask, :])
+ / self.b
+ )
else:
- return torch.sum(theta*0)
- return likelihood
-
+ return torch.sum(theta * 0)
+ return likelihood
- def confidence_parameter(self, delta, params, type = None):
- print (type)
+ def confidence_parameter(self, delta, params, type=None):
+ print(type)
if type is None or type == "none":
- beta = 2.0 * np.log(1/delta)
+ beta = 2.0 * np.log(1 / delta)
else:
- if 'd_eff' in params.keys():
+ if "d_eff" in params.keys():
n = self.x.size()[0]
- d = params['d_eff']
+ d = params["d_eff"]
else:
- d = params['m']
+ d = params["m"]
- B = params['bound']
- H = params['regularizer_hessian']
+ B = params["bound"]
+ H = params["regularizer_hessian"]
lam = torch.max(torch.linalg.eigvalsh(H))
if type == "sub-exp":
# this takes the pseudo-maximization with a fixed mixture
V = self.information_matrix()
- L = 1.
+ L = 1.0
size = V.size()[0]
- beta = (lam*(B + self.b/L) + L/(self.b*np.sqrt(lam))*(d*np.log(2)+np.log(1./delta)+0.5*torch.slogdet(V*lam+torch.eye(size))[1]))
+ beta = lam * (B + self.b / L) + L / (self.b * np.sqrt(lam)) * (
+ d * np.log(2)
+ + np.log(1.0 / delta)
+ + 0.5 * torch.slogdet(V * lam + torch.eye(size))[1]
+ )
elif type == "adaptive-AB":
V = self.information_matrix()
- beta = 2*np.log(1./delta) + (torch.logdet(V+H) - torch.logdet(H)) + lam*B
+ beta = (
+ 2 * np.log(1.0 / delta)
+ + (torch.logdet(V + H) - torch.logdet(H))
+ + lam * B
+ )
else:
raise NotImplementedError("given confidence sets are not implemented.")
- return beta
\ No newline at end of file
+ return beta
diff --git a/stpy/probability/likelihood.py b/stpy/probability/likelihood.py
index c950cc0..939cd12 100644
--- a/stpy/probability/likelihood.py
+++ b/stpy/probability/likelihood.py
@@ -3,6 +3,7 @@
import numpy as np
import torch
+
class Likelihood(ABC):
def __init__(self):
@@ -14,7 +15,7 @@ def evaluate_log(self, f):
pass
@abstractmethod
- def scale(self, err = None, bound = None):
+ def scale(self, err=None, bound=None):
return
@abstractmethod
@@ -22,7 +23,7 @@ def normalization(self, d):
return
@abstractmethod
- def evaluate_datapoint(self, f, d, mask = None):
+ def evaluate_datapoint(self, f, d, mask=None):
pass
@abstractmethod
@@ -33,20 +34,18 @@ def get_confidence_set_cvxpy(self, theta, type, params, delta):
def information_matrix(self, theta_fit):
pass
-
@abstractmethod
- def get_objective_cvxpy(self, mask = None):
+ def get_objective_cvxpy(self, mask=None):
pass
@abstractmethod
def get_objective_torch(self):
pass
-
def add_data_point(self, d):
- x,y = d
- self.x = torch.vstack(self.x,x)
- self.y = torch.vstack(self.y,y)
+ x, y = d
+ self.x = torch.vstack((self.x, x))  # vstack takes one sequence of tensors
+ self.y = torch.vstack((self.y, y))
self.fitted = False
def load_data(self, D):
@@ -60,18 +59,18 @@ def confidence_parameter_likelihood_ratio(self, delta, params):
:param params:
:return:
"""
- evidence = params['evidence']
- estimators = params['estimator_sequence']
+ evidence = params["evidence"]
+ estimators = params["estimator_sequence"]
- val = 0.
- for i in range(len(estimators)-1):
+ val = 0.0
+ for i in range(len(estimators) - 1):
ev = evidence[i]
est = estimators[i]
if est is not None:
- xx = self.x[i,:].view(1,-1)
- yy = self.y[i,:].view(1,-1)
- val += self.evaluate_datapoint(est, (xx, yy), mask = ev)
- val = np.log(1/delta) + val
+ xx = self.x[i, :].view(1, -1)
+ yy = self.y[i, :].view(1, -1)
+ val += self.evaluate_datapoint(est, (xx, yy), mask=ev)
+ val = np.log(1 / delta) + val
return val
def lr_confidence_set_cvxpy(self, theta, beta, params):
@@ -82,20 +81,24 @@ def lr_confidence_set_cvxpy(self, theta, beta, params):
:param params:
:return:
"""
- evidence = torch.Tensor(params['evidence']).bool()
- self.set_fn = lambda theta: [self.get_objective_cvxpy(mask = evidence)(theta) <= beta]
+ evidence = torch.tensor(params["evidence"]).bool()
+ self.set_fn = lambda theta: [
+ self.get_objective_cvxpy(mask=evidence)(theta) <= beta
+ ]
set = self.set_fn(theta)
return set
-
- def confidence_parameter_prior_posterior(self, delta,params):
- H = params['regularizer_hessian']
- sigma = params['sigma']
+ def confidence_parameter_prior_posterior(self, delta, params):
+ H = params["regularizer_hessian"]
+ sigma = params["sigma"]
n = self.x.size()[0]
- K = (self.x@self.x.T + torch.max(H)*sigma**2*torch.eye(n))
- evidence_of_the_data = -0.5*self.y.T@torch.linalg.solve(K,self.y)-0.5*torch.linalg.slogdet(K)[1]#-(n/2)*np.log(2*np.pi) ## remove this as in likelihood not added
- evidence_of_the_data = evidence_of_the_data #- np.log(2*np.pi*sigma**2)
- return np.log(1./delta) - evidence_of_the_data
+ K = self.x @ self.x.T + torch.max(H) * sigma**2 * torch.eye(n)
+ evidence_of_the_data = (
+ -0.5 * self.y.T @ torch.linalg.solve(K, self.y)
+ - 0.5 * torch.linalg.slogdet(K)[1]
+ ) # -(n/2)*np.log(2*np.pi) ## remove this as in likelihood not added
+ evidence_of_the_data = evidence_of_the_data # - np.log(2*np.pi*sigma**2)
+ return np.log(1.0 / delta) - evidence_of_the_data
def prior_posterior_lr_confidence_set_cvxpy(self, theta, beta, params):
"""
@@ -106,11 +109,11 @@ def prior_posterior_lr_confidence_set_cvxpy(self, theta, beta, params):
:return:
"""
# create a Gaussian likelihood
- sigma = params['sigma']
- def gauss_likelihood(theta): return cp.sum_squares(self.x @ theta - self.y) / (2 * sigma ** 2)
- self.set_fn = lambda theta: [gauss_likelihood(theta)<= beta]
- set = self.set_fn(theta)
- return set
-
+ sigma = params["sigma"]
+ def gauss_likelihood(theta):
+ return cp.sum_squares(self.x @ theta - self.y) / (2 * sigma**2)
+ self.set_fn = lambda theta: [gauss_likelihood(theta) <= beta]
+ set = self.set_fn(theta)
+ return set
diff --git a/stpy/probability/noise_models.py b/stpy/probability/noise_models.py
index d736646..19db7a8 100644
--- a/stpy/probability/noise_models.py
+++ b/stpy/probability/noise_models.py
@@ -9,366 +9,421 @@
class NoiseModel(ABC):
- """
- Class provides an interface to sample noise observations and evaluate their likelihood
- """
- def __init__(self):
- pass
-
- @abstractmethod
- def sample(self, xs, theta):
- pass
-
- @abstractmethod
- def sample_noise(self, xs):
- pass
-
- def joint_log_likelihood(self, ys, xs, theta: Union[np.array, cp.Variable]) -> Union[np.array, cp.Expression]:
- """ Returns the sum of the lls, i.e. the joint ll"""
- if isinstance(theta, cp.Variable):
- return cp.sum(self.log_likelihood(ys, xs, theta))
- else:
- return np.sum(self.log_likelihood(ys, xs, theta))
-
-
-
- def get_mosek_params(self, threads=4):
- if self.convex:
- return {
- mosek.iparam.num_threads: threads,
- mosek.iparam.intpnt_solve_form: mosek.solveform.primal,
- mosek.dparam.intpnt_co_tol_pfeas: 1e-4,
- mosek.dparam.intpnt_co_tol_dfeas: 1e-4,
- mosek.dparam.intpnt_co_tol_rel_gap: 1e-4
- }
- else:
- raise AttributeError("Fetching mosek parameters disallowed for non-convex problems")
-
- @abstractmethod
- def convex(self) -> bool:
- pass
+ """
+ Class provides an interface to sample noise observations and evaluate their likelihood
+ """
+
+ def __init__(self):
+ pass
+
+ @abstractmethod
+ def sample(self, xs, theta):
+ pass
+
+ @abstractmethod
+ def sample_noise(self, xs):
+ pass
+
+ def joint_log_likelihood(
+ self, ys, xs, theta: Union[np.array, cp.Variable]
+ ) -> Union[np.array, cp.Expression]:
+ """Returns the sum of the lls, i.e. the joint ll"""
+ if isinstance(theta, cp.Variable):
+ return cp.sum(self.log_likelihood(ys, xs, theta))
+ else:
+ return np.sum(self.log_likelihood(ys, xs, theta))
+
+ def get_mosek_params(self, threads=4):
+ if self.convex:
+ return {
+ mosek.iparam.num_threads: threads,
+ mosek.iparam.intpnt_solve_form: mosek.solveform.primal,
+ mosek.dparam.intpnt_co_tol_pfeas: 1e-4,
+ mosek.dparam.intpnt_co_tol_dfeas: 1e-4,
+ mosek.dparam.intpnt_co_tol_rel_gap: 1e-4,
+ }
+ else:
+ raise AttributeError(
+ "Fetching mosek parameters disallowed for non-convex problems"
+ )
+
+ @abstractmethod
+ def convex(self) -> bool:
+ pass
class AdditiveHomoscedasticNoiseModel(NoiseModel):
- """
- Assume a linear model. Only thing left to implement is the eta log-likelihood in both cvxpy and numpy
+ """
+ Assume a linear model. Only thing left to implement is the eta log-likelihood in both cvxpy and numpy
- TODO discuss whether xs @ theta should be replaced by a f_noiseless type function you can pass at initialization?
- """
- @abstractmethod
- def sample_noise(self, xs):
- """ pass xs in order to know how large noise should be. Also able to deal with heteroscedastic later on """
- pass
+ TODO discuss whether xs @ theta should be replaced by a f_noiseless type function you can pass at initialization?
+ """
- def sample(self, xs, theta):
- return xs @ theta + self.sample_noise(xs)
+ @abstractmethod
+ def sample_noise(self, xs):
+ """pass xs in order to know how large noise should be. Also able to deal with heteroscedastic later on"""
+ pass
- def log_likelihood(self, ys, xs, theta): # TODO change base class
- if ys.shape[0] == 0:
- return 0. # this is to avoid problems with cvxpy variables of size 0, which it doesn't like
- if isinstance(theta, cp.Variable):
- return self.cvxpy_noise_log_likelihood(ys - (xs @ theta))
- else:
- return self.noise_log_likelihood(ys - (xs @ theta))
+ def sample(self, xs, theta):
+ return xs @ theta + self.sample_noise(xs)
+ def log_likelihood(self, ys, xs, theta): # TODO change base class
+ if ys.shape[0] == 0:
+ return 0.0 # this is to avoid problems with cvxpy variables of size 0, which it doesn't like
+ if isinstance(theta, cp.Variable):
+ return self.cvxpy_noise_log_likelihood(ys - (xs @ theta))
+ else:
+ return self.noise_log_likelihood(ys - (xs @ theta))
class PoissonNoise(NoiseModel):
- def __init__(self, lam):
- self.lam = lam
+ def __init__(self, lam):
+ self.lam = lam
- def sample_noise(self, xs):
- return torch.poisson(self.lam(xs).view(-1)).view(-1,1)
- def convex(self) -> bool:
- pass
+ def sample_noise(self, xs):
+ return torch.poisson(self.lam(xs).view(-1)).view(-1, 1)
- def sample(self, xs, theta):
- pass
+ def convex(self) -> bool:
+ pass
- def mean(self, xs):
- return self.lam(xs)
-class GaussianNoise(AdditiveHomoscedasticNoiseModel):
- def __init__(self, sigma=0.1):
- """
- :param sigma: standard deviation
- """
- super().__init__()
- self.sigma = sigma
+ def sample(self, xs, theta):
+ pass
- def sample_noise(self, xs):
- return self.sigma*np.random.normal(scale=1.0, size=(xs.shape[0], 1))
+ def mean(self, xs):
+ return self.lam(xs)
- def noise_log_likelihood(self, etas, xs=None):
- return -(0.5*((etas) ** 2))/(self.sigma ** 2) - 0.5*np.log(2*np.pi*(self.sigma**2))
- def cvxpy_noise_log_likelihood(self, etas, xs=None):
- return -0.5 * cp.square(etas) / (self.sigma ** 2) - 0.5*np.log(2 * np.pi * self.sigma ** 2)
+class GaussianNoise(AdditiveHomoscedasticNoiseModel):
+ def __init__(self, sigma=0.1):
+ """
+ :param sigma: standard deviation
+ """
+ super().__init__()
+ self.sigma = sigma
- @property
- def convex(self) -> bool:
- return True
+ def sample_noise(self, xs):
+ return self.sigma * np.random.normal(scale=1.0, size=(xs.shape[0], 1))
- def __str__(self):
- return "GaussianAdditive"
+ def noise_log_likelihood(self, etas, xs=None):
+ return -(0.5 * ((etas) ** 2)) / (self.sigma**2) - 0.5 * np.log(
+ 2 * np.pi * (self.sigma**2)
+ )
+ def cvxpy_noise_log_likelihood(self, etas, xs=None):
+ return -0.5 * cp.square(etas) / (self.sigma**2) - 0.5 * np.log(
+ 2 * np.pi * self.sigma**2
+ )
+ @property
+ def convex(self) -> bool:
+ return True
-class HuberNoise(AdditiveHomoscedasticNoiseModel):
- def __init__(self, sigma=0.1):
- """
- :param sigma: standard deviation
- """
- super().__init__()
- self.sigma = sigma
+ def __str__(self):
+ return "GaussianAdditive"
- def sample_noise(self, xs):
- return self.sigma*(np.random.normal(scale=1.0, size=(xs.shape[0], 1)) + np.random.laplace(scale=self.sigma, size=(xs.shape[0], 1)))/2.
- @property
- def convex(self) -> bool:
- return True
+class HuberNoise(AdditiveHomoscedasticNoiseModel):
+ def __init__(self, sigma=0.1):
+ """
+ :param sigma: standard deviation
+ """
+ super().__init__()
+ self.sigma = sigma
+
+ def sample_noise(self, xs):
+ return (
+ self.sigma
+ * (
+ np.random.normal(scale=1.0, size=(xs.shape[0], 1))
+ + np.random.laplace(scale=self.sigma, size=(xs.shape[0], 1))
+ )
+ / 2.0
+ )
+
+ @property
+ def convex(self) -> bool:
+ return True
+
+ def __str__(self):
+ return "GaussianAdditive"
- def __str__(self):
- return "GaussianAdditive"
class AdditiveBoundedNoise(GaussianNoise):
- """ Sub-Gaussian bounded norm, with a Gaussian Likelihood"""
- def __init__(self, lower, upper):
- super().__init__(upper-lower)
- self.lower = lower
- self.upper = upper
+ """Sub-Gaussian bounded norm, with a Gaussian Likelihood"""
+
+ def __init__(self, lower, upper):
+ super().__init__(upper - lower)
+ self.lower = lower
+ self.upper = upper
- def sample_noise(self, xs):
- raw = np.random.random_sample(size=(xs.shape[0], 1))
- rescaled = self.lower + raw * self.sigma
- print(rescaled)
- return rescaled # sigma is the length of the interval
+ def sample_noise(self, xs):
+ raw = np.random.random_sample(size=(xs.shape[0], 1))
+ rescaled = self.lower + raw * self.sigma
+ print(rescaled)
+ return rescaled # sigma is the length of the interval
- def __str__(self):
- return "BoundedNoiseAdditive"
+ def __str__(self):
+ return "BoundedNoiseAdditive"
class MisspecifiedAdditiveGaussianNoise(GaussianNoise):
- def __init__(self, sigma=1.0, actual_sigma=0.1):
- """
- :param sigma: standard deviation
- """
- super().__init__(sigma=sigma)
- self.actual_sigma = actual_sigma
+ def __init__(self, sigma=1.0, actual_sigma=0.1):
+ """
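+ :param sigma: standard deviation assumed by the Gaussian likelihood; actual_sigma is the standard deviation used when sampling noise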
+ """
+ super().__init__(sigma=sigma)
+ self.actual_sigma = actual_sigma
- def sample_noise(self, xs):
- return self.actual_sigma*np.random.normal(scale=1.0, size=(xs.shape[0], 1))
+ def sample_noise(self, xs):
+ return self.actual_sigma * np.random.normal(scale=1.0, size=(xs.shape[0], 1))
- def __str__(self):
- return "MisspecifiedGaussianAdditive"
+ def __str__(self):
+ return "MisspecifiedGaussianAdditive"
class LaplaceNoise(GaussianNoise):
- def __init__(self, b):
- """
- :param sigma: this is sometimes also denoted as b
- """
- super().__init__()
- self.b = b
+ def __init__(self, b):
+ """
+ :param b: scale parameter of the Laplace distribution (elsewhere sometimes written as sigma)
+ """
+ super().__init__()
+ self.b = b
- def noise_log_likelihood(self, etas):
- return -np.log(2*self.b) - np.abs(etas)/self.b
+ def noise_log_likelihood(self, etas):
+ return -np.log(2 * self.b) - np.abs(etas) / self.b
- def cvxpy_noise_log_likelihood(self, etas):
- return -np.log(2*self.b) - cp.abs(etas)/self.b
+ def cvxpy_noise_log_likelihood(self, etas):
+ return -np.log(2 * self.b) - cp.abs(etas) / self.b
- def sample_noise(self, xs):
- return np.random.laplace(loc = 0, scale=self.b, size=(xs.shape[0], 1))
+ def sample_noise(self, xs):
+ return np.random.laplace(loc=0, scale=self.b, size=(xs.shape[0], 1))
- def __str__(self):
- return "Laplace"
+ def __str__(self):
+ return "Laplace"
- @property
- def convex(self) -> bool:
- return True
+ @property
+ def convex(self) -> bool:
+ return True
class AdditiveGumbelNoise(AdditiveHomoscedasticNoiseModel):
- def __init__(self, beta, mu):
- super().__init__()
- self.beta = beta
- self.mu = mu
+ def __init__(self, beta, mu):
+ super().__init__()
+ self.beta = beta
+ self.mu = mu
- def sample_noise(self, xs):
- return np.random.gumbel(loc=self.mu, scale=self.beta, size=(xs.shape[0],))
+ def sample_noise(self, xs):
+ return np.random.gumbel(loc=self.mu, scale=self.beta, size=(xs.shape[0],))
- def noise_log_likelihood(self, etas):
- return -np.log(self.beta) - 1/self.beta*(etas - self.mu) - np.exp(-1/self.beta*(etas-self.mu))
+ def noise_log_likelihood(self, etas):
+ return (
+ -np.log(self.beta)
+ - 1 / self.beta * (etas - self.mu)
+ - np.exp(-1 / self.beta * (etas - self.mu))
+ )
- def cvxpy_noise_log_likelihood(self, etas):
- return -np.log(self.beta) - 1/self.beta*(etas - self.mu) - cp.exp(-1/self.beta*(etas-self.mu))
+ def cvxpy_noise_log_likelihood(self, etas):
+ return (
+ -np.log(self.beta)
+ - 1 / self.beta * (etas - self.mu)
+ - cp.exp(-1 / self.beta * (etas - self.mu))
+ )
- def __str__(self):
- return "Gumbel"
+ def __str__(self):
+ return "Gumbel"
+
+ @property
+ def convex(self) -> bool:
+ return True
- @property
- def convex(self) -> bool:
- return True
class AdditiveTwoSidedWeibullNoise(AdditiveHomoscedasticNoiseModel):
- def __init__(self, scale, shape):
- """
- :param scale: lambda
- :param shape: k
- """
- super().__init__()
- self.scale = scale
- self.shape = shape
-
- def noise_log_likelihood(self, etas):
- etas = np.abs(etas)
- return np.log(0.5*self.shape/self.scale) + (self.shape - 1)*np.log(etas/self.scale) - np.power(etas/self.scale, self.shape)
-
- def cvxpy_noise_log_likelihood(self, etas):
- raise NotImplementedError("cvxpy makes no sense for non-convex sets")
-
- def sample_noise(self, xs):
- signs = np.sign(np.random.normal(size=xs.shape[0]))
- weibull = np.random.weibull(self.shape, size=xs.shape[0])
- return self.scale * signs * weibull
-
- def __str__(self):
- return "TwoSidedWeibull"
-
- @property
- def convex(self) -> bool:
- return False
+ def __init__(self, scale, shape):
+ """
+ :param scale: lambda
+ :param shape: k
+ """
+ super().__init__()
+ self.scale = scale
+ self.shape = shape
+
+ def noise_log_likelihood(self, etas):
+ etas = np.abs(etas)
+ return (
+ np.log(0.5 * self.shape / self.scale)
+ + (self.shape - 1) * np.log(etas / self.scale)
+ - np.power(etas / self.scale, self.shape)
+ )
+
+ def cvxpy_noise_log_likelihood(self, etas):
+ raise NotImplementedError("cvxpy makes no sense for non-convex sets")
+
+ def sample_noise(self, xs):
+ signs = np.sign(np.random.normal(size=xs.shape[0]))
+ weibull = np.random.weibull(self.shape, size=xs.shape[0])
+ return self.scale * signs * weibull
+
+ def __str__(self):
+ return "TwoSidedWeibull"
+
+ @property
+ def convex(self) -> bool:
+ return False
+
class BernoulliNoise(NoiseModel):
- def __init__(self, prob):
- """
- :param scale: lambda
- Note lambda should work for both cvxpy and np parameter inputs and takes xs, theta
- :param shape: p
- """
- super().__init__()
- self.prob = prob # lambda , $lambda^(1/a) to connect to sampling below
+ def __init__(self, prob):
+ """
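+ :param prob: callable evaluated as prob(xs); its output is used as the Bernoulli success probabilities
+ Note prob(xs) is passed to torch.bernoulli in sample_noise, so it should return a torch tensor
+ with entries in [0, 1]; mean(xs) returns prob(xs) unchanged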
+ """
+ super().__init__()
+ self.prob = prob # lambda , $lambda^(1/a) to connect to sampling below
- def mean(self, xs):
- return self.prob(xs)
+ def mean(self, xs):
+ return self.prob(xs)
- def sample_noise(self, xs):
- bernouli = torch.bernoulli(self.prob(xs).view(-1))
- return bernouli.view(-1,1)
+ def sample_noise(self, xs):
+ bernouli = torch.bernoulli(self.prob(xs).view(-1))
+ return bernouli.view(-1, 1)
- def convex(self):
- pass
+ def convex(self):
+ pass
- def sample(self, xs, theta):
- pass
+ def sample(self, xs, theta):
+ pass
- def log_likelihood(self, ys, xs, theta: Union[np.array, cp.Variable]) -> Union[np.array, cp.Expression]:
- pass
+ def log_likelihood(
+ self, ys, xs, theta: Union[np.array, cp.Variable]
+ ) -> Union[np.array, cp.Expression]:
+ pass
class LogWeibullNoise(NoiseModel):
- def __init__(self, lam, p = 2, lam_form = lambda x, y: np.exp(x@y)):
- """
- :param scale: lambda
- Note lambda should work for both cvxpy and np parameter inputs and takes xs, theta
- :param shape: p
- """
- super().__init__()
- self.lam = lam # lambda , $lambda^(1/a) to connect to sampling below
- self.p = p #
- self.lam_form = lam_form
-
- def sample(self,xs,theta):
- pass
-
- def log_likelihood(self, ys, xs, theta):
- assert(xs is not None)
- if isinstance(theta, cp.Variable):
- return self.cvxpy_log_likelihood(ys, xs, theta)
- else:
- return self.noise_log_likelihood(ys, xs, theta)
-
- def noise_log_likelihood(self,ys, xs, theta):
- return np.log(self.lam_form(xs, theta).reshape(-1)) + self.p*ys.reshape(-1) - np.exp(ys).reshape(-1)**self.p*self.lam_form(xs, theta).reshape(-1)
- # notice that lam(xs) = exp(\theta^\top xs) in common parametrization hence the loglikelihood becomes
- # xs @ theta + p*y - np.exp(y)**p*np.exp(xs@\theta) # which is strongly convex in theta
-
- def sample_noise(self, xs):
- weibull = (self.lam(xs)**(1/self.p)).reshape(-1)*np.random.weibull(self.p, size=xs.shape[0])
- weibull = weibull.reshape(-1,1)
- return np.log(weibull)
-
- def mean(self, xs):
- return (np.log(self.lam(xs)) - np.euler_gamma)/self.p
-
- def cvxpy_log_likelihood(self, ys, xs, theta):
- # This works only fi
- return xs @ theta + self.p*ys - cp.multiply((np.exp(ys)**self.p).reshape(-1),cp.exp(xs@theta))
-
- def __str__(self):
- return "logWeibull"
-
- @property
- def convex(self) -> bool:
- return True
-
-class WeibullNoise(LogWeibullNoise):
-
- def noise_log_likelihood(self,ys, xs, theta):
- return np.log(self.lam_form(xs, theta).reshape(-1)) + np.log(self.p * (ys.reshape(-1)**(self.p-1))) - self.lam_form(xs, theta).reshape(-1)*(ys.reshape(-1)**self.p)
- # notice that lam(xs) = exp(\theta^\top xs) in common parametrization hence the loglikelihood becomes
- # xs @ theta + p*y - np.exp(y)**p*np.exp(xs@\theta) # which is strongly convex in theta
+ def __init__(self, lam, p=2, lam_form=lambda x, y: np.exp(x @ y)):
+ """
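+ :param lam: lambda (scale), a callable evaluated as lam(xs) when sampling and computing the mean
+ Note lam_form is the parametric form of lambda used in the likelihood; it takes xs, theta and should work for both cvxpy and np parameter inputs
+ :param p: shape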
+ """
+ super().__init__()
+ self.lam = lam # lambda , $lambda^(1/a) to connect to sampling below
+ self.p = p #
+ self.lam_form = lam_form
+
+ def sample(self, xs, theta):
+ pass
+
+ def log_likelihood(self, ys, xs, theta):
+ assert xs is not None
+ if isinstance(theta, cp.Variable):
+ return self.cvxpy_log_likelihood(ys, xs, theta)
+ else:
+ return self.noise_log_likelihood(ys, xs, theta)
+
+ def noise_log_likelihood(self, ys, xs, theta):
+ return (
+ np.log(self.lam_form(xs, theta).reshape(-1))
+ + self.p * ys.reshape(-1)
+ - np.exp(ys).reshape(-1) ** self.p * self.lam_form(xs, theta).reshape(-1)
+ )
+ # notice that lam(xs) = exp(\theta^\top xs) in common parametrization hence the loglikelihood becomes
+ # xs @ theta + p*y - np.exp(y)**p*np.exp(xs@\theta) # which is strongly convex in theta
+
+ def sample_noise(self, xs):
+ weibull = (self.lam(xs) ** (1 / self.p)).reshape(-1) * np.random.weibull(
+ self.p, size=xs.shape[0]
+ )
+ weibull = weibull.reshape(-1, 1)
+ return np.log(weibull)
+
+ def mean(self, xs):
+ return (np.log(self.lam(xs)) - np.euler_gamma) / self.p
+
+ def cvxpy_log_likelihood(self, ys, xs, theta):
+ # This works only if lam_form(xs, theta) = exp(xs @ theta), since that form is hard-coded below
+ return (
+ xs @ theta
+ + self.p * ys
+ - cp.multiply((np.exp(ys) ** self.p).reshape(-1), cp.exp(xs @ theta))
+ )
+
+ def __str__(self):
+ return "logWeibull"
+
+ @property
+ def convex(self) -> bool:
+ return True
- def noise_likelihood(self,ys, xs, theta):
- return self.lam_form(xs, theta).reshape(-1)*(self.p * (ys.reshape(-1)**(self.p-1)))*np.exp(- self.lam_form(xs, theta).reshape(-1)*(ys.reshape(-1)**self.p))
- # notice that lam(xs) = exp(\theta^\top xs) in common parametrization hence the loglikelihood becomes
- # xs @ theta + p*y - np.exp(y)**p*np.exp(xs@\theta) # which is strongly convex in theta
- def sample_noise(self, xs):
- convert_lambda = (1/self.lam(xs))**(1/self.p)
- weibull = convert_lambda.view(-1)*np.random.weibull(self.p, size=xs.shape[0])
- weibull = weibull.reshape(-1,1)
- return weibull
-
- def mode(self, xs):
- convert_lambda = (1/self.lam(xs))**(1/self.p)
- return convert_lambda*((((self.p-1)/self.p))**(1/self.p))
+class WeibullNoise(LogWeibullNoise):
- def mean(self, xs):
- convert_lambda = (1/self.lam(xs))**(1/self.p)
- return convert_lambda*scipy.special.gamma(1. + 1./self.p)
+ def noise_log_likelihood(self, ys, xs, theta):
+ return (
+ np.log(self.lam_form(xs, theta).reshape(-1))
+ + np.log(self.p * (ys.reshape(-1) ** (self.p - 1)))
+ - self.lam_form(xs, theta).reshape(-1) * (ys.reshape(-1) ** self.p)
+ )
+ # notice that lam(xs) = exp(\theta^\top xs) in common parametrization hence the loglikelihood becomes
+ # xs @ theta + p*y - np.exp(y)**p*np.exp(xs@\theta) # which is strongly convex in theta
+
+ def noise_likelihood(self, ys, xs, theta):
+ return (
+ self.lam_form(xs, theta).reshape(-1)
+ * (self.p * (ys.reshape(-1) ** (self.p - 1)))
+ * np.exp(-self.lam_form(xs, theta).reshape(-1) * (ys.reshape(-1) ** self.p))
+ )
+ # notice that lam(xs) = exp(\theta^\top xs) in common parametrization hence the loglikelihood becomes
+ # xs @ theta + p*y - np.exp(y)**p*np.exp(xs@\theta) # which is strongly convex in theta
+
+ def sample_noise(self, xs):
+ convert_lambda = (1 / self.lam(xs)) ** (1 / self.p)
+ weibull = convert_lambda.view(-1) * np.random.weibull(self.p, size=xs.shape[0])
+ weibull = weibull.reshape(-1, 1)
+ return weibull
+
+ def mode(self, xs):
+ convert_lambda = (1 / self.lam(xs)) ** (1 / self.p)
+ return convert_lambda * ((((self.p - 1) / self.p)) ** (1 / self.p))
+
+ def mean(self, xs):
+ convert_lambda = (1 / self.lam(xs)) ** (1 / self.p)
+ return convert_lambda * scipy.special.gamma(1.0 + 1.0 / self.p)
if __name__ == "__main__":
- import matplotlib.pyplot as plt
-
- d = 2
- p = 2
- lam = lambda x: torch.exp(torch.sum(x, dim = 1))
- lam_form = lambda x,theta: torch.exp(x@theta)
-
- W = WeibullNoise(lam, p = p, lam_form=lam_form)
-
- tstar = torch.ones(size = (2,1)).double()
- x = torch.ones(size = (1,2)).double()
- print(lam(x), lam_form(x,tstar))
- pdf = lambda y: W.noise_likelihood(y,x,tstar)#torch.exp(W.noise_log_likelihood(y,x,tstar))
-
- y = torch.linspace(0,5,1000).double()
- #plt.plot(y, pdf(y))
- samples = []
- mean = float(np.log(lam(x)))
- for _ in range(10000):
- samples.append(-np.log(float(W.sample_noise(x).view(-1)))*p - np.euler_gamma - mean)
-
- print (np.mean(samples))
- print( (np.pi**2/6))
- print (np.var(samples))
- #plt.plot(np.exp(W.mode(x)),pdf(W.mode(x)),'ko')
-
- plt.hist(samples, density=True)
- plt.show()
-
-
+ import matplotlib.pyplot as plt
+
+ d = 2
+ p = 2
+ lam = lambda x: torch.exp(torch.sum(x, dim=1))
+ lam_form = lambda x, theta: torch.exp(x @ theta)
+
+ W = WeibullNoise(lam, p=p, lam_form=lam_form)
+
+ tstar = torch.ones(size=(2, 1)).double()
+ x = torch.ones(size=(1, 2)).double()
+ print(lam(x), lam_form(x, tstar))
+ pdf = lambda y: W.noise_likelihood(
+ y, x, tstar
+ ) # torch.exp(W.noise_log_likelihood(y,x,tstar))
+
+ y = torch.linspace(0, 5, 1000).double()
+ # plt.plot(y, pdf(y))
+ samples = []
+ mean = float(np.log(lam(x)))
+ for _ in range(10000):
+ samples.append(
+ -np.log(float(W.sample_noise(x).view(-1))) * p - np.euler_gamma - mean
+ )
+
+ print(np.mean(samples))
+ print((np.pi**2 / 6))
+ print(np.var(samples))
+ # plt.plot(np.exp(W.mode(x)),pdf(W.mode(x)),'ko')
+
+ plt.hist(samples, density=True)
+ plt.show()
diff --git a/stpy/probability/poisson_likelihood.py b/stpy/probability/poisson_likelihood.py
index d588fb7..af110a0 100644
--- a/stpy/probability/poisson_likelihood.py
+++ b/stpy/probability/poisson_likelihood.py
@@ -6,6 +6,7 @@
from stpy.probability.gaussian_likelihood import GaussianLikelihood
import scipy
+
class PoissonLikelihoodCanonical(GaussianLikelihood):
def __init__(self):
@@ -13,66 +14,80 @@ def __init__(self):
def evaluate_datapoint(self, theta, d, mask):
if mask is None:
- mask = 1.
+ mask = 1.0
x, y = d
- r = -y*x@theta + torch.exp(x@theta)
+ r = -y * x @ theta + torch.exp(x @ theta)
r = r * mask
return r
def link(self, s):
return torch.exp(s)
- def scale(self, err = None, bound = None):
+ def scale(self, err=None, bound=None):
return np.exp(bound)
- def get_objective_cvxpy(self, mask = None):
+ def get_objective_cvxpy(self, mask=None):
if mask is None:
+
def likelihood(theta):
- return -self.y.T@(self.x @ theta) + cp.sum(cp.exp(self.x@theta))
+ return -self.y.T @ (self.x @ theta) + cp.sum(cp.exp(self.x @ theta))
+
else:
+
def likelihood(theta):
- if torch.sum(mask.double())>1e-8:
- return -(mask*self.y).T@(self.x @ theta) + mask.T @ cp.exp(self.x@theta)
+ if torch.sum(mask.double()) > 1e-8:
+ return -(mask * self.y).T @ (self.x @ theta) + mask.T @ cp.exp(
+ self.x @ theta
+ )
else:
- return cp.sum(theta*0)
+ return cp.sum(theta * 0)
+
return likelihood
- def get_confidence_set_cvxpy(self,
- theta: cp.Variable,
- type: Union[str, None] = None,
- params: Dict = {},
- delta: float = 0.1):
+ def get_confidence_set_cvxpy(
+ self,
+ theta: cp.Variable,
+ type: Union[str, None] = None,
+ params: Dict = {},
+ delta: float = 0.1,
+ ):
if self.fitted == True:
return self.set_fn(theta)
- theta_fit = params['estimate']
- H = params['regularizer_hessian']
+ theta_fit = params["estimate"]
+ H = params["regularizer_hessian"]
lam = torch.max(torch.linalg.eigvalsh(H))
- B = params['bound']
- d_eff = params['d_eff']
- bound = params['bound']
+ B = params["bound"]
+ d_eff = params["d_eff"]
+ bound = params["bound"]
if type == "LR":
beta = self.confidence_parameter(delta, params, type=type)
set = self.lr_confidence_set_cvxpy(theta, beta, params)
- elif type in ['mutny']:
+ elif type in ["mutny"]:
vars = np.exp(bound)
- V = self.x.T @torch.diag(vars)@ self.x + H
+ V = self.x.T @ torch.diag(vars) @ self.x + H
L = torch.from_numpy(scipy.linalg.sqrtm(V.numpy()))
- beta = 2.*np.log(1./delta)
- self.set_fn = lambda theta: [cp.sum_squares(L @ (theta - theta_fit)) <= beta]
+ beta = 2.0 * np.log(1.0 / delta)
+ self.set_fn = lambda theta: [
+ cp.sum_squares(L @ (theta - theta_fit)) <= beta
+ ]
set = self.set_fn(theta)
- elif type in ['laplace']:
- vars = torch.exp(self.x@ theta_fit).view(-1)
- V = self.x.T @torch.diag(vars) @ self.x + H
+ elif type in ["laplace"]:
+ vars = torch.exp(self.x @ theta_fit).view(-1)
+ V = self.x.T @ torch.diag(vars) @ self.x + H
L = torch.from_numpy(scipy.linalg.sqrtm(V.numpy()))
- beta = 2.*np.log(1./delta)
- self.set_fn = lambda theta: [cp.sum_squares(L @ (theta - theta_fit)) <= beta]
+ beta = 2.0 * np.log(1.0 / delta)
+ self.set_fn = lambda theta: [
+ cp.sum_squares(L @ (theta - theta_fit)) <= beta
+ ]
set = self.set_fn(theta)
else:
- raise NotImplementedError("The desired confidence set type is not supported.")
+ raise NotImplementedError(
+ "The desired confidence set type is not supported."
+ )
self.set = set
self.fitted = True
@@ -80,35 +95,43 @@ def get_confidence_set_cvxpy(self,
return set
def information_matrix(self):
- V = self.x.T@self.x/self.sigma
+ V = self.x.T @ self.x / self.sigma
return V
- def confidence_parameter(self, delta, params, type = None):
- H = params['regularizer_hessian']
+ def confidence_parameter(self, delta, params, type=None):
+ H = params["regularizer_hessian"]
lam = torch.max(torch.linalg.eigvalsh(H))
- B = params['bound']
- d_eff = params['d_eff']
+ B = params["bound"]
+ d_eff = params["d_eff"]
if type is None or type == "none" or type == "laplace":
# this is a common heuristic
- beta = 2.0
+ beta = 2.0
elif type == "adaptive-AB":
- sigma = 1./4.
- V = self.x.T @ self.x / sigma ** 2 + H
- beta = 2 * np.log(1. / delta) + (torch.logdet(V + H) - torch.logdet(H)) + lam * B
+ sigma = 1.0 / 4.0
+ V = self.x.T @ self.x / sigma**2 + H
+ beta = (
+ 2 * np.log(1.0 / delta)
+ + (torch.logdet(V + H) - torch.logdet(H))
+ + lam * B
+ )
elif type == "LR":
# this is based on sequential LR test
beta = self.confidence_parameter_likelihood_ratio(delta, params)
elif type == "Faubry":
- H = params['regularizer_hessian']
- lam = H[0., 0]
- theta_fit = params['estimate']
- D = torch.diag(1./(self.x @ theta_fit).view(-1))
+ H = params["regularizer_hessian"]
+ lam = H[0.0, 0]
+ theta_fit = params["estimate"]
+ D = torch.diag(1.0 / (self.x @ theta_fit).view(-1))
V = self.x.T @ D @ self.x + H
- beta = np.sqrt(lam)/2. + 2./np.sqrt(lam)*(torch.logdet(V) - torch.logdet(H)) + 2/np.sqrt(lam)* np.log(1/delta)*d_eff
+ beta = (
+ np.sqrt(lam) / 2.0
+ + 2.0 / np.sqrt(lam) * (torch.logdet(V) - torch.logdet(H))
+ + 2 / np.sqrt(lam) * np.log(1 / delta) * d_eff
+ )
else:
raise NotImplementedError("Not implemented")
return beta
diff --git a/stpy/probability/robust_likelihood.py b/stpy/probability/robust_likelihood.py
index 0cab487..8cc2dd7 100644
--- a/stpy/probability/robust_likelihood.py
+++ b/stpy/probability/robust_likelihood.py
@@ -4,9 +4,10 @@
from typing import Union, Dict, List
from stpy.probability.likelihood import Likelihood
+
class RobustGraphicalLikelihood(Likelihood):
- def __init__(self, coin, supp, sigma = 0.1):
+ def __init__(self, coin, supp, sigma=0.1):
super().__init__()
self.coin = coin
self.supp = supp
@@ -20,48 +21,68 @@ def evaluate_datapoint(self, theta, d):
return torch.log(1 + torch.exp())
def add_data_point(self, d):
- x,y = d
- self.x = torch.vstack(self.x,x)
- self.y = torch.vstack(self.y,y)
+ x, y = d
+ self.x = torch.vstack(self.x, x)
+ self.y = torch.vstack(self.y, y)
self.fitted = False
def load_data(self, D):
self.x, self.y = D
self.fitted = False
- def get_objective_cvxpy(self, mask = None):
+ def get_objective_cvxpy(self, mask=None):
if mask is None:
if self.Sigma is None:
- def likelihood(theta): return cp.sum(cp.abs(self.x@theta - self.y)/self.sigma)
+
+ def likelihood(theta):
+ return cp.sum(cp.abs(self.x @ theta - self.y) / self.sigma)
else:
- def likelihood(theta): return cp.sum(cp.abs(torch.linalg.inv(self.Sigma)@(self.x@theta - self.y)))
+
+ def likelihood(theta):
+ return cp.sum(
+ cp.abs(torch.linalg.inv(self.Sigma) @ (self.x @ theta - self.y))
+ )
+
else:
if self.Sigma is None:
+
def likelihood(theta):
- if torch.sum(mask.int())>0:
- return cp.sum(cp.abs(self.x[mask,:]@theta - self.y[mask,:])/self.sigma)
+ if torch.sum(mask.int()) > 0:
+ return cp.sum(
+ cp.abs(self.x[mask, :] @ theta - self.y[mask, :])
+ / self.sigma
+ )
else:
- return cp.sum(theta*0)
+ return cp.sum(theta * 0)
else:
+
def likelihood(theta):
- if torch.sum(mask.int())>0:
- return cp.sum(cp.abs(torch.linalg.inv(self.Sigma)@(self.x[mask,:]@theta - self.y[mask,:])))
+ if torch.sum(mask.int()) > 0:
+ return cp.sum(
+ cp.abs(
+ torch.linalg.inv(self.Sigma)
+ @ (self.x[mask, :] @ theta - self.y[mask, :])
+ )
+ )
else:
- return cp.sum(theta*0)
+ return cp.sum(theta * 0)
+
return likelihood
- def get_confidence_set_cvxpy(self,
- theta: cp.Variable,
- type: Union[str, None] = None,
- params: Dict = {},
- delta: float = 0.1):
+ def get_confidence_set_cvxpy(
+ self,
+ theta: cp.Variable,
+ type: Union[str, None] = None,
+ params: Dict = {},
+ delta: float = 0.1,
+ ):
if self.fitted == True:
return self.set_fn(theta)
- theta_fit = params['estimate']
- H = params['regularizer_hessian']
+ theta_fit = params["estimate"]
+ H = params["regularizer_hessian"]
beta = self.confidence_parameter(delta, params, type=type)
@@ -78,13 +99,14 @@ def get_confidence_set_cvxpy(self,
set = self.lr_confidence_set_cvxpy(theta, beta, params)
else:
- raise NotImplementedError("The desired confidence set type is not supported.")
+ raise NotImplementedError(
+ "The desired confidence set type is not supported."
+ )
self.set = set
self.fitted = True
return set
-
def get_objective_torch(self):
raise NotImplementedError("Implement me please.")
diff --git a/stpy/probability/weibul_likelihood.py b/stpy/probability/weibul_likelihood.py
index 6d11179..77c509c 100644
--- a/stpy/probability/weibul_likelihood.py
+++ b/stpy/probability/weibul_likelihood.py
@@ -15,21 +15,19 @@ def __init__(self, p):
def information_matrix(self, theta_fit):
pass
-
def normalization(self, d):
pass
-
- def evaluate_datapoint(self, theta, d, mask = None):
+ def evaluate_datapoint(self, theta, d, mask=None):
if mask is None:
- mask = 1.
+ mask = 1.0
x, y = d
lam = torch.exp(x @ theta)
l = -torch.log(lam) + (y ** (self.p)) * lam
l = l * mask
return l
- def scale(self, err = None, bound = None):
+ def scale(self, err=None, bound=None):
return np.exp(bound)
def add_data_point(self, d):
@@ -50,33 +48,44 @@ def get_objective_torch(self):
def get_objective_cvxpy(self, mask=None):
if mask is None:
+
def likelihood(theta):
- return -cp.sum(self.x@theta) + cp.sum(cp.diag(self.y**(self.p))@cp.exp(self.x @ theta))
+ return -cp.sum(self.x @ theta) + cp.sum(
+ cp.diag(self.y ** (self.p)) @ cp.exp(self.x @ theta)
+ )
+
else:
+
def likelihood(theta):
- if torch.sum(mask.int())>0:
- return - cp.sum(self.x[mask,:] @ theta) + cp.sum(cp.diag(self.y[mask,:]**(self.p))@cp.exp(self.x[mask,:] @ theta))
+ if torch.sum(mask.int()) > 0:
+ return -cp.sum(self.x[mask, :] @ theta) + cp.sum(
+ cp.diag(self.y[mask, :] ** (self.p))
+ @ cp.exp(self.x[mask, :] @ theta)
+ )
else:
return cp.sum(theta * 0)
+
return likelihood
- def get_confidence_set_cvxpy(self,
- theta: cp.Variable,
- type: Union[str, None] = None,
- params: Dict = {},
- delta: float = 0.1):
+ def get_confidence_set_cvxpy(
+ self,
+ theta: cp.Variable,
+ type: Union[str, None] = None,
+ params: Dict = {},
+ delta: float = 0.1,
+ ):
if self.fitted == True:
return self.set_fn(theta)
- theta_fit = params['estimate']
- H = params['regularizer_hessian']
+ theta_fit = params["estimate"]
+ H = params["regularizer_hessian"]
beta = self.confidence_parameter(delta, params, type=type)
if type in ["laplace"]:
V = self.information_matrix(theta_fit)
if H is not None:
- V += H
+ V += H
self.set_fn = lambda theta: [cp.quad_form(theta - theta_fit, V) <= beta]
set = self.set_fn(theta)
@@ -84,18 +93,22 @@ def get_confidence_set_cvxpy(self,
set = self.lr_confidence_set_cvxpy(theta, beta, params)
else:
- raise NotImplementedError("The desired confidence set type is not supported.")
+ raise NotImplementedError(
+ "The desired confidence set type is not supported."
+ )
self.set = set
self.fitted = True
return set
- def confidence_parameter(self, delta, params, type = None):
+ def confidence_parameter(self, delta, params, type=None):
if type == "LR":
# this is based on sequential LR test
beta = self.confidence_parameter_likelihood_ratio(delta, params)
elif type == "laplace":
- beta = 2.
+ beta = 2.0
else:
- raise NotImplementedError("The desired confidence set type is not supported.")
- return beta
\ No newline at end of file
+ raise NotImplementedError(
+ "The desired confidence set type is not supported."
+ )
+ return beta
diff --git a/stpy/random_process.py b/stpy/random_process.py
index 85aa85a..9839a28 100755
--- a/stpy/random_process.py
+++ b/stpy/random_process.py
@@ -3,335 +3,546 @@
import matplotlib.pyplot as plt
import matplotlib
+
class RandomProcess:
- def visualize_function(self,xtest,f_trues, filename = None, colors = None):
- from mpl_toolkits.mplot3d import axes3d, Axes3D
- d = xtest.size()[1]
- if d == 1:
- if isinstance(f_trues, list):
- for f_true in f_trues:
- plt.plot(xtest,f_true(xtest))
- else:
- plt.plot(xtest, f_trues(xtest))
- elif d == 2:
- from scipy.interpolate import griddata
- plt.figure(figsize=(15, 7))
- plt.clf()
- ax = plt.axes(projection='3d')
- xx = xtest[:, 0].numpy()
- yy = xtest[:, 1].numpy()
- grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j]
- if isinstance(f_trues, list):
- for index, f_true in enumerate(f_trues):
- grid_z = griddata((xx, yy), f_true(xtest)[:, 0].numpy(), (grid_x, grid_y), method='linear')
- if colors is not None:
- color = colors[index]
- ax.plot_surface(grid_x, grid_y, grid_z, alpha=0.4, color = color)
- else:
- grid_z = griddata((xx, yy), f_trues(xtest)[:, 0].numpy(), (grid_x, grid_y), method='linear')
- ax.plot_surface(grid_x, grid_y, grid_z, alpha=0.4)
-
- if filename is not None:
- plt.xticks(fontsize=20, rotation=0)
- plt.yticks(fontsize=20, rotation=0)
- plt.savefig(filename, dpi = 300)
-
-
-
-
- def visualize_function_contour(self, xtest, f_true, filename = None, levels = 10, figsize = (15, 7)):
- from mpl_toolkits.mplot3d import axes3d, Axes3D
- d = xtest.size()[1]
- if d ==1:
- pass
- elif d == 2:
- from scipy.interpolate import griddata
- xx = xtest[:, 0].numpy()
- yy = xtest[:, 1].numpy()
- grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j]
- f = f_true(xtest)
- grid_z_f = griddata((xx, yy), f[:, 0].detach().numpy(), (grid_x, grid_y), method='linear')
-
- fig, ax = plt.subplots(figsize=figsize)
- cs = ax.contourf(grid_x, grid_y, grid_z_f,levels= levels)
- ax.contour(cs, colors='k')
- cbar = fig.colorbar(cs)
- #if self.x is not None:
- # ax.scatter(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), c='r', s=100, marker="o")
- ax.grid(c='k', ls='-', alpha=0.1)
-
- if filename is not None:
- plt.xticks(fontsize=24, rotation=0)
- plt.yticks(fontsize=24, rotation=0)
- plt.savefig(filename, dpi = 300)
- #plt.show()
-
- def visualize(self,xtest,f_true = None, points = True, show = True, size = 2,
- norm = 1, fig = True, sqrtbeta = 2, constrained = None, d = None, matheron_kernel=None):
- from mpl_toolkits.mplot3d import axes3d, Axes3D
-
- [mu, std] = self.mean_std(xtest)
-
- if d is None:
- d = self.d
-
- if d == 1:
- if fig == True:
- plt.figure(figsize=(15, 7))
- plt.clf()
- if self.x is not None:
- plt.plot(self.x.detach().numpy(), self.y.detach().numpy(), 'r+', ms=10, marker="o")
- if size > 0:
-
- if matheron_kernel is not None:
- z = self.sample_matheron(xtest,matheron_kernel, size=size).numpy().T
- else:
- z = self.sample(xtest, size=size).numpy().T
-
- for z_arr,label in zip(z,['sample']+[None for _ in range(size-1)]):
- plt.plot(xtest.view(-1).numpy(),z_arr, 'k--', lw = 2, label = label)
-
- plt.fill_between(xtest.numpy().flat, (mu - sqrtbeta * std).numpy().flat, (mu + sqrtbeta * std).numpy().flat,color="#dddddd")
- if f_true is not None:
- plt.plot(xtest.numpy(),f_true(xtest).numpy(),'b-',lw = 2, label = "truth")
- plt.plot(xtest.numpy(), mu.numpy(), 'r-', lw=2, label="posterior mean")
- #plt.title('Posterior mean prediction plus 2 st.deviation')
- plt.legend()
- if show == True:
- plt.show()
-
- elif d == 2:
- from scipy.interpolate import griddata
- plt.figure(figsize=(15,7))
- plt.clf()
- ax = plt.axes(projection='3d')
- xx = xtest[:, 0].numpy()
- yy = xtest[:, 1].numpy()
- grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j]
- grid_z_mu = griddata((xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method='linear')
- if f_true is not None:
- grid_z = griddata((xx, yy), f_true(xtest)[:,0].numpy(), (grid_x, grid_y), method='linear')
- ax.plot_surface(grid_x, grid_y, grid_z, color='b', alpha=0.4, label = "truth")
- if points == True and self.fit == True:
- ax.scatter(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), self.y[:,0].detach().numpy(), c='r', s=100, marker="o", depthshade=False)
- if self.beta is not None:
- beta = self.beta(norm = norm)
- grid_z2 = griddata((xx, yy), (mu.detach()+beta*std.detach())[:, 0].detach().numpy(), (grid_x, grid_y), method='linear')
- ax.plot_surface(grid_x, grid_y, grid_z2, color='gray', alpha=0.2)
- grid_z3 = griddata((xx, yy), (mu.detach()-beta*std.detach())[:, 0].detach().numpy(), (grid_x, grid_y), method='linear')
- ax.plot_surface(grid_x, grid_y, grid_z3, color='gray', alpha=0.2)
-
- ax.plot_surface(grid_x, grid_y, grid_z_mu, color='r', alpha=0.4)
- #plt.title('Posterior mean prediction plus 2 st.deviation')
- plt.show()
-
- else:
- print("Visualization not implemented")
-
- def visualize_subopt(self,xtest,f_true = None, points = True, show = True, size = 2, norm = 1, fig = True, beta = 2):
- from mpl_toolkits.mplot3d import axes3d, Axes3D
- [mu, std] = self.mean_std(xtest)
-
- print ("Visualizing in: ", self.d, "dimensions...")
-
- if self.d == 1:
- if fig == True:
- plt.figure(figsize=(15, 7))
- plt.clf()
- if self.x is not None:
- plt.plot(self.x.detach().numpy(), self.y.detach().numpy(), 'r+', ms=10, marker="o")
- plt.plot(xtest.numpy(), self.sample(xtest, size=size).numpy(), 'k--', lw=2, label="sample")
- plt.fill_between(xtest.numpy().flat, (mu - 2 * std).numpy().flat, (mu + 2 * std).numpy().flat,color="#dddddd")
- if f_true is not None:
- plt.plot(xtest.numpy(),f_true(xtest).numpy(),'b-',lw = 2, label = "truth")
- plt.plot(xtest.numpy(), mu.numpy(), 'r-', lw=2, label="posterior mean")
-
- min = torch.max(mu - beta*std)
- mask = (mu + beta*std < min)
- v = torch.min(mu - beta * std).numpy()-1
- plt.plot(xtest.numpy()[mask], 0*xtest.numpy()[mask]+v,'ko', lw = 6,label = "Discarted Region")
-
-
-
- plt.title('Posterior mean prediction plus 2 st.deviation')
- plt.legend()
-
- if show == True:
- plt.show()
-
- def visualize_slice(self,xtest,slice, show = True, eps = None, size = 1, beta = 2):
- append = torch.ones(size = (xtest.size()[0],1), dtype=torch.float64)*slice
- xtest2 = torch.cat((xtest,append), dim = 1)
-
- [mu, std] = self.mean_std(xtest2)
-
- plt.figure(figsize=(15, 7))
- plt.clf()
- plt.plot(xtest.numpy(), self.sample(xtest, size=size).numpy(), 'k--', lw=2, label="sample")
- print(std.size(), mu.size())
- if self.x is not None:
- plt.plot(self.x[:,0].detach().numpy(), self.y.detach().numpy(), 'r+', ms=10, marker="o")
- plt.fill_between(xtest.numpy().flat, (mu - 2 * std).numpy().flat, (mu + 2 * std).numpy().flat, color="#dddddd")
- plt.fill_between(xtest.numpy().flat, (mu + 2 * std).numpy().flat, (mu + 2 * std + 2*self.s).numpy().flat, color="#bbdefb")
- plt.fill_between(xtest.numpy().flat, (mu - 2 * std - 2*self.s).numpy().flat, (mu - 2 * std).numpy().flat, color="#bbdefb")
-
- if eps is not None:
- mask = (beta*std < eps)
- v = torch.min(mu - beta * std - 2*self.s).numpy()
- plt.plot(xtest.numpy()[mask], 0*xtest.numpy()[mask]+v,'k', lw = 6,label = "$\\mathcal{D}_E$ - $\\epsilon$ accurate domain in a subspace")
-
- plt.plot(xtest.numpy(), mu.numpy(), 'r-', lw=2, label="posterior mean")
- plt.title('Posterior mean prediction plus 2 st.deviation')
- plt.legend()
- if show == True:
- plt.show()
-
-
-
- def visualize_contour_with_gap(self,xtest,f_true = None, gap = None, show = False):
- [mu, _] = self.mean_std(xtest)
-
- if self.d == 2:
- from scipy.interpolate import griddata
- xx = xtest[:, 0].detach().numpy()
- yy = xtest[:, 1].detach().numpy()
- grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j]
- grid_z_mu = griddata((xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method='linear')
-
- fig, ax = plt.subplots(figsize=(15, 7))
- cs = ax.contourf(grid_x, grid_y, grid_z_mu)
- ax.contour(cs, colors='k')
-
- ax.plot(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), 'ro', ms=10)
- cbar = fig.colorbar(cs)
-
- ax.grid(c='k', ls='-', alpha=0.1)
-
- if f_true is not None:
- f = f_true(xtest)
- grid_z_f = griddata((xx, yy), f[:, 0].detach().numpy(), (grid_x, grid_y), method='linear')
- fig, ax = plt.subplots(figsize=(15, 7))
- cs = ax.contourf(grid_x, grid_y, grid_z_f)
- ax.contour(cs, colors='k')
- cbar = fig.colorbar(cs)
- ax.grid(c='k', ls='-', alpha=0.1)
- if show == True:
- plt.show()
-
- def visualize_contour(self,xtest,f_true = None, show = True, points = True, ms = 5, levels = 20):
- [mu, _] = self.mean_std(xtest)
-
- if self.d == 2:
- from scipy.interpolate import griddata
- xx = xtest[:, 0].detach().numpy()
- yy = xtest[:, 1].detach().numpy()
- grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j]
- grid_z_mu = griddata((xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method='linear')
- fig, ax = plt.subplots(figsize=(15, 7))
- cs = ax.contourf(grid_x, grid_y, grid_z_mu)
- ax.contour(cs, colors='k')
- if points == True:
- ax.plot(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), 'wo', ms=ms, alpha = 0.5)
- cbar = fig.colorbar(cs)
- ax.grid(c='k', ls='-', alpha=0.1)
-
- if f_true is not None:
- f = f_true(xtest)
- grid_z_f = griddata((xx, yy), f[:, 0].detach().numpy(), (grid_x, grid_y), method='linear')
- fig, ax = plt.subplots(figsize=(15, 7))
- cs = ax.contourf(grid_x, grid_y, grid_z_f, levels = levels)
- ax.contour(cs, colors='k')
- cbar = fig.colorbar(cs)
- ax.grid(c='k', ls='-', alpha=0.1)
- if show == True:
- plt.show()
- return ax
-
- def visualize_quiver(self,xtest, size = 2,norm = 1):
- from mpl_toolkits.mplot3d import axes3d, Axes3D
- [mu, std] = self.mean_std(xtest)
- if self.d == 2:
- from scipy.interpolate import griddata
- plt.figure(figsize=(15,7))
- plt.clf()
- ax = plt.axes(projection='3d')
- xx = xtest[:, 0].detach().numpy()
- yy = xtest[:, 1].detach().numpy()
- grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j]
- grid_z_mu = griddata((xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method='linear')
- #
-
- ax.scatter(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), self.y[:,0].detach().numpy(), c='r', s=100, marker="o", depthshade=False)
-
- if self.beta is not None:
- beta = self.beta(norm = norm)
- grid_z2 = griddata((xx, yy), (mu.detach()+beta*std.detach())[:, 0].detach().numpy(), (grid_x, grid_y), method='linear')
- ax.plot_surface(grid_x, grid_y, grid_z2, color='gray', alpha=0.2)
- grid_z3 = griddata((xx, yy), (mu.detach()-beta*std.detach())[:, 0].detach().numpy(), (grid_x, grid_y), method='linear')
- ax.plot_surface(grid_x, grid_y, grid_z3, color='gray', alpha=0.2)
-
- ax.plot_surface(grid_x, grid_y, grid_z_mu, color='r', alpha=0.4)
- plt.title('Posterior mean prediction plus 2 st.deviation')
-
-
- derivatives = torch.zeros(xtest.size()[0],2)
- for index,point in enumerate(xtest):
- derivatives[index,:] = self.mean_gradient_hessian(point.view(-1,2))
- print (derivatives[index,:] )
-
- print (derivatives.size())
-
-
- grid_der_x_mu = griddata((xx, yy), derivatives[:, 0].detach().numpy(), (grid_x, grid_y), method='linear')
- grid_der_y_mu = griddata((xx, yy), derivatives[:, 1].detach().numpy(), (grid_x, grid_y), method='linear')
-
- fig, ax = plt.subplots(figsize=(15, 7))
- cs = ax.contourf(grid_x, grid_y, grid_z_mu)
-
- ax.contour(cs, colors='k')
-
- # Plot grid.
- ax.grid(c='k', ls='-', alpha=0.1)
- ax.quiver(grid_x, grid_y, grid_der_x_mu, grid_der_y_mu)
-
- plt.show()
-
- else:
- print("Visualization not implemented")
+ def visualize_function(self, xtest, f_trues, filename=None, colors=None):
+ from mpl_toolkits.mplot3d import axes3d, Axes3D
+
+ d = xtest.size()[1]
+ if d == 1:
+ if isinstance(f_trues, list):
+ for f_true in f_trues:
+ plt.plot(xtest, f_true(xtest))
+ else:
+ plt.plot(xtest, f_trues(xtest))
+ elif d == 2:
+ from scipy.interpolate import griddata
+
+ plt.figure(figsize=(15, 7))
+ plt.clf()
+ ax = plt.axes(projection="3d")
+ xx = xtest[:, 0].numpy()
+ yy = xtest[:, 1].numpy()
+ grid_x, grid_y = np.mgrid[
+ min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j
+ ]
+ if isinstance(f_trues, list):
+ for index, f_true in enumerate(f_trues):
+ grid_z = griddata(
+ (xx, yy),
+ f_true(xtest)[:, 0].numpy(),
+ (grid_x, grid_y),
+ method="linear",
+ )
+ if colors is not None:
+ color = colors[index]
+ ax.plot_surface(grid_x, grid_y, grid_z, alpha=0.4, color=color)
+ else:
+ grid_z = griddata(
+ (xx, yy),
+ f_trues(xtest)[:, 0].numpy(),
+ (grid_x, grid_y),
+ method="linear",
+ )
+ ax.plot_surface(grid_x, grid_y, grid_z, alpha=0.4)
+
+ if filename is not None:
+ plt.xticks(fontsize=20, rotation=0)
+ plt.yticks(fontsize=20, rotation=0)
+ plt.savefig(filename, dpi=300)
+
+ def visualize_function_contour(
+ self, xtest, f_true, filename=None, levels=10, figsize=(15, 7)
+ ):
+ from mpl_toolkits.mplot3d import axes3d, Axes3D
+
+ d = xtest.size()[1]
+ if d == 1:
+ pass
+ elif d == 2:
+ from scipy.interpolate import griddata
+
+ xx = xtest[:, 0].numpy()
+ yy = xtest[:, 1].numpy()
+ grid_x, grid_y = np.mgrid[
+ min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j
+ ]
+ f = f_true(xtest)
+ grid_z_f = griddata(
+ (xx, yy), f[:, 0].detach().numpy(), (grid_x, grid_y), method="linear"
+ )
+
+ fig, ax = plt.subplots(figsize=figsize)
+ cs = ax.contourf(grid_x, grid_y, grid_z_f, levels=levels)
+ ax.contour(cs, colors="k")
+ cbar = fig.colorbar(cs)
+ # if self.x is not None:
+ # ax.scatter(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), c='r', s=100, marker="o")
+ ax.grid(c="k", ls="-", alpha=0.1)
+
+ if filename is not None:
+ plt.xticks(fontsize=24, rotation=0)
+ plt.yticks(fontsize=24, rotation=0)
+ plt.savefig(filename, dpi=300)
+ # plt.show()
+
+ def visualize(
+ self,
+ xtest,
+ f_true=None,
+ points=True,
+ show=True,
+ size=2,
+ norm=1,
+ fig=True,
+ sqrtbeta=2,
+ constrained=None,
+ d=None,
+ matheron_kernel=None,
+ ):
+ from mpl_toolkits.mplot3d import axes3d, Axes3D
+
+ [mu, std] = self.mean_std(xtest)
+
+ if d is None:
+ d = self.d
+
+ if d == 1:
+ if fig == True:
+ plt.figure(figsize=(15, 7))
+ plt.clf()
+ if self.x is not None:
+ plt.plot(
+ self.x.detach().numpy(),
+ self.y.detach().numpy(),
+ "r+",
+ ms=10,
+ marker="o",
+ )
+ if size > 0:
+
+ if matheron_kernel is not None:
+ z = (
+ self.sample_matheron(xtest, matheron_kernel, size=size)
+ .numpy()
+ .T
+ )
+ else:
+ z = self.sample(xtest, size=size).numpy().T
+
+ for z_arr, label in zip(
+ z, ["sample"] + [None for _ in range(size - 1)]
+ ):
+ plt.plot(xtest.view(-1).numpy(), z_arr, "k--", lw=2, label=label)
+
+ plt.fill_between(
+ xtest.numpy().flat,
+ (mu - sqrtbeta * std).numpy().flat,
+ (mu + sqrtbeta * std).numpy().flat,
+ color="#dddddd",
+ )
+ if f_true is not None:
+ plt.plot(
+ xtest.numpy(), f_true(xtest).numpy(), "b-", lw=2, label="truth"
+ )
+ plt.plot(xtest.numpy(), mu.numpy(), "r-", lw=2, label="posterior mean")
+ # plt.title('Posterior mean prediction plus 2 st.deviation')
+ plt.legend()
+ if show == True:
+ plt.show()
+
+ elif d == 2:
+ from scipy.interpolate import griddata
+
+ plt.figure(figsize=(15, 7))
+ plt.clf()
+ ax = plt.axes(projection="3d")
+ xx = xtest[:, 0].numpy()
+ yy = xtest[:, 1].numpy()
+ grid_x, grid_y = np.mgrid[
+ min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j
+ ]
+ grid_z_mu = griddata(
+ (xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method="linear"
+ )
+ if f_true is not None:
+ grid_z = griddata(
+ (xx, yy),
+ f_true(xtest)[:, 0].numpy(),
+ (grid_x, grid_y),
+ method="linear",
+ )
+ ax.plot_surface(
+ grid_x, grid_y, grid_z, color="b", alpha=0.4, label="truth"
+ )
+ if points == True and self.fit == True:
+ ax.scatter(
+ self.x[:, 0].detach().numpy(),
+ self.x[:, 1].detach().numpy(),
+ self.y[:, 0].detach().numpy(),
+ c="r",
+ s=100,
+ marker="o",
+ depthshade=False,
+ )
+ if self.beta is not None:
+ beta = self.beta(norm=norm)
+ grid_z2 = griddata(
+ (xx, yy),
+ (mu.detach() + beta * std.detach())[:, 0].detach().numpy(),
+ (grid_x, grid_y),
+ method="linear",
+ )
+ ax.plot_surface(grid_x, grid_y, grid_z2, color="gray", alpha=0.2)
+ grid_z3 = griddata(
+ (xx, yy),
+ (mu.detach() - beta * std.detach())[:, 0].detach().numpy(),
+ (grid_x, grid_y),
+ method="linear",
+ )
+ ax.plot_surface(grid_x, grid_y, grid_z3, color="gray", alpha=0.2)
+
+ ax.plot_surface(grid_x, grid_y, grid_z_mu, color="r", alpha=0.4)
+ # plt.title('Posterior mean prediction plus 2 st.deviation')
+ plt.show()
+
+ else:
+ print("Visualization not implemented")
+
+ def visualize_subopt(
+ self,
+ xtest,
+ f_true=None,
+ points=True,
+ show=True,
+ size=2,
+ norm=1,
+ fig=True,
+ beta=2,
+ ):
+ from mpl_toolkits.mplot3d import axes3d, Axes3D
+
+ [mu, std] = self.mean_std(xtest)
+
+ print("Visualizing in: ", self.d, "dimensions...")
+
+ if self.d == 1:
+ if fig == True:
+ plt.figure(figsize=(15, 7))
+ plt.clf()
+ if self.x is not None:
+ plt.plot(
+ self.x.detach().numpy(),
+ self.y.detach().numpy(),
+ "r+",
+ ms=10,
+ marker="o",
+ )
+ plt.plot(
+ xtest.numpy(),
+ self.sample(xtest, size=size).numpy(),
+ "k--",
+ lw=2,
+ label="sample",
+ )
+ plt.fill_between(
+ xtest.numpy().flat,
+ (mu - 2 * std).numpy().flat,
+ (mu + 2 * std).numpy().flat,
+ color="#dddddd",
+ )
+ if f_true is not None:
+ plt.plot(
+ xtest.numpy(), f_true(xtest).numpy(), "b-", lw=2, label="truth"
+ )
+ plt.plot(xtest.numpy(), mu.numpy(), "r-", lw=2, label="posterior mean")
+
+ min = torch.max(mu - beta * std)
+ mask = mu + beta * std < min
+ v = torch.min(mu - beta * std).numpy() - 1
+ plt.plot(
+ xtest.numpy()[mask],
+ 0 * xtest.numpy()[mask] + v,
+ "ko",
+ lw=6,
+ label="Discarted Region",
+ )
+
+ plt.title("Posterior mean prediction plus 2 st.deviation")
+ plt.legend()
+
+ if show == True:
+ plt.show()
+
+ def visualize_slice(self, xtest, slice, show=True, eps=None, size=1, beta=2):
+ append = torch.ones(size=(xtest.size()[0], 1), dtype=torch.float64) * slice
+ xtest2 = torch.cat((xtest, append), dim=1)
+
+ [mu, std] = self.mean_std(xtest2)
+
+ plt.figure(figsize=(15, 7))
+ plt.clf()
+ plt.plot(
+ xtest.numpy(),
+ self.sample(xtest, size=size).numpy(),
+ "k--",
+ lw=2,
+ label="sample",
+ )
+ print(std.size(), mu.size())
+ if self.x is not None:
+ plt.plot(
+ self.x[:, 0].detach().numpy(),
+ self.y.detach().numpy(),
+ "r+",
+ ms=10,
+ marker="o",
+ )
+ plt.fill_between(
+ xtest.numpy().flat,
+ (mu - 2 * std).numpy().flat,
+ (mu + 2 * std).numpy().flat,
+ color="#dddddd",
+ )
+ plt.fill_between(
+ xtest.numpy().flat,
+ (mu + 2 * std).numpy().flat,
+ (mu + 2 * std + 2 * self.s).numpy().flat,
+ color="#bbdefb",
+ )
+ plt.fill_between(
+ xtest.numpy().flat,
+ (mu - 2 * std - 2 * self.s).numpy().flat,
+ (mu - 2 * std).numpy().flat,
+ color="#bbdefb",
+ )
+
+ if eps is not None:
+ mask = beta * std < eps
+ v = torch.min(mu - beta * std - 2 * self.s).numpy()
+ plt.plot(
+ xtest.numpy()[mask],
+ 0 * xtest.numpy()[mask] + v,
+ "k",
+ lw=6,
+ label="$\\mathcal{D}_E$ - $\\epsilon$ accurate domain in a subspace",
+ )
+
+ plt.plot(xtest.numpy(), mu.numpy(), "r-", lw=2, label="posterior mean")
+ plt.title("Posterior mean prediction plus 2 st.deviation")
+ plt.legend()
+ if show == True:
+ plt.show()
+
+ def visualize_contour_with_gap(self, xtest, f_true=None, gap=None, show=False):
+ [mu, _] = self.mean_std(xtest)
+
+ if self.d == 2:
+ from scipy.interpolate import griddata
+
+ xx = xtest[:, 0].detach().numpy()
+ yy = xtest[:, 1].detach().numpy()
+ grid_x, grid_y = np.mgrid[
+ min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j
+ ]
+ grid_z_mu = griddata(
+ (xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method="linear"
+ )
+
+ fig, ax = plt.subplots(figsize=(15, 7))
+ cs = ax.contourf(grid_x, grid_y, grid_z_mu)
+ ax.contour(cs, colors="k")
+
+ ax.plot(
+ self.x[:, 0].detach().numpy(),
+ self.x[:, 1].detach().numpy(),
+ "ro",
+ ms=10,
+ )
+ cbar = fig.colorbar(cs)
+
+ ax.grid(c="k", ls="-", alpha=0.1)
+
+ if f_true is not None:
+ f = f_true(xtest)
+ grid_z_f = griddata(
+ (xx, yy),
+ f[:, 0].detach().numpy(),
+ (grid_x, grid_y),
+ method="linear",
+ )
+ fig, ax = plt.subplots(figsize=(15, 7))
+ cs = ax.contourf(grid_x, grid_y, grid_z_f)
+ ax.contour(cs, colors="k")
+ cbar = fig.colorbar(cs)
+ ax.grid(c="k", ls="-", alpha=0.1)
+ if show == True:
+ plt.show()
+
+ def visualize_contour(
+ self, xtest, f_true=None, show=True, points=True, ms=5, levels=20
+ ):
+ [mu, _] = self.mean_std(xtest)
+
+ if self.d == 2:
+ from scipy.interpolate import griddata
+
+ xx = xtest[:, 0].detach().numpy()
+ yy = xtest[:, 1].detach().numpy()
+ grid_x, grid_y = np.mgrid[
+ min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j
+ ]
+ grid_z_mu = griddata(
+ (xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method="linear"
+ )
+ fig, ax = plt.subplots(figsize=(15, 7))
+ cs = ax.contourf(grid_x, grid_y, grid_z_mu)
+ ax.contour(cs, colors="k")
+ if points == True:
+ ax.plot(
+ self.x[:, 0].detach().numpy(),
+ self.x[:, 1].detach().numpy(),
+ "wo",
+ ms=ms,
+ alpha=0.5,
+ )
+ cbar = fig.colorbar(cs)
+ ax.grid(c="k", ls="-", alpha=0.1)
+
+ if f_true is not None:
+ f = f_true(xtest)
+ grid_z_f = griddata(
+ (xx, yy),
+ f[:, 0].detach().numpy(),
+ (grid_x, grid_y),
+ method="linear",
+ )
+ fig, ax = plt.subplots(figsize=(15, 7))
+ cs = ax.contourf(grid_x, grid_y, grid_z_f, levels=levels)
+ ax.contour(cs, colors="k")
+ cbar = fig.colorbar(cs)
+ ax.grid(c="k", ls="-", alpha=0.1)
+ if show == True:
+ plt.show()
+ return ax
+
+ def visualize_quiver(self, xtest, size=2, norm=1):
+ from mpl_toolkits.mplot3d import axes3d, Axes3D
+
+ [mu, std] = self.mean_std(xtest)
+ if self.d == 2:
+ from scipy.interpolate import griddata
+
+ plt.figure(figsize=(15, 7))
+ plt.clf()
+ ax = plt.axes(projection="3d")
+ xx = xtest[:, 0].detach().numpy()
+ yy = xtest[:, 1].detach().numpy()
+ grid_x, grid_y = np.mgrid[
+ min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j
+ ]
+ grid_z_mu = griddata(
+ (xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method="linear"
+ )
+ #
+
+ ax.scatter(
+ self.x[:, 0].detach().numpy(),
+ self.x[:, 1].detach().numpy(),
+ self.y[:, 0].detach().numpy(),
+ c="r",
+ s=100,
+ marker="o",
+ depthshade=False,
+ )
+
+ if self.beta is not None:
+ beta = self.beta(norm=norm)
+ grid_z2 = griddata(
+ (xx, yy),
+ (mu.detach() + beta * std.detach())[:, 0].detach().numpy(),
+ (grid_x, grid_y),
+ method="linear",
+ )
+ ax.plot_surface(grid_x, grid_y, grid_z2, color="gray", alpha=0.2)
+ grid_z3 = griddata(
+ (xx, yy),
+ (mu.detach() - beta * std.detach())[:, 0].detach().numpy(),
+ (grid_x, grid_y),
+ method="linear",
+ )
+ ax.plot_surface(grid_x, grid_y, grid_z3, color="gray", alpha=0.2)
+
+ ax.plot_surface(grid_x, grid_y, grid_z_mu, color="r", alpha=0.4)
+ plt.title("Posterior mean prediction plus 2 st.deviation")
+
+ derivatives = torch.zeros(xtest.size()[0], 2)
+ for index, point in enumerate(xtest):
+ derivatives[index, :] = self.mean_gradient_hessian(point.view(-1, 2))
+ print(derivatives[index, :])
+
+ print(derivatives.size())
+
+ grid_der_x_mu = griddata(
+ (xx, yy),
+ derivatives[:, 0].detach().numpy(),
+ (grid_x, grid_y),
+ method="linear",
+ )
+ grid_der_y_mu = griddata(
+ (xx, yy),
+ derivatives[:, 1].detach().numpy(),
+ (grid_x, grid_y),
+ method="linear",
+ )
+
+ fig, ax = plt.subplots(figsize=(15, 7))
+ cs = ax.contourf(grid_x, grid_y, grid_z_mu)
+
+ ax.contour(cs, colors="k")
+
+ # Plot grid.
+ ax.grid(c="k", ls="-", alpha=0.1)
+ ax.quiver(grid_x, grid_y, grid_der_x_mu, grid_der_y_mu)
+
+ plt.show()
+
+ else:
+ print("Visualization not implemented")
if __name__ == "__main__":
- from stpy.continuous_processes.gauss_procc import GaussianProcess
- from stpy.continuous_processes.fourier_fea import GaussianProcessFF
- from stpy.continuous_processes.kernelized_features import KernelizedFeatures
- from stpy.kernels import KernelFunction
- from stpy.embeddings.embedding import HermiteEmbedding, RFFEmbedding
- import stpy
- import torch
- import matplotlib.pyplot as plt
- import numpy as np
-
- n = 1024
- N = 256
- gamma = 0.09
- s = 0.1
- # benchmark = stpy.test_functions.benchmarks.GaussianProcessSample(d =1, gamma = gamma, sigma = s, n = n)
- benchmark = stpy.test_functions.benchmarks.Simple1DFunction(d=1, sigma=s)
-
- x = benchmark.initial_guess(N, adv_inv=True)
- y = benchmark.eval(x)
- xtest = benchmark.interval(1024)
-
- # GP = GaussianProcess(gamma=gamma, s=s)
- # GP.fit_gp(x, y)
- # GP.visualize(xtest, show=False, size=5)
- # plt.show()
-
- m = 64
- kernel = KernelFunction(gamma=gamma)
- embedding = HermiteEmbedding(gamma=gamma, m=m)
- RFF = KernelizedFeatures(embedding=embedding, s=s, m=m)
- RFF.fit_gp(x, y)
- RFF.visualize(xtest, fig = False, show=False, size=5, matheron_kernel = kernel)
- plt.show()
\ No newline at end of file
+ from stpy.continuous_processes.gauss_procc import GaussianProcess
+ from stpy.continuous_processes.fourier_fea import GaussianProcessFF
+ from stpy.continuous_processes.kernelized_features import KernelizedFeatures
+ from stpy.kernels import KernelFunction
+ from stpy.embeddings.embedding import HermiteEmbedding, RFFEmbedding
+ import stpy
+ import torch
+ import matplotlib.pyplot as plt
+ import numpy as np
+
+ n = 1024
+ N = 256
+ gamma = 0.09
+ s = 0.1
+ # benchmark = stpy.test_functions.benchmarks.GaussianProcessSample(d =1, gamma = gamma, sigma = s, n = n)
+ benchmark = stpy.test_functions.benchmarks.Simple1DFunction(d=1, sigma=s)
+
+ x = benchmark.initial_guess(N, adv_inv=True)
+ y = benchmark.eval(x)
+ xtest = benchmark.interval(1024)
+
+ # GP = GaussianProcess(gamma=gamma, s=s)
+ # GP.fit_gp(x, y)
+ # GP.visualize(xtest, show=False, size=5)
+ # plt.show()
+
+ m = 64
+ kernel = KernelFunction(gamma=gamma)
+ embedding = HermiteEmbedding(gamma=gamma, m=m)
+ RFF = KernelizedFeatures(embedding=embedding, s=s, m=m)
+ RFF.fit_gp(x, y)
+ RFF.visualize(xtest, fig=False, show=False, size=5, matheron_kernel=kernel)
+ plt.show()
diff --git a/stpy/regularization/constraints.py b/stpy/regularization/constraints.py
index 20a94df..77c945c 100644
--- a/stpy/regularization/constraints.py
+++ b/stpy/regularization/constraints.py
@@ -48,7 +48,7 @@ class AbsoluteValueConstraint(Constraints):
def __init__(self, c=None):
if c is None:
- self.c = 1.
+ self.c = 1.0
else:
self.c = c
@@ -65,7 +65,7 @@ class QuadraticInequalityConstraint(Constraints):
def __init__(self, Q, b=None, c=None):
self.Q = Q
if c is None:
- self.c = 1.
+ self.c = 1.0
else:
self.c = c
if b is None:
@@ -87,7 +87,6 @@ def __init__(self, q, c, d, groups):
self.groups = groups
self.convex = False
-
def get_list_cvxpy_constraints(self, theta):
w = self.q / (1 - self.q)
set_of_constraints = []
@@ -97,7 +96,7 @@ def get_list_cvxpy_constraints(self, theta):
# l1 constraint
constraints = []
weights = np.ones(d) * w
- weights[i] = 1.
+ weights[i] = 1.0
group = self.groups[i]
constraints.append(cp.norm(theta[group]).T * weights[i] <= self.c)
# l_infinity constraint
@@ -114,6 +113,7 @@ def get_constraint_cvxpy(self, theta):
## Does not work for non-convex constraints
return None
+
class NonConvexNormConstraint(Constraints):
def __init__(self, q, c, d):
@@ -132,7 +132,7 @@ def construct(self, q, d):
polytope = copy.copy(square)
zero = np.zeros(d).reshape(1, -1)
appex = copy.copy(zero)
- appex[0, i // 2] = (float(i % 2) - 0.5) * 2.
+ appex[0, i // 2] = (float(i % 2) - 0.5) * 2.0
polytope = np.concatenate((appex, polytope))
self.vertex_description.append(polytope)
self.polyhedra_vertex_description.append(polytope)
@@ -167,7 +167,7 @@ def get_list_cvxpy_constraints(self, theta):
# l1 constraint
constraints = []
weights = np.ones(self.d) * w
- weights[i] = 1.
+ weights[i] = 1.0
constraints.append(cp.abs(theta).T @ weights <= self.c)
# l_infinity constraint
for j in range(self.d):
diff --git a/stpy/regularization/regularizer.py b/stpy/regularization/regularizer.py
index 21888f0..9aa91c7 100644
--- a/stpy/regularization/regularizer.py
+++ b/stpy/regularization/regularizer.py
@@ -8,7 +8,7 @@
class Regularizer(ABC):
- def __init__(self, lam=1.):
+ def __init__(self, lam=1.0):
self.lam = lam
self.groups = None
self.convex = True
@@ -19,7 +19,9 @@ def eval(self, theta):
@abstractmethod
def get_regularizer_cvxpy(self):
- def reg(theta): return 0
+ def reg(theta):
+ return 0
+
return reg
def is_convex(self):
@@ -28,8 +30,10 @@ def is_convex(self):
def get_constraint_set_cvxpy(self, theta, c):
return [self.get_regularizer_cvxpy()(theta) <= c]
- def get_constraint_object(self,c):
- return CustomConstraint(None, lambda theta: self.get_constraint_set_cvxpy(theta, c))
+ def get_constraint_object(self, c):
+ return CustomConstraint(
+ None, lambda theta: self.get_constraint_set_cvxpy(theta, c)
+ )
def hessian(self, theta_fit):
pass
@@ -37,27 +41,30 @@ def hessian(self, theta_fit):
class L2Regularizer(Regularizer):
- def __init__(self, lam=1.):
- super().__init__(lam = lam)
+ def __init__(self, lam=1.0):
+ super().__init__(lam=lam)
def get_regularizer_cvxpy(self):
- def reg(theta): return self.lam*cp.sum_squares(theta)/2.
+ def reg(theta):
+ return self.lam * cp.sum_squares(theta) / 2.0
+
return reg
def eval(self, theta):
- return self.lam*torch.sum(theta**2)/2.
+ return self.lam * torch.sum(theta**2) / 2.0
def hessian(self, theta):
- return self.lam * torch.eye(n = theta.size()[0]).double()/2.
+ return self.lam * torch.eye(n=theta.size()[0]).double() / 2.0
+
class NonConvexLqRegularizer(Regularizer):
- def __init__(self, lam=1., q = 0.5):
- super().__init__(lam = lam)
+ def __init__(self, lam=1.0, q=0.5):
+ super().__init__(lam=lam)
self.q = q
def eval(self, theta):
- return self.lam*torch.sum(torch.abs(theta)**self.q)
+ return self.lam * torch.sum(torch.abs(theta) ** self.q)
def hessian(self, theta):
return None
@@ -67,14 +74,16 @@ def is_convex(self):
def get_regularizer_cvxpy(self, eta):
def reg(theta):
- norm = cp.sum_squares(theta/eta.reshape(-1,1))
- return self.q*0.5*norm*self.lam
+ norm = cp.sum_squares(theta / eta.reshape(-1, 1))
+ return self.q * 0.5 * norm * self.lam
+
return reg
+
class GroupNonCovexLqRegularizer(NonConvexLqRegularizer):
- def __init__(self, lam=1., q = 0.5, groups = None):
- super().__init__(lam = lam)
+ def __init__(self, lam=1.0, q=0.5, groups=None):
+ super().__init__(lam=lam)
self.q = q
self.groups = groups
@@ -82,43 +91,44 @@ def eval(self, theta):
val = None
for group in self.groups:
if val is None:
- val = torch.norm(theta[group])**self.q
+ val = torch.norm(theta[group]) ** self.q
else:
val += torch.norm(theta[group]) ** self.q
- return self.lam*val
+ return self.lam * val
def get_regularizer_cvxpy(self, eta):
def reg(theta):
val = None
- for i,group in enumerate(self.groups):
+ for i, group in enumerate(self.groups):
if val is None:
- val = cp.sum_squares(theta[group])/eta[i].reshape(-1,1)
+ val = cp.sum_squares(theta[group]) / eta[i].reshape(-1, 1)
else:
- val += cp.sum_squares(theta[group])/eta[i].reshape(-1,1)
- return val*self.lam
+ val += cp.sum_squares(theta[group]) / eta[i].reshape(-1, 1)
+ return val * self.lam
+
return reg
class L1Regularizer(Regularizer):
- def __init__(self, lam=1.):
- super().__init__(lam = lam)
+ def __init__(self, lam=1.0):
+ super().__init__(lam=lam)
def get_regularizer_cvxpy(self):
def reg(theta):
- return self.lam*cp.norm1(theta)
+ return self.lam * cp.norm1(theta)
+
return reg
def eval(self, theta):
- return self.lam*torch.sum(torch.abs(theta))
+ return self.lam * torch.sum(torch.abs(theta))
def hessian(self, theta):
- return self.lam * torch.eye(n = theta.size()[0]).double()
-
+ return self.lam * torch.eye(n=theta.size()[0]).double()
class GroupL1L2Regularizer(Regularizer):
- def __init__(self, lam = 1., groups = None):
+ def __init__(self, lam=1.0, groups=None):
self.groups = groups
self.lam = lam
pass
@@ -137,14 +147,16 @@ def reg(theta):
norm = cp.norm2(theta[group])
else:
norm += cp.norm2(theta[group])
- return cp.square(norm)*self.lam
+ return cp.square(norm) * self.lam
+
return reg
def hessian(self, theta):
return None
+
class NestedGroupL1Regularizer(Regularizer):
- def __init__(self, lam = 1., groups = None, weights = None):
+ def __init__(self, lam=1.0, groups=None, weights=None):
self.groups = groups
self.lam = lam
self.weights = weights
@@ -153,7 +165,7 @@ def __init__(self, lam = 1., groups = None, weights = None):
def eval(self, theta):
norm = 0
for i, group in enumerate(self.groups):
- norm += self.weights[i]*torch.sum(torch.abs(theta[group]))
+ norm += self.weights[i] * torch.sum(torch.abs(theta[group]))
return norm**2 * self.lam
def get_regularizer_cvxpy(self):
@@ -167,16 +179,17 @@ def reg(theta):
else:
norm += self.weights[i] * cp.norm1(theta[group])
- return norm*self.lam
+ return norm * self.lam
return reg
def hessian(self, theta):
return None
+
class NestedGroupL1L2Regularizer(Regularizer):
- def __init__(self, lam = 1., groups = None, weights = None):
+ def __init__(self, lam=1.0, groups=None, weights=None):
self.groups = groups
self.lam = lam
self.weights = weights
@@ -199,7 +212,7 @@ def reg(theta):
else:
norm += self.weights[i] * cp.norm2(theta[group])
- return cp.square(norm)*self.lam
+ return cp.square(norm) * self.lam
return reg
@@ -208,7 +221,7 @@ def hessian(self, theta):
class NonConvexNormRegularizer(Regularizer):
- def __init__(self, lam = 1., q = 1. , groups = None):
+ def __init__(self, lam=1.0, q=1.0, groups=None):
self.groups = groups
self.lam = lam
self.q = q
@@ -230,6 +243,6 @@ def reg(theta):
else:
norm += self.weights[i] * cp.norm2(theta[group])
- return cp.square(norm)*self.lam
+ return cp.square(norm) * self.lam
return reg
diff --git a/stpy/regularization/sdp_constraint.py b/stpy/regularization/sdp_constraint.py
index cb8d080..0c3d460 100644
--- a/stpy/regularization/sdp_constraint.py
+++ b/stpy/regularization/sdp_constraint.py
@@ -2,21 +2,23 @@
from stpy.regularization.constraints import Constraints
import cvxpy as cp
+
class SDPConstraint(Constraints):
- def __init__(self, type="trace", rank=1.):
+ def __init__(self, type="trace", rank=1.0):
super().__init__()
self.trace_constraint = None
self.lambda_max_constraint = None
self.psd_constraint = "Yes"
- self.matrix_bound = 1.
+ self.matrix_bound = 1.0
self.type = type
self.rank = rank
- self.custom_regularization= None
+ self.custom_regularization = None
self.fit_params()
+
def fit_params(self):
if self.type == "stable-rank":
self.matrix_bound = self.rank
@@ -24,12 +26,14 @@ def fit_params(self):
def get_type(self):
return self.type
- def get_constraint_cvxpy(self,A,l,s_value):
+ def get_constraint_cvxpy(self, A, l, s_value):
constraints = []
# add a classical psd constraint
if self.matrix_bound is not None:
- constraints+=[cp.trace(A) <= self.matrix_bound * l] + [cp.lambda_max(A) <= l]
+ constraints += [cp.trace(A) <= self.matrix_bound * l] + [
+ cp.lambda_max(A) <= l
+ ]
# trace regularization
if self.trace_constraint is not None:
@@ -37,13 +41,13 @@ def get_constraint_cvxpy(self,A,l,s_value):
# restrict the max eigenvalue
if s_value is not None:
- constraints += [l<=s_value]
+ constraints += [l <= s_value]
# lambda_max regularization
if self.lambda_max_constraint is not None:
constraints += [cp.lambda_max(A) <= self.lambda_max_constraint]
if self.custom_regularization is not None:
- constraints += [self.custom_regularization(A,l,s_value)]
+ constraints += [self.custom_regularization(A, l, s_value)]
- return constraints
\ No newline at end of file
+ return constraints
diff --git a/stpy/regularization/simplex_regularizer.py b/stpy/regularization/simplex_regularizer.py
index 1383812..e2b7a93 100644
--- a/stpy/regularization/simplex_regularizer.py
+++ b/stpy/regularization/simplex_regularizer.py
@@ -2,48 +2,57 @@
import cvxpy as cp
import numpy as np
import torch
+
+
class ProbabilityRegularizer(Regularizer):
def __init__(self, lam=1, w=None, d=1, **kwargs):
super().__init__(lam)
self.lam = lam
if w is None:
- self.w = torch.ones(d).double()/d
+ self.w = torch.ones(d).double() / d
else:
self.w = w
self.convex = True
self.dcp = True
self.d = d
self.name = "default"
+
+
class SupRegularizer(ProbabilityRegularizer):
- def __init__(self, constrained = False, version = '1',**kwargs):
+ def __init__(self, constrained=False, version="1", **kwargs):
super().__init__(**kwargs)
self.convex = False
self.name = "sup"
self.constrained = constrained
self.version = version
+
def get_regularizer_cvxpy(self):
pass
def get_cvxpy_objectives_constraints_variables(self, d):
if not self.constrained:
- print (d, self.w )
- objectives = [lambda x: cp.inv_pos(x[i])*self.lam/self.w[i] for i in range(d)]
+ print(d, self.w)
+ objectives = [
+ lambda x: cp.inv_pos(x[i]) * self.lam / self.w[i] for i in range(d)
+ ]
constriants = [lambda x: [] for i in range(d)]
return objectives, constriants, []
- elif self.version == '1':
- objectives = [lambda x: 0. for i in range(d)]
- #constriants = [lambda x: [cp.inv_pos(x[i])<=1/self.lam]+[cp.max(x)<=x[i]] for i in range(d)]
- constriants = [lambda x: [x[i] >= self.lam] for i in range(d)]
+ elif self.version == "1":
+ objectives = [lambda x: 0.0 for i in range(d)]
+ # constriants = [lambda x: [cp.inv_pos(x[i])<=1/self.lam]+[cp.max(x)<=x[i]] for i in range(d)]
+ constriants = [lambda x: [x[i] >= self.lam] for i in range(d)]
return objectives, constriants, []
else:
- objectives = [lambda x: 0.]
+ objectives = [lambda x: 0.0]
I = np.eye(d)
- constriants = [lambda x: [ I*self.lam*cp.sum(x) << d*cp.diag(x)]]
+ constriants = [lambda x: [I * self.lam * cp.sum(x) << d * cp.diag(x)]]
return objectives, constriants, []
+
def eval(self, theta):
- return self.lam/torch.max(self.w*theta)
+ return self.lam / torch.max(self.w * theta)
+
class DirichletRegularizer(ProbabilityRegularizer):
@@ -52,11 +61,12 @@ def __init__(self, **kwargs):
self.name = "dirichlet"
def get_regularizer_cvxpy(self):
- return lambda x: cp.sum((self.w-1)@cp.log(x)) * self.lam
+ return lambda x: cp.sum((self.w - 1) @ cp.log(x)) * self.lam
def eval(self, theta):
return self.lam / torch.sum(torch.abs(theta))
+
class WeightedAitchisonRegularizer(ProbabilityRegularizer):
def __init__(self, **kwargs):
@@ -64,21 +74,20 @@ def __init__(self, **kwargs):
self.dcp = False
self.name = "aitchison"
-
def get_regularizer_cvxpy(self):
def reg(x):
- # outer = sum([cp.log(x[j])*cp.log(x[i]) for i,j in zip(range(self.d),range(self.d)) if i!=j])
- return 2*self.lam*(cp.sum(cp.log(x)**2))
+ # outer = sum([cp.log(x[j])*cp.log(x[i]) for i,j in zip(range(self.d),range(self.d)) if i!=j])
+ return 2 * self.lam * (cp.sum(cp.log(x) ** 2))
return reg
+
def eval(self, theta):
return self.lam / torch.sum(torch.abs(theta))
class L1MeasureRegularizer(ProbabilityRegularizer):
def get_regularizer_cvxpy(self):
- return lambda x: cp.norm1(x)*self.lam
+ return lambda x: cp.norm1(x) * self.lam
def eval(self, theta):
- return self.lam/torch.sum(torch.abs(theta))
-
+ return self.lam / torch.sum(torch.abs(theta))
diff --git a/stpy/sampling/hmc.py b/stpy/sampling/hmc.py
index 879fd17..1e6ce13 100644
--- a/stpy/sampling/hmc.py
+++ b/stpy/sampling/hmc.py
@@ -1,5 +1,7 @@
-params_hmc = hamiltorch.sample(log_prob_func=log_prob_func,
- params_init=params_init,
- num_samples=num_samples,
- step_size=step_size,
- num_steps_per_sample=num_steps_per_sample)
+params_hmc = hamiltorch.sample(
+ log_prob_func=log_prob_func,
+ params_init=params_init,
+ num_samples=num_samples,
+ step_size=step_size,
+ num_steps_per_sample=num_steps_per_sample,
+)
diff --git a/stpy/sampling/langevin.py b/stpy/sampling/langevin.py
index c7255b7..1914ae5 100644
--- a/stpy/sampling/langevin.py
+++ b/stpy/sampling/langevin.py
@@ -2,25 +2,30 @@
import torch
import scipy
-class LangevinSampler():
- def __init__(self, verbose = False):
- self.verbose = verbose
- pass
+class LangevinSampler:
- def calculate(self, HessianF,theta0):
- W = HessianF(theta0)
- L = float(scipy.sparse.linalg.eigsh(W.numpy(), k=1, which='LM', return_eigenvectors=False, tol=1e-3))
- return L
+ def __init__(self, verbose=False):
+ self.verbose = verbose
+ pass
- def sample(self, F, nablaF, HessianF, theta0, steps = 100):
- L = self.calculate(HessianF, theta0)
- eta = 0.5 / (L + 1)
- m = theta0.size()[0]
- theta = theta0
- for k in range(steps):
- w = torch.randn(size=(m, 1)).double()
- theta = theta - eta * nablaF(theta) + np.sqrt(2 * eta) * w
- if self.verbose == True:
- print("Iter:", k, theta.T)
- return theta
\ No newline at end of file
+ def calculate(self, HessianF, theta0):
+ W = HessianF(theta0)
+ L = float(
+ scipy.sparse.linalg.eigsh(
+ W.numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-3
+ )
+ )
+ return L
+
+ def sample(self, F, nablaF, HessianF, theta0, steps=100):
+ L = self.calculate(HessianF, theta0)
+ eta = 0.5 / (L + 1)
+ m = theta0.size()[0]
+ theta = theta0
+ for k in range(steps):
+ w = torch.randn(size=(m, 1)).double()
+ theta = theta - eta * nablaF(theta) + np.sqrt(2 * eta) * w
+ if self.verbose == True:
+ print("Iter:", k, theta.T)
+ return theta
diff --git a/stpy/sampling/proximal_langevin.py b/stpy/sampling/proximal_langevin.py
index 2cf69c1..322ccab 100644
--- a/stpy/sampling/proximal_langevin.py
+++ b/stpy/sampling/proximal_langevin.py
@@ -2,20 +2,26 @@
import torch
import numpy as np
+
def ProximalLangevin(LangevinSampler):
- def sample(self, F, nablaF, HessianF, theta0, prox, steps = 100):
- L = self.calculate(HessianF, theta0)
- eta = 0.5 / (L + 1)
- m = theta0.size()[0]
- theta = theta0
- for k in range(steps):
- w = torch.randn(size=(m, 1)).double()
- theta = (1 - eta) * theta - eta * nablaF(theta) + eta * prox(theta) + np.sqrt(2 * eta) * w
- if self.verbose == True:
- print("Iter:", k, theta.T)
- return prox(theta)
+ def sample(self, F, nablaF, HessianF, theta0, prox, steps=100):
+ L = self.calculate(HessianF, theta0)
+ eta = 0.5 / (L + 1)
+ m = theta0.size()[0]
+ theta = theta0
+ for k in range(steps):
+ w = torch.randn(size=(m, 1)).double()
+ theta = (
+ (1 - eta) * theta
+ - eta * nablaF(theta)
+ + eta * prox(theta)
+ + np.sqrt(2 * eta) * w
+ )
+ if self.verbose == True:
+ print("Iter:", k, theta.T)
+ return prox(theta)
def MirrorLangevin(LangvinSampler):
- pass
\ No newline at end of file
+ pass
diff --git a/stpy/sampling/sampling_helper.py b/stpy/sampling/sampling_helper.py
index 6cbef04..0024a3a 100644
--- a/stpy/sampling/sampling_helper.py
+++ b/stpy/sampling/sampling_helper.py
@@ -1,58 +1,56 @@
-
-
import torch
import numpy as np
import matplotlib.pyplot as plt
-def get_increment(eta, steps, f, w0, path = False):
- """
- :param eta: terminal time
- :param steps: number of steps
- :param f: the operator
- :param w0: initial point
- :return:
- """
+def get_increment(eta, steps, f, w0, path=False):
+ """
- tau = eta/steps
- w = w0
- sequence = []
+ :param eta: terminal time
+ :param steps: number of steps
+ :param f: the operator
+ :param w0: initial point
+ :return:
+ """
- for i in range(steps):
+ tau = eta / steps
+ w = w0
+ sequence = []
+ for i in range(steps):
- n = torch.randn(size = w0.size()).double()
- w = w + np.sqrt(2*tau)*f(w,n)
- if path:
- sequence.append(w)
+ n = torch.randn(size=w0.size()).double()
+ w = w + np.sqrt(2 * tau) * f(w, n)
+ if path:
+ sequence.append(w)
- if path:
- return sequence
- else:
- return w
+ if path:
+ return sequence
+ else:
+ return w
-if __name__ == "__main__":
- f = lambda w: torch.diag(1./torch.abs(w.view(-1)))
- d = 1
- w0 = torch.zeros(size = (d,1)).double() + 2
- step = 100
- path = get_increment(2, step, f, w0, path = True)
- #plt.plot(path)
-
- i = 0
- colors = ['k','r','b','orange','brown','purple']
- for steps in [5,10,20,100,200,500]:
-
- repeats = 100
- ws = []
- for _ in range(repeats):
- path = get_increment(2,steps,f,w0, path = True)
- xtest = torch.linspace(0,2,steps)
- plt.plot(xtest, path, color = colors[i])
- i = i + 1
- # plt.hist(np.array(ws), label = str(step))
-
- plt.legend()
- plt.show()
+if __name__ == "__main__":
+ f = lambda w: torch.diag(1.0 / torch.abs(w.view(-1)))
+ d = 1
+ w0 = torch.zeros(size=(d, 1)).double() + 2
+ step = 100
+ path = get_increment(2, step, f, w0, path=True)
+ # plt.plot(path)
+
+ i = 0
+ colors = ["k", "r", "b", "orange", "brown", "purple"]
+ for steps in [5, 10, 20, 100, 200, 500]:
+
+ repeats = 100
+ ws = []
+ for _ in range(repeats):
+ path = get_increment(2, steps, f, w0, path=True)
+ xtest = torch.linspace(0, 2, steps)
+ plt.plot(xtest, path, color=colors[i])
+ i = i + 1
+ # plt.hist(np.array(ws), label = str(step))
+
+ plt.legend()
+ plt.show()
diff --git a/stpy/test_functions/benchmarks.py b/stpy/test_functions/benchmarks.py
index 9cd2c47..8432ca4 100755
--- a/stpy/test_functions/benchmarks.py
+++ b/stpy/test_functions/benchmarks.py
@@ -6,513 +6,566 @@
from stpy.continuous_processes.gauss_procc import GaussianProcess
-class BenchmarkFunction():
-
- def __init__(self, type="discrete", d=1, gamma=1.0, dts=None, **kwargs):
- self.scale = 1.0
- self.type = type
- self.gamma = gamma
- self.d = d
- self.dts = None
- self.groups = None
-
- def eval_noiseless(self, X):
- if X.size()[1] != self.d:
- raise AssertionError("Invalid dimension for the Benchmark function ...")
- pass
-
- def eval(self, X, sigma=None):
- z = self.eval_noiseless(X)
- if sigma is None:
- y = z/self.scale + self.s * torch.randn(X.size()[0], 1, dtype=torch.float64)
- else:
- y = z/self.scale + sigma * torch.randn(X.size()[0], 1, dtype=torch.float64)
- return y
-
- def optimum(self):
- return 1.0
-
- def maximum(self, xtest=None):
- if self.type == "discrete":
- self.max = self.maximum_discrete(xtest)
- else:
- self.max = self.maximum_continuous()
- return self.max
-
- def maximum_discrete(self, xtest):
- maximum =torch.max(self.eval_noiseless(xtest))
- return maximum
-
- def maximum_continuous(self):
- return 1.0
-
- def scale_max(self, xtest=None):
- self.scale = self.maximum(xtest=xtest)
- print("Scaling with", self.scale)
-
- def optimize(self, xtest, sigma, restarts=5):
- (n, d) = xtest.size()
- ytest = self.eval(xtest, sigma=sigma)
- kernel = stpy.kernels.KernelFunction(kernel_name="ard", gamma=torch.ones(d, dtype=torch.float64) * 0.1,
- groups=self.groups)
- GP = stpy.continuous_processes.gauss_procc.GaussianProcess(kernel_custom=kernel, s=sigma, d=d)
- GP.fit(xtest, ytest)
- GP.optimize_params(type="bandwidth", restarts=restarts)
- print("Optimized")
- # GP.visualize(xtest)
- self.gamma = torch.min(kernel.gamma)
- return self.gamma
-
- def return_params(self):
- return (self.gamma, self.groups, self.d)
-
- def bandwidth(self):
- return self.gamma
-
- def set_group_param(self, groups):
- self.groups = groups
-
- def bounds(self):
- b = tuple([(-0.5, 0.5) for i in range(self.d)])
- return b
-
- def initial_guess(self, N, adv_inv=False):
- if adv_inv == False:
- x = torch.from_numpy(np.random.uniform(-0.5, 0.5, size=(N, self.d)))
- else:
- x = torch.from_numpy(np.random.uniform(-0.5, 0., size=(N, self.d)))
- return x
-
- def interval(self, n, L_infinity_ball=0.5):
- if n == None:
- xtest = None
- else:
- xtest = torch.from_numpy(stpy.helpers.helper.interval(n, self.d, L_infinity_ball=L_infinity_ball))
- return xtest
-
- def visualize(self, xtest):
- import matplotlib.pyplot as plt
- d = xtest.size()[1]
- if d == 1:
- plt.figure(figsize=(15, 7))
- plt.clf()
- plt.plot(xtest.numpy(), self.eval_noiseless(xtest)[:, 0].numpy())
- plt.show()
- elif d == 2:
- from scipy.interpolate import griddata
- plt.figure(figsize=(15, 7))
- plt.clf()
- ax = plt.axes(projection='3d')
- xx = xtest[:, 0].numpy()
- yy = xtest[:, 1].numpy()
- grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j]
- grid_z = griddata((xx, yy), self.eval_noiseless(xtest)[:, 0].numpy(), (grid_x, grid_y), method='linear')
- ax.plot_surface(grid_x, grid_y, grid_z, color='b', alpha=0.4)
- plt.show()
+class BenchmarkFunction:
+
+ def __init__(self, type="discrete", d=1, gamma=1.0, dts=None, **kwargs):
+ self.scale = 1.0
+ self.type = type
+ self.gamma = gamma
+ self.d = d
+ self.dts = None
+ self.groups = None
+
+ def eval_noiseless(self, X):
+ if X.size()[1] != self.d:
+ raise AssertionError("Invalid dimension for the Benchmark function ...")
+ pass
+
+ def eval(self, X, sigma=None):
+ z = self.eval_noiseless(X)
+ if sigma is None:
+ y = z / self.scale + self.s * torch.randn(
+ X.size()[0], 1, dtype=torch.float64
+ )
+ else:
+ y = z / self.scale + sigma * torch.randn(
+ X.size()[0], 1, dtype=torch.float64
+ )
+ return y
+
+ def optimum(self):
+ return 1.0
+
+ def maximum(self, xtest=None):
+ if self.type == "discrete":
+ self.max = self.maximum_discrete(xtest)
+ else:
+ self.max = self.maximum_continuous()
+ return self.max
+
+ def maximum_discrete(self, xtest):
+ maximum = torch.max(self.eval_noiseless(xtest))
+ return maximum
+
+ def maximum_continuous(self):
+ return 1.0
+
+ def scale_max(self, xtest=None):
+ self.scale = self.maximum(xtest=xtest)
+ print("Scaling with", self.scale)
+
+ def optimize(self, xtest, sigma, restarts=5):
+ (n, d) = xtest.size()
+ ytest = self.eval(xtest, sigma=sigma)
+ kernel = stpy.kernels.KernelFunction(
+ kernel_name="ard",
+ gamma=torch.ones(d, dtype=torch.float64) * 0.1,
+ groups=self.groups,
+ )
+ GP = stpy.continuous_processes.gauss_procc.GaussianProcess(
+ kernel_custom=kernel, s=sigma, d=d
+ )
+ GP.fit(xtest, ytest)
+ GP.optimize_params(type="bandwidth", restarts=restarts)
+ print("Optimized")
+ # GP.visualize(xtest)
+ self.gamma = torch.min(kernel.gamma)
+ return self.gamma
+
+ def return_params(self):
+ return (self.gamma, self.groups, self.d)
+
+ def bandwidth(self):
+ return self.gamma
+
+ def set_group_param(self, groups):
+ self.groups = groups
+
+ def bounds(self):
+ b = tuple([(-0.5, 0.5) for i in range(self.d)])
+ return b
+
+ def initial_guess(self, N, adv_inv=False):
+ if adv_inv == False:
+ x = torch.from_numpy(np.random.uniform(-0.5, 0.5, size=(N, self.d)))
+ else:
+ x = torch.from_numpy(np.random.uniform(-0.5, 0.0, size=(N, self.d)))
+ return x
+
+ def interval(self, n, L_infinity_ball=0.5):
+ if n == None:
+ xtest = None
+ else:
+ xtest = torch.from_numpy(
+ stpy.helpers.helper.interval(n, self.d, L_infinity_ball=L_infinity_ball)
+ )
+ return xtest
+
+ def visualize(self, xtest):
+ import matplotlib.pyplot as plt
+
+ d = xtest.size()[1]
+ if d == 1:
+ plt.figure(figsize=(15, 7))
+ plt.clf()
+ plt.plot(xtest.numpy(), self.eval_noiseless(xtest)[:, 0].numpy())
+ plt.show()
+ elif d == 2:
+ from scipy.interpolate import griddata
+
+ plt.figure(figsize=(15, 7))
+ plt.clf()
+ ax = plt.axes(projection="3d")
+ xx = xtest[:, 0].numpy()
+ yy = xtest[:, 1].numpy()
+ grid_x, grid_y = np.mgrid[
+ min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j
+ ]
+ grid_z = griddata(
+ (xx, yy),
+ self.eval_noiseless(xtest)[:, 0].numpy(),
+ (grid_x, grid_y),
+ method="linear",
+ )
+ ax.plot_surface(grid_x, grid_y, grid_z, color="b", alpha=0.4)
+ plt.show()
class CamelbackBenchmark(BenchmarkFunction):
- def __init__(self, **kwargs):
- super().__init__(**kwargs)
- self.d = 2
-
- def eval_noiseless(self, X):
- super().eval_noiseless(X)
- xx = X[:, 0] * 4
- yy = X[:, 1] * 2
- y = (4. - 2.1 * xx ** 2 + (xx ** 4) / 3.) * (xx ** 2) + xx * yy + (-4. + 4 * (yy ** 2)) * (yy ** 2)
- y = -y.view(X.size()[0], 1)
- # y = np.tanh(y)
- y = y / 5.
- return y / self.scale
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+ self.d = 2
+
+ def eval_noiseless(self, X):
+ super().eval_noiseless(X)
+ xx = X[:, 0] * 4
+ yy = X[:, 1] * 2
+ y = (
+ (4.0 - 2.1 * xx**2 + (xx**4) / 3.0) * (xx**2)
+ + xx * yy
+ + (-4.0 + 4 * (yy**2)) * (yy**2)
+ )
+ y = -y.view(X.size()[0], 1)
+ # y = np.tanh(y)
+ y = y / 5.0
+ return y / self.scale
# def optimize(self,xtest,sigma, restarts = 5):
# self.gamma = 0.3
+
# self.gamma = 0.3
class QuadraticBenchmark(BenchmarkFunction):
- def __init__(self, **kwargs):
- super().__init__(**kwargs)
- self.d = kwargs['d']
- self.type = "continuous"
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+ self.d = kwargs["d"]
+ self.type = "continuous"
- if 'R' in kwargs:
- self.R = kwargs['R']
- print("Quadratic Problem: Rotating - no longer additive.")
- print(self.R)
- else:
- self.R = torch.eye(self.d, self.d, dtype=torch.float64)
- print("Quadratic Problem: Additive.")
+ if "R" in kwargs:
+ self.R = kwargs["R"]
+ print("Quadratic Problem: Rotating - no longer additive.")
+ print(self.R)
+ else:
+ self.R = torch.eye(self.d, self.d, dtype=torch.float64)
+ print("Quadratic Problem: Additive.")
- def eval_noiseless(self, X):
- D = torch.diag(torch.Tensor([1., 2.]).double())
- super().eval_noiseless(X)
- (n, d) = X.size()
- X = X @ self.R
- sum_ = torch.sum((X @ D) ** 2, dim=1)
- print(sum_.size())
- return -sum_.view(-1, 1) / self.scale + 1
+ def eval_noiseless(self, X):
+ D = torch.diag(torch.tensor([1.0, 2.0]).double())
+ super().eval_noiseless(X)
+ (n, d) = X.size()
+ X = X @ self.R
+ sum_ = torch.sum((X @ D) ** 2, dim=1)
+ print(sum_.size())
+ return -sum_.view(-1, 1) / self.scale + 1
- def bandwidth(self):
- return 0.2
+ def bandwidth(self):
+ return 0.2
class PolynomialBenchmark(BenchmarkFunction):
- def __init__(self, **kwargs):
- super().__init__(**kwargs)
- self.d = kwargs['d']
- self.type = "continuous"
-
- if 'R' in kwargs:
- self.R = kwargs['R']
- print("Quadratic Problem: Rotating - no longer additive.")
- print(self.R)
- else:
- self.R = torch.eye(self.d, self.d, dtype=torch.float64)
- print("Quadratic Problem: Additive.")
-
- def eval_noiseless(self, X):
- D = torch.diag(torch.Tensor([1., 2.]).double())
- super().eval_noiseless(X)
- (n, d) = X.size()
- X = X @ self.R
- sum_ = torch.sum((X @ D) ** 2, dim=1) + torch.sum((X @ D) ** 3, dim=1) * 0.5 + torch.sum((X @ D) ** 4, dim=1)
- print(sum_.size())
- return -sum_.view(-1, 1) / self.scale + 1
-
- def bandwidth(self):
- return 0.2
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+ self.d = kwargs["d"]
+ self.type = "continuous"
+
+ if "R" in kwargs:
+ self.R = kwargs["R"]
+ print("Quadratic Problem: Rotating - no longer additive.")
+ print(self.R)
+ else:
+ self.R = torch.eye(self.d, self.d, dtype=torch.float64)
+ print("Quadratic Problem: Additive.")
+
+ def eval_noiseless(self, X):
+ D = torch.diag(torch.tensor([1.0, 2.0]).double())
+ super().eval_noiseless(X)
+ (n, d) = X.size()
+ X = X @ self.R
+ sum_ = (
+ torch.sum((X @ D) ** 2, dim=1)
+ + torch.sum((X @ D) ** 3, dim=1) * 0.5
+ + torch.sum((X @ D) ** 4, dim=1)
+ )
+ print(sum_.size())
+ return -sum_.view(-1, 1) / self.scale + 1
+
+ def bandwidth(self):
+ return 0.2
class MichalBenchmark(BenchmarkFunction):
- def __init__(self, **kwargs):
- super().__init__(**kwargs)
- self.d = kwargs['d']
- self.type = "continuous"
-
- if 'R' in kwargs:
- self.R = kwargs['R']
- print("Michal Problem: Rotating - no longer additive.")
- print(self.R)
- else:
- self.R = torch.eye(self.d, self.d, dtype=torch.float64)
- print("Michal Problem: Additive.")
-
- def eval_noiseless(self, X):
- super().eval_noiseless(X)
- (n, d) = X.size()
- X = X @ self.R
- X = X / 0.75
- X = (X + 0.5) * np.pi
- ar = torch.from_numpy(np.arange(1, d + 1, 1, dtype=np.float64))
- sum_ = torch.sin(X) * torch.pow(torch.sin(ar * X / np.pi), int(2 * d))
- sum_ = torch.sum(sum_, dim=1).view(-1, 1)
- return sum_ / self.scale
-
- def optimize(self, xtest, sigma, restarts=5, n=512):
- xtest = torch.zeros(n, self.d, dtype=torch.float64)
- xtest[:, 0] = torch.linspace(-0.5, 0.5, n, dtype=torch.float64)
- ytest = self.eval(xtest, sigma=sigma)
- kernel = stpy.kernels.KernelFunction(kernel_name="ard", gamma=torch.ones(self.d, dtype=torch.float64) * 0.1,
- groups=self.groups)
- GP = GaussianProcess(kernel=kernel, s=sigma, d=self.d)
- GP.fit_gp(xtest, ytest)
- #GP.optimize_params(type="bandwidth", restarts=restarts)
- #print("Optimized")
- #GP.back_prop
- self.gamma = torch.min(kernel.gamma)
- return self.gamma
-
- def bandwidth(self):
- return 0.2
-
- def maximum_continuous(self):
- opt = np.ones(shape=(20))
- # holds with different constnat
- opt[0] = 2.93254
- opt[1] = 2.34661
- opt[2] = 1.64107
- opt[3] = 1.24415
- opt[4] = 0.999643
- opt[5] = 0.834879
- opt[6] = 2.1089
- opt[7] = 1.84835
- opt[8] = 1.64448
- opt[9] = 1.48089
- opt[10] = 1.34678
- opt[11] = 1.2349
- opt[12] = 1.89701
- opt[13] = 1.76194
- opt[14] = 1.64477
- opt[15] = 1.54218
- opt[16] = 1.45162
- opt[17] = 1.37109
- opt[18] = 1.81774
- return float(opt[self.d])
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+ self.d = kwargs["d"]
+ self.type = "continuous"
+
+ if "R" in kwargs:
+ self.R = kwargs["R"]
+ print("Michal Problem: Rotating - no longer additive.")
+ print(self.R)
+ else:
+ self.R = torch.eye(self.d, self.d, dtype=torch.float64)
+ print("Michal Problem: Additive.")
+
+ def eval_noiseless(self, X):
+ super().eval_noiseless(X)
+ (n, d) = X.size()
+ X = X @ self.R
+ X = X / 0.75
+ X = (X + 0.5) * np.pi
+ ar = torch.from_numpy(np.arange(1, d + 1, 1, dtype=np.float64))
+ sum_ = torch.sin(X) * torch.pow(torch.sin(ar * X / np.pi), int(2 * d))
+ sum_ = torch.sum(sum_, dim=1).view(-1, 1)
+ return sum_ / self.scale
+
+ def optimize(self, xtest, sigma, restarts=5, n=512):
+ xtest = torch.zeros(n, self.d, dtype=torch.float64)
+ xtest[:, 0] = torch.linspace(-0.5, 0.5, n, dtype=torch.float64)
+ ytest = self.eval(xtest, sigma=sigma)
+ kernel = stpy.kernels.KernelFunction(
+ kernel_name="ard",
+ gamma=torch.ones(self.d, dtype=torch.float64) * 0.1,
+ groups=self.groups,
+ )
+ GP = GaussianProcess(kernel=kernel, s=sigma, d=self.d)
+ GP.fit_gp(xtest, ytest)
+ # GP.optimize_params(type="bandwidth", restarts=restarts)
+ # print("Optimized")
+ # GP.back_prop
+ self.gamma = torch.min(kernel.gamma)
+ return self.gamma
+
+ def bandwidth(self):
+ return 0.2
+
+ def maximum_continuous(self):
+ opt = np.ones(shape=(20))
+ # holds with different constnat
+ opt[0] = 2.93254
+ opt[1] = 2.34661
+ opt[2] = 1.64107
+ opt[3] = 1.24415
+ opt[4] = 0.999643
+ opt[5] = 0.834879
+ opt[6] = 2.1089
+ opt[7] = 1.84835
+ opt[8] = 1.64448
+ opt[9] = 1.48089
+ opt[10] = 1.34678
+ opt[11] = 1.2349
+ opt[12] = 1.89701
+ opt[13] = 1.76194
+ opt[14] = 1.64477
+ opt[15] = 1.54218
+ opt[16] = 1.45162
+ opt[17] = 1.37109
+ opt[18] = 1.81774
+ return float(opt[self.d])
class StybTangBenchmark(BenchmarkFunction):
- def __init__(self, **kwargs):
- super().__init__(**kwargs)
- self.d = kwargs['d']
- self.type = "discrete"
- if 'R' in kwargs:
- self.R = kwargs['R']
- print("Stybtang Problem: Rotating - no longer additive.")
- print(self.R)
- else:
- self.R = torch.eye(self.d, self.d, dtype=torch.float64)
- print("Stybtang Problem: Additive")
-
- def eval_noiseless(self, X):
- super().eval_noiseless(X)
- (n, d) = X.size()
- X = X @ self.R
- X = X * 8
- Y = X ** 2
- sum_ = torch.sum(Y ** 2 - 16. * Y + 5 * X, dim=1).view(-1, 1)
- return -(0.5 * sum_ / (d * 200.) + 0.5)/self.scale
-
- # def maximum_continuous(self):
- # opt = np.ones(shape=(self.d)) * (-2.9035) / 8
- # opt = torch.from_numpy(opt.reshape(1, -1))
- # value = self.eval_noiseless(opt)[0][0] * 16
- # return value
- #
- # def optimize(self, xtest, sigma, restarts=5, n=512):
- # xtest = torch.zeros(n, self.d, dtype=torch.float64)
- # xtest[:, 0] = torch.linspace(-0.5, 0.5, n, dtype=torch.float64)
- # ytest = self.eval(xtest, sigma=sigma)
- # kernel = stpy.kernels.KernelFunction(kernel_name="ard", gamma=torch.ones(self.d, dtype=torch.float64) * 0.1,
- # groups=self.groups)
- # GP = GaussianProcess(kernel_custom=kernel, s=sigma, d=self.d)
- # GP.fit(xtest, ytest)
- # GP.optimize_params(type="bandwidth", restarts=restarts)
- # print("Optimized")
- # self.gamma = torch.min(kernel.gamma)
- # return self.gamma
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+ self.d = kwargs["d"]
+ self.type = "discrete"
+ if "R" in kwargs:
+ self.R = kwargs["R"]
+ print("Stybtang Problem: Rotating - no longer additive.")
+ print(self.R)
+ else:
+ self.R = torch.eye(self.d, self.d, dtype=torch.float64)
+ print("Stybtang Problem: Additive")
+
+ def eval_noiseless(self, X):
+ super().eval_noiseless(X)
+ (n, d) = X.size()
+ X = X @ self.R
+ X = X * 8
+ Y = X**2
+ sum_ = torch.sum(Y**2 - 16.0 * Y + 5 * X, dim=1).view(-1, 1)
+ return -(0.5 * sum_ / (d * 200.0) + 0.5) / self.scale
+
+ # def maximum_continuous(self):
+ # opt = np.ones(shape=(self.d)) * (-2.9035) / 8
+ # opt = torch.from_numpy(opt.reshape(1, -1))
+ # value = self.eval_noiseless(opt)[0][0] * 16
+ # return value
+ #
+ # def optimize(self, xtest, sigma, restarts=5, n=512):
+ # xtest = torch.zeros(n, self.d, dtype=torch.float64)
+ # xtest[:, 0] = torch.linspace(-0.5, 0.5, n, dtype=torch.float64)
+ # ytest = self.eval(xtest, sigma=sigma)
+ # kernel = stpy.kernels.KernelFunction(kernel_name="ard", gamma=torch.ones(self.d, dtype=torch.float64) * 0.1,
+ # groups=self.groups)
+ # GP = GaussianProcess(kernel_custom=kernel, s=sigma, d=self.d)
+ # GP.fit(xtest, ytest)
+ # GP.optimize_params(type="bandwidth", restarts=restarts)
+ # print("Optimized")
+ # self.gamma = torch.min(kernel.gamma)
+ # return self.gamma
+
class GeneralizedAdditiveOverlap(BenchmarkFunction):
- def __init__(self, **kwargs):
- super().__init__(**kwargs)
- self.d = kwargs['d']
- self.type = "continuous"
-
- def eval_noiseless(self, X):
- super().eval_noiseless(X)
- (n, d) = X.size()
- sum_ = torch.sum(torch.exp(-(torch.from_numpy(np.diff(X.numpy(), axis=1) / 0.25)) ** 2), dim=1).view(-1, 1)
- return 0.5 * sum_ / self.scale
-
- def maximum_continuous(self):
- opt = torch.from_numpy(np.zeros(shape=(1, self.d)))
- value = self.eval_noiseless(opt)[0][0]
- return value
-
- def optimize(self, xtest, sigma, restarts=5, n=512):
- xtest = torch.zeros(n, self.d, dtype=torch.float64)
- xtest[:, 0] = torch.linspace(-0.5, 0.5, n, dtype=torch.float64)
- ytest = self.eval(xtest, sigma=sigma)
- kernel = stpy.kernels.KernelFunction(kernel_name="ard", gamma=torch.ones(self.d, dtype=torch.float64) * 0.1,
- groups=self.groups)
- GP = stpy.continuous_processes.gauss_procc.GaussianProcess(kernel_custom=kernel, s=sigma, d=self.d)
- GP.fit(xtest, ytest)
- GP.optimize_params(type="bandwidth", restarts=restarts)
- print("Optimized")
- # self.gamma = torch.min(kernel.gamma)
- # self.gamma = torch.zeros(1,1,dtype = torch.DoubleTensor)
- # self.gamma[0,0] =0.35
- self.gamma = torch.Tensor([0.35]).double()
- return self.gamma
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+ self.d = kwargs["d"]
+ self.type = "continuous"
+
+ def eval_noiseless(self, X):
+ super().eval_noiseless(X)
+ (n, d) = X.size()
+ sum_ = torch.sum(
+ torch.exp(-((torch.from_numpy(np.diff(X.numpy(), axis=1) / 0.25)) ** 2)),
+ dim=1,
+ ).view(-1, 1)
+ return 0.5 * sum_ / self.scale
+
+ def maximum_continuous(self):
+ opt = torch.from_numpy(np.zeros(shape=(1, self.d)))
+ value = self.eval_noiseless(opt)[0][0]
+ return value
+
+ def optimize(self, xtest, sigma, restarts=5, n=512):
+ xtest = torch.zeros(n, self.d, dtype=torch.float64)
+ xtest[:, 0] = torch.linspace(-0.5, 0.5, n, dtype=torch.float64)
+ ytest = self.eval(xtest, sigma=sigma)
+ kernel = stpy.kernels.KernelFunction(
+ kernel_name="ard",
+ gamma=torch.ones(self.d, dtype=torch.float64) * 0.1,
+ groups=self.groups,
+ )
+ GP = stpy.continuous_processes.gauss_procc.GaussianProcess(
+ kernel_custom=kernel, s=sigma, d=self.d
+ )
+ GP.fit(xtest, ytest)
+ GP.optimize_params(type="bandwidth", restarts=restarts)
+ print("Optimized")
+ # self.gamma = torch.min(kernel.gamma)
+ # self.gamma = torch.zeros(1,1,dtype = torch.DoubleTensor)
+ # self.gamma[0,0] =0.35
+ self.gamma = torch.tensor([0.35]).double()
+ return self.gamma
class SwissFEL(BenchmarkFunction):
- def __init__(self, **kwargs):
- super().__init__(**kwargs)
- self.d = kwargs['d']
- name = kwargs['dts']
- self.Simulator = FelSimulator(self.d, 0.0, "quadrupoles_2d")
- self.Simulator.load_fresh(name, dts='0')
- #self.groups = stpy.helpers.helper.full_group(self.d)
- GP = GaussianProcess(kernel_name="ard", d = self.d)
- self.Simulator.fit_simulator(GP, optimize="bandwidth", restarts=2)
- self.type = "continuous"
- self.s = self.Simulator.s
-
- def eval_noiseless(self, X):
- super().eval_noiseless(X)
- y = self.Simulator.eval(X, sigma=0)
- return y
-
- def maximum(self, xtest=None):
- return torch.max(self.Simulator.eval(xtest,sigma = 0))
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+ self.d = kwargs["d"]
+ name = kwargs["dts"]
+ self.Simulator = FelSimulator(self.d, 0.0, "quadrupoles_2d")
+ self.Simulator.load_fresh(name, dts="0")
+ # self.groups = stpy.helpers.helper.full_group(self.d)
+ GP = GaussianProcess(kernel_name="ard", d=self.d)
+ self.Simulator.fit_simulator(GP, optimize="bandwidth", restarts=2)
+ self.type = "continuous"
+ self.s = self.Simulator.s
+
+ def eval_noiseless(self, X):
+ super().eval_noiseless(X)
+ y = self.Simulator.eval(X, sigma=0)
+ return y
+
+ def maximum(self, xtest=None):
+ return torch.max(self.Simulator.eval(xtest, sigma=0))
class CustomBenchmark(BenchmarkFunction):
- def __init__(self, **kwargs):
- super().__init__(**kwargs)
- if 'func' in kwargs:
- self.eval_f = kwargs['func']
- else:
- self.eval_f = lambda x: x[:, 0].view(-1, 1) * 0
- if 'likelihood' in kwargs:
- self.likelihood = kwargs['likelihood']
- else:
- self.likelihood = None
-
- def set_eval(self, f, scale=1.):
- self.eval_f = f
- self.scale = scale
-
- def eval_noiseless(self, X):
- #super().eval_noiseless(X)
- y = self.eval_f(X)
- return y / self.scale
-
- def eval(self, X):
- if self.likelihood is not None:
- return self.eval_noiseless(X)+self.likelihood.sample_noise(X)
- else:
- return self.eval_noiseless(X)
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+ if "func" in kwargs:
+ self.eval_f = kwargs["func"]
+ else:
+ self.eval_f = lambda x: x[:, 0].view(-1, 1) * 0
+ if "likelihood" in kwargs:
+ self.likelihood = kwargs["likelihood"]
+ else:
+ self.likelihood = None
+
+ def set_eval(self, f, scale=1.0):
+ self.eval_f = f
+ self.scale = scale
+
+ def eval_noiseless(self, X):
+ # super().eval_noiseless(X)
+ y = self.eval_f(X)
+ return y / self.scale
+
+ def eval(self, X):
+ if self.likelihood is not None:
+ return self.eval_noiseless(X) + self.likelihood.sample_noise(X)
+ else:
+ return self.eval_noiseless(X)
+
class GaussianProcessSample(BenchmarkFunction):
- def __init__(self, **kwargs):
- super().__init__()
- self.d = kwargs['d']
- self.kernel_name = kwargs['name']
- self.gamma = kwargs['gamma']
- self.sigma = kwargs['sigma']
- self.n = kwargs['n']
- self.sample(self.n)
-
- def sample(self, n):
- self.xtest = self.interval(n)
- GP = stpy.continuous_processes.gauss_procc.GaussianProcess(s=self.sigma, gamma=self.gamma,
- kernel=self.kernel_name)
- self.sample = GP.sample(self.xtest).numpy()
-
- def isin(self, element, test_elements, assume_unique=False):
- (n, d) = element.shape
- (m, d) = test_elements.shape
- maskFull = np.full((n), False, dtype=bool)
- for j in range(m):
- mask = np.full((n), True, dtype=bool)
- for i in range(d):
- mask = np.logical_and(mask, np.in1d(element[:, i], test_elements[j, i], assume_unique=assume_unique))
- # mask = np.logical_and(mask, np.isclose(element[:, i], test_elements[j, i], atol=1e-02))
- # print (j, i, mask)
- maskFull = np.logical_or(mask, maskFull)
- # print (maskFull)
- return maskFull
-
- def eval_noiseless(self, X):
- super().eval_noiseless(X)
- mask = self.isin(self.xtest.numpy(), X.numpy())
- y = torch.from_numpy(self.sample[mask, :]).view(-1, 1)
- return y / self.scale
-
- def initial_guess(self, N, adv_inv=False):
- x = self.xtest[np.random.permutation(np.arange(0, self.xtest.size()[0], 1))[0:N], :]
- x = torch.sort(x, dim=0)[0]
- return x
-
- def scale_max(self, xtest=None):
- pass
-
- def optimize(self, xtest, sigma, restarts=5):
- pass
+ def __init__(self, **kwargs):
+ super().__init__()
+ self.d = kwargs["d"]
+ self.kernel_name = kwargs["name"]
+ self.gamma = kwargs["gamma"]
+ self.sigma = kwargs["sigma"]
+ self.n = kwargs["n"]
+ self.sample(self.n)
+
+ def sample(self, n):
+ self.xtest = self.interval(n)
+ GP = stpy.continuous_processes.gauss_procc.GaussianProcess(
+ s=self.sigma, gamma=self.gamma, kernel=self.kernel_name
+ )
+ self.sample = GP.sample(self.xtest).numpy()
+
+ def isin(self, element, test_elements, assume_unique=False):
+ (n, d) = element.shape
+ (m, d) = test_elements.shape
+ maskFull = np.full((n), False, dtype=bool)
+ for j in range(m):
+ mask = np.full((n), True, dtype=bool)
+ for i in range(d):
+ mask = np.logical_and(
+ mask,
+ np.in1d(
+ element[:, i], test_elements[j, i], assume_unique=assume_unique
+ ),
+ )
+ # mask = np.logical_and(mask, np.isclose(element[:, i], test_elements[j, i], atol=1e-02))
+ # print (j, i, mask)
+ maskFull = np.logical_or(mask, maskFull)
+ # print (maskFull)
+ return maskFull
+
+ def eval_noiseless(self, X):
+ super().eval_noiseless(X)
+ mask = self.isin(self.xtest.numpy(), X.numpy())
+ y = torch.from_numpy(self.sample[mask, :]).view(-1, 1)
+ return y / self.scale
+
+ def initial_guess(self, N, adv_inv=False):
+ x = self.xtest[
+ np.random.permutation(np.arange(0, self.xtest.size()[0], 1))[0:N], :
+ ]
+ x = torch.sort(x, dim=0)[0]
+ return x
+
+ def scale_max(self, xtest=None):
+ pass
+
+ def optimize(self, xtest, sigma, restarts=5):
+ pass
class KernelizedSample(BenchmarkFunction):
- def __init__(self, **kwargs):
- super().__init__()
- self.d = kwargs['d']
- # self.kernel_name = kwargs['name']
- # self.gamma = kwargs['gamma']
- self.sigma = kwargs['sigma']
- # self.n = kwargs['n']
- self.embed = kwargs['embed']
- self.m = kwargs['m']
- self.sample()
+ def __init__(self, **kwargs):
+ super().__init__()
+ self.d = kwargs["d"]
+ # self.kernel_name = kwargs['name']
+ # self.gamma = kwargs['gamma']
+ self.sigma = kwargs["sigma"]
+ # self.n = kwargs['n']
+ self.embed = kwargs["embed"]
+ self.m = kwargs["m"]
+ self.sample()
- def set_theta(self, theta):
- self.theta = theta
+ def set_theta(self, theta):
+ self.theta = theta
- def set_cutoff(self, cutoff):
- self.theta[cutoff:, 0] = 0
+ def set_cutoff(self, cutoff):
+ self.theta[cutoff:, 0] = 0
- def sample(self):
- print("basis size:", self.m)
- GP = stpy.continuous_processes.kernelized_features.KernelizedFeatures(d=self.d, m=self.m, embeding=self.embed)
- self.theta = GP.sample_theta(size=1)
- print(self.theta)
+ def sample(self):
+ print("basis size:", self.m)
+ GP = stpy.continuous_processes.kernelized_features.KernelizedFeatures(
+ d=self.d, m=self.m, embeding=self.embed
+ )
+ self.theta = GP.sample_theta(size=1)
+ print(self.theta)
- def eval_noiseless(self, X):
- super().eval_noiseless(X)
- y = torch.mm(self.embed(X), self.theta)
- return y / self.scale
+ def eval_noiseless(self, X):
+ super().eval_noiseless(X)
+ y = torch.mm(self.embed(X), self.theta)
+ return y / self.scale
- def scale_max(self, xtest=None):
- pass
+ def scale_max(self, xtest=None):
+ pass
- def optimize(self, xtest, sigma, restarts=5):
- pass
+ def optimize(self, xtest, sigma, restarts=5):
+ pass
class Simple1DFunction(BenchmarkFunction):
- def __init__(self, **kwargs):
- super().__init__()
- self.d = kwargs['d']
+ def __init__(self, **kwargs):
+ super().__init__()
+ self.d = kwargs["d"]
+
+ def eval_noiseless(self, X):
+ super().eval_noiseless(X)
+ z = (X + 0.5) * 1.2
+ y = -(1.4 - 3 * z) * torch.sin(18 * z)
+ return y
- def eval_noiseless(self, X):
- super().eval_noiseless(X)
- z = (X+0.5)*1.2
- y = -(1.4-3*z)*torch.sin(18*z)
- return y
+ def maximum(self, xtest):
+ return torch.max(torch.abs(self.eval_noiseless(xtest)))
- def maximum(self, xtest):
- return torch.max(torch.abs(self.eval_noiseless(xtest)))
class MultiRKHS(BenchmarkFunction):
- def __init__(self, **kwargs):
- super().__init__()
- self.d = 1
+ def __init__(self, **kwargs):
+ super().__init__()
+ self.d = 1
- def eval_noiseless(self, X):
- y = 10 * X ** 2 # + 0.1*torch.sin(10*X) #+ torch.sum(torch.exp(-(X-Xi)**2)*Wi)
- return y
+ def eval_noiseless(self, X):
+ y = 10 * X**2 # + 0.1*torch.sin(10*X) #+ torch.sum(torch.exp(-(X-Xi)**2)*Wi)
+ return y
- def maximum(self, xtest=None):
- pass
+ def maximum(self, xtest=None):
+ pass
class LinearBenchmark(BenchmarkFunction):
- def __init__(self, d, s):
- self.d = d
- self.s = s
- # sample a plane
- self.theta = torch.randn(d, 1, dtype=torch.float64)
-
- def eval_noiseless(self, X):
- y = torch.mm(X, self.theta)
- return y
-
- def eval(self, X, sigma=None):
- if sigma is None:
- sigma = self.s
- z = self.eval_noiseless(X)
- y = z + sigma * torch.randn(X.size()[0], 1, dtype=torch.float64)
- return y
+ def __init__(self, d, s):
+ self.d = d
+ self.s = s
+ # sample a plane
+ self.theta = torch.randn(d, 1, dtype=torch.float64)
+
+ def eval_noiseless(self, X):
+ y = torch.mm(X, self.theta)
+ return y
+
+ def eval(self, X, sigma=None):
+ if sigma is None:
+ sigma = self.s
+ z = self.eval_noiseless(X)
+ y = z + sigma * torch.randn(X.size()[0], 1, dtype=torch.float64)
+ return y
diff --git a/stpy/test_functions/neural_net.py b/stpy/test_functions/neural_net.py
index 82807cd..82d770d 100755
--- a/stpy/test_functions/neural_net.py
+++ b/stpy/test_functions/neural_net.py
@@ -4,206 +4,232 @@
def matlab_style_gauss2D(shape=(3, 3), sigma=0.5):
- m, n = [(ss - 1.) / 2. for ss in shape]
- y, x = np.ogrid[-m:m + 1, -n:n + 1]
- h = np.exp(-(x * x + y * y) / (2. * sigma * sigma))
- h[h < np.finfo(h.dtype).eps * h.max()] = 0
- sumh = h.sum()
- if sumh != 0:
- h /= sumh
- return h
+ m, n = [(ss - 1.0) / 2.0 for ss in shape]
+ y, x = np.ogrid[-m : m + 1, -n : n + 1]
+ h = np.exp(-(x * x + y * y) / (2.0 * sigma * sigma))
+ h[h < np.finfo(h.dtype).eps * h.max()] = 0
+ sumh = h.sum()
+ if sumh != 0:
+ h /= sumh
+ return h
def gaussian_filters(shape, sigmas):
- (height, width, enter, leave) = shape
- G = np.zeros(shape=shape)
- for q in range(enter):
- for index, val in enumerate(sigmas):
- G[:, :, q, index] = matlab_style_gauss2D(shape=(height, width), sigma=val)
- return G
+ (height, width, enter, leave) = shape
+ G = np.zeros(shape=shape)
+ for q in range(enter):
+ for index, val in enumerate(sigmas):
+ G[:, :, q, index] = matlab_style_gauss2D(shape=(height, width), sigma=val)
+ return G
def gaussian_filters_tf(shape, sigmas):
- G = gaussian_filters(shape, sigmas)
- return tf.cast(tf.Variable(G), tf.float32)
+ G = gaussian_filters(shape, sigmas)
+ return tf.cast(tf.Variable(G), tf.float32)
def deepnn(x, initialization_params, no_filters_1=32, no_filters_2=64):
- """deepnn builds the graph for a deep net for classifying digits.
- Args:
- x: an input tensor with the dimensions (N_examples, 784), where 784 is the
- number of pixels in a standard MNIST image.
- Returns:
- A tuple (y, keep_prob). y is a tensor of shape (N_examples, 10), with values
- equal to the logits of classifying the digit into one of 10 classes (the
- digits 0-9). keep_prob is a scalar placeholder for the probability of
- dropout.
- """
-
- # Reshape to use within a convolutional neural net.
- # Last dimension is for "features" - there is only one here, since images are
- # grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
- with tf.name_scope('reshape'):
- x_image = tf.reshape(x, [-1, 28, 28, 1])
-
- # First convolutional layer - maps one grayscale image to 32 feature maps.
- with tf.name_scope('conv1'):
- # W_conv1 = weight_variable([5, 5, 1, no_filters_1])
- W_conv1 = gaussian_filters_tf([5, 5, 1, no_filters_1], initialization_params[0:no_filters_1])
- b_conv1 = bias_variable([no_filters_1])
- h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
-
- # Pooling layer - downsamples by 2X.
- with tf.name_scope('pool1'):
- h_pool1 = max_pool_2x2(h_conv1)
-
- # Second convolutional layer -- maps 32 feature maps to 64.
- with tf.name_scope('conv2'):
- # W_conv2 = weight_variable([5, 5, no_filters_1, no_filters_2])
- W_conv2 = gaussian_filters_tf([5, 5, no_filters_1, no_filters_2], initialization_params[no_filters_1:])
- b_conv2 = bias_variable([no_filters_2])
- h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
-
- # Second pooling layer.
- with tf.name_scope('pool2'):
- h_pool2 = max_pool_2x2(h_conv2)
-
- # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image
- # is down to 7x7x64 feature maps -- maps this to 1024 features.
- with tf.name_scope('fc1'):
- W_fc1 = weight_variable([7 * 7 * no_filters_2, 1024])
- b_fc1 = bias_variable([1024])
-
- h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * no_filters_2])
- h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
-
- # Dropout - controls the complexity of the model, prevents co-adaptation of
- # features.
- with tf.name_scope('dropout'):
- keep_prob = tf.placeholder(tf.float32)
- h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
-
- # Map the 1024 features to 10 classes, one for each digit
- with tf.name_scope('fc2'):
- W_fc2 = weight_variable([1024, 10])
- b_fc2 = bias_variable([10])
-
- y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
- return y_conv, keep_prob
+ """deepnn builds the graph for a deep net for classifying digits.
+ Args:
+ x: an input tensor with the dimensions (N_examples, 784), where 784 is the
+ number of pixels in a standard MNIST image.
+ Returns:
+ A tuple (y, keep_prob). y is a tensor of shape (N_examples, 10), with values
+ equal to the logits of classifying the digit into one of 10 classes (the
+ digits 0-9). keep_prob is a scalar placeholder for the probability of
+ dropout.
+ """
+
+ # Reshape to use within a convolutional neural net.
+ # Last dimension is for "features" - there is only one here, since images are
+ # grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
+ with tf.name_scope("reshape"):
+ x_image = tf.reshape(x, [-1, 28, 28, 1])
+
+ # First convolutional layer - maps one grayscale image to 32 feature maps.
+ with tf.name_scope("conv1"):
+ # W_conv1 = weight_variable([5, 5, 1, no_filters_1])
+ W_conv1 = gaussian_filters_tf(
+ [5, 5, 1, no_filters_1], initialization_params[0:no_filters_1]
+ )
+ b_conv1 = bias_variable([no_filters_1])
+ h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
+
+ # Pooling layer - downsamples by 2X.
+ with tf.name_scope("pool1"):
+ h_pool1 = max_pool_2x2(h_conv1)
+
+ # Second convolutional layer -- maps 32 feature maps to 64.
+ with tf.name_scope("conv2"):
+ # W_conv2 = weight_variable([5, 5, no_filters_1, no_filters_2])
+ W_conv2 = gaussian_filters_tf(
+ [5, 5, no_filters_1, no_filters_2], initialization_params[no_filters_1:]
+ )
+ b_conv2 = bias_variable([no_filters_2])
+ h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
+
+ # Second pooling layer.
+ with tf.name_scope("pool2"):
+ h_pool2 = max_pool_2x2(h_conv2)
+
+ # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image
+ # is down to 7x7x64 feature maps -- maps this to 1024 features.
+ with tf.name_scope("fc1"):
+ W_fc1 = weight_variable([7 * 7 * no_filters_2, 1024])
+ b_fc1 = bias_variable([1024])
+
+ h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * no_filters_2])
+ h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
+
+ # Dropout - controls the complexity of the model, prevents co-adaptation of
+ # features.
+ with tf.name_scope("dropout"):
+ keep_prob = tf.placeholder(tf.float32)
+ h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
+
+ # Map the 1024 features to 10 classes, one for each digit
+ with tf.name_scope("fc2"):
+ W_fc2 = weight_variable([1024, 10])
+ b_fc2 = bias_variable([10])
+
+ y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
+ return y_conv, keep_prob
def conv2d(x, W):
- """conv2d returns a 2d convolution layer with full stride."""
- return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
+ """conv2d returns a 2d convolution layer with full stride."""
+ return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME")
def max_pool_2x2(x):
- """max_pool_2x2 downsamples a feature map by 2X."""
- return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
- strides=[1, 2, 2, 1], padding='SAME')
+ """max_pool_2x2 downsamples a feature map by 2X."""
+ return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
def weight_variable(shape):
- """weight_variable generates a weight variable of a given shape."""
- initial = tf.truncated_normal(shape, stddev=0.1)
- return tf.Variable(initial)
+ """weight_variable generates a weight variable of a given shape."""
+ initial = tf.truncated_normal(shape, stddev=0.1)
+ return tf.Variable(initial)
def bias_variable(shape):
- """bias_variable generates a bias variable of a given shape."""
- initial = tf.constant(0.1, shape=shape)
- return tf.Variable(initial)
-
-
-def train_network(mnist, verbose=True, initialization_params=None, min_steps_val=10,
- val_size=3000, dropout=0.5, learning_rate=10e-4, maxiter=500, val_count=1, batch_size=80, **kwargs):
- # Import data
- # Create the model
- x = tf.placeholder(tf.float32, [None, 784])
-
- # Define loss and optimizer
- y_ = tf.placeholder(tf.float32, [None, 10])
-
- # Build the graph for the deep net
- y_conv, keep_prob = deepnn(x, initialization_params)
-
- with tf.name_scope('loss'):
- cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv)
-
- cross_entropy = tf.reduce_mean(cross_entropy)
-
- with tf.name_scope('adam_optimizer'):
- train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)
-
- with tf.name_scope('accuracy'):
- correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
- correct_prediction = tf.cast(correct_prediction, tf.float32)
- accuracy = tf.reduce_mean(correct_prediction)
-
- # graph_location = tempfile.mkdtemp()
- # print('Saving graph to: %s' % graph_location)
- # train_writer = tf.summary.FileWriter(graph_location)
- # train_writer.add_graph(tf.get_default_graph())
-
- init = tf.initialize_all_variables()
-
- with tf.Session() as sess:
- sess.run(init)
- # sess.run(tf.global_variables_initializer())
-
- oldval_scores = np.zeros((min_steps_val))
- j = 0
- for i in range(maxiter):
- batch = mnist.train.next_batch(batch_size)
- train_accuracy = accuracy.eval(feed_dict={
- x: batch[0], y_: batch[1], keep_prob: 1.0})
- if i % val_count == 0:
-
- val_accuracy = accuracy.eval(feed_dict={
- x: mnist.validation.images[0:val_size],
- y_: mnist.validation.labels[0:val_size], keep_prob: 1.0})
- oldval_scores[j % min_steps_val] = val_accuracy
- j = j + 1
- if verbose == True:
- print('step %d, training accuracy: %f, validation accuracy: %f' % (i, train_accuracy, val_accuracy))
- ## validation stopping
- if i > min_steps_val:
- if np.mean(oldval_scores) > val_accuracy:
- if verbose == True:
- print("Validation stopping")
- break
- train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: dropout})
-
- test_accuracy = accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0})
- if verbose == True:
- print('test accuracy %g' % test_accuracy)
-
- return (i, test_accuracy)
+ """bias_variable generates a bias variable of a given shape."""
+ initial = tf.constant(0.1, shape=shape)
+ return tf.Variable(initial)
+
+
+def train_network(
+ mnist,
+ verbose=True,
+ initialization_params=None,
+ min_steps_val=10,
+ val_size=3000,
+ dropout=0.5,
+ learning_rate=10e-4,
+ maxiter=500,
+ val_count=1,
+ batch_size=80,
+ **kwargs
+):
+ # Import data
+ # Create the model
+ x = tf.placeholder(tf.float32, [None, 784])
+
+ # Define loss and optimizer
+ y_ = tf.placeholder(tf.float32, [None, 10])
+
+ # Build the graph for the deep net
+ y_conv, keep_prob = deepnn(x, initialization_params)
+
+ with tf.name_scope("loss"):
+ cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
+ labels=y_, logits=y_conv
+ )
+
+ cross_entropy = tf.reduce_mean(cross_entropy)
+
+ with tf.name_scope("adam_optimizer"):
+ train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)
+
+ with tf.name_scope("accuracy"):
+ correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
+ correct_prediction = tf.cast(correct_prediction, tf.float32)
+ accuracy = tf.reduce_mean(correct_prediction)
+
+ # graph_location = tempfile.mkdtemp()
+ # print('Saving graph to: %s' % graph_location)
+ # train_writer = tf.summary.FileWriter(graph_location)
+ # train_writer.add_graph(tf.get_default_graph())
+
+ init = tf.initialize_all_variables()
+
+ with tf.Session() as sess:
+ sess.run(init)
+ # sess.run(tf.global_variables_initializer())
+
+ oldval_scores = np.zeros((min_steps_val))
+ j = 0
+ for i in range(maxiter):
+ batch = mnist.train.next_batch(batch_size)
+ train_accuracy = accuracy.eval(
+ feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0}
+ )
+ if i % val_count == 0:
+
+ val_accuracy = accuracy.eval(
+ feed_dict={
+ x: mnist.validation.images[0:val_size],
+ y_: mnist.validation.labels[0:val_size],
+ keep_prob: 1.0,
+ }
+ )
+ oldval_scores[j % min_steps_val] = val_accuracy
+ j = j + 1
+ if verbose == True:
+ print(
+ "step %d, training accuracy: %f, validation accuracy: %f"
+ % (i, train_accuracy, val_accuracy)
+ )
+ ## validation stopping
+ if i > min_steps_val:
+ if np.mean(oldval_scores) > val_accuracy:
+ if verbose == True:
+ print("Validation stopping")
+ break
+ train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: dropout})
+
+ test_accuracy = accuracy.eval(
+ feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}
+ )
+ if verbose == True:
+ print("test accuracy %g" % test_accuracy)
+
+ return (i, test_accuracy)
if __name__ == "__main__":
- N = 16
- N2 = 32
- sigmas = np.random.randn(N + N2)
- sigmas = sigmas ** 2
- # print (train_network("/tmp/tensorflow", dropout = 0.7, verbose = False, val_size = 1, initialization_params = sigmas, no_filters_1=N, no_filters_2=N2))
-
- ##############################################
- ######## Visualization of Filters ###########
- ##############################################
- import matplotlib as mpl
-
- V = gaussian_filters((5, 5, 1, N), sigmas[0:N])
- fig, axes = plt.subplots(nrows=4, ncols=int(N / 4))
- for index, ax in enumerate(axes.flat):
- im = ax.imshow(V[:, :, 0, index], interpolation='nearest', vmin=0, vmax=1)
- cax, kw = mpl.colorbar.make_axes([ax for ax in axes.flat])
- plt.colorbar(im, cax=cax, **kw)
-
- V2 = gaussian_filters((5, 5, N, N2), sigmas[N:])
- fig, axes = plt.subplots(nrows=8, ncols=int(N2 / 8))
- for index, ax in enumerate(axes.flat):
- im = ax.imshow(V2[:, :, 0, index], interpolation='nearest', vmin=0, vmax=1)
- cax, kw = mpl.colorbar.make_axes([ax for ax in axes.flat])
- plt.colorbar(im, cax=cax, **kw)
- plt.show()
+ N = 16
+ N2 = 32
+ sigmas = np.random.randn(N + N2)
+ sigmas = sigmas**2
+ # print (train_network("/tmp/tensorflow", dropout = 0.7, verbose = False, val_size = 1, initialization_params = sigmas, no_filters_1=N, no_filters_2=N2))
+
+ ##############################################
+ ######## Visualization of Filters ###########
+ ##############################################
+ import matplotlib as mpl
+
+ V = gaussian_filters((5, 5, 1, N), sigmas[0:N])
+ fig, axes = plt.subplots(nrows=4, ncols=int(N / 4))
+ for index, ax in enumerate(axes.flat):
+ im = ax.imshow(V[:, :, 0, index], interpolation="nearest", vmin=0, vmax=1)
+ cax, kw = mpl.colorbar.make_axes([ax for ax in axes.flat])
+ plt.colorbar(im, cax=cax, **kw)
+
+ V2 = gaussian_filters((5, 5, N, N2), sigmas[N:])
+ fig, axes = plt.subplots(nrows=8, ncols=int(N2 / 8))
+ for index, ax in enumerate(axes.flat):
+ im = ax.imshow(V2[:, :, 0, index], interpolation="nearest", vmin=0, vmax=1)
+ cax, kw = mpl.colorbar.make_axes([ax for ax in axes.flat])
+ plt.colorbar(im, cax=cax, **kw)
+ plt.show()
diff --git a/stpy/test_functions/parallel_coordinates_plot.py b/stpy/test_functions/parallel_coordinates_plot.py
index e9d9a15..836ed92 100755
--- a/stpy/test_functions/parallel_coordinates_plot.py
+++ b/stpy/test_functions/parallel_coordinates_plot.py
@@ -8,63 +8,65 @@
def parallel_coordinates_bo(X, Y, names=None, scaling=None, fig_size=(20, 10)):
- """
- Parallel plot graph
+ """
+ Parallel plot graph
- X : 2D numpy array of parameters [points,parameters]
- Y : 1D numpy array of values
- names: list of names size of (parameters)
- scaling:
- "stat": statistical scaling
- None : no scaling
- (low,hig): tuple, scales to [-1,1]
- fig_size: fig size in inches
- """
+ X : 2D numpy array of parameters [points,parameters]
+ Y : 1D numpy array of values
+ names: list of names size of (parameters)
+ scaling:
+ "stat": statistical scaling
+ None : no scaling
+ (low,hig): tuple, scales to [-1,1]
+ fig_size: fig size in inches
+ """
- if scaling == "stat":
- scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
- scaler.fit(X)
- Z = scaler.transform(X)
- elif scaling is None:
- Z = X
- else:
- try:
- Z = X
- up, low = scaling
- d = X.shape[1]
- for i in range(d):
- Z[:, i] = (2 * X[:, i]) / (up[i] - low[i]) + (1.0 - 2 * up[i] / (up[i] - low[i]))
- except:
- pass
+ if scaling == "stat":
+ scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
+ scaler.fit(X)
+ Z = scaler.transform(X)
+ elif scaling is None:
+ Z = X
+ else:
+ try:
+ Z = X
+ up, low = scaling
+ d = X.shape[1]
+ for i in range(d):
+ Z[:, i] = (2 * X[:, i]) / (up[i] - low[i]) + (
+ 1.0 - 2 * up[i] / (up[i] - low[i])
+ )
+ except:
+ pass
- D = np.append(Z, Y, axis=1)
- data = pd.DataFrame(D)
- data = data.sort_values(by=Z.shape[1])
- names = copy.copy(names)
- names.append(Z.shape[1])
- if names is not None:
- data.columns = names
- plt.figure(figsize=(fig_size))
- plt.xticks(rotation=45)
- ax = parallel_coordinates(data, Z.shape[1], colormap="summer")
- ax.get_legend().remove()
- plt.show()
+ D = np.append(Z, Y, axis=1)
+ data = pd.DataFrame(D)
+ data = data.sort_values(by=Z.shape[1])
+ names = copy.copy(names)
+ names.append(Z.shape[1])
+ if names is not None:
+ data.columns = names
+ plt.figure(figsize=(fig_size))
+ plt.xticks(rotation=45)
+ ax = parallel_coordinates(data, Z.shape[1], colormap="summer")
+ ax.get_legend().remove()
+ plt.show()
if __name__ == "__main__":
- from stpy.test_functions.protein_benchmark import ProteinBenchmark
+ from stpy.test_functions.protein_benchmark import ProteinBenchmark
- Benchmark = ProteinBenchmark("protein_data_gb1.h5", dim=3, ref=['A', 'B', 'C', 'D'])
- names = Benchmark.data['P1'].values
- Benchmark.self_translate()
- vals = Benchmark.data['P1'].values
+ Benchmark = ProteinBenchmark("protein_data_gb1.h5", dim=3, ref=["A", "B", "C", "D"])
+ names = Benchmark.data["P1"].values
+ Benchmark.self_translate()
+ vals = Benchmark.data["P1"].values
- print(Benchmark.data)
- X = Benchmark.data.values[0:8000, 0:3]
- Y = Benchmark.data.values[0:8000, 5].reshape(-1, 1)
- print(X.shape, Y.shape)
- names = ["P1", "P2", "P3"]
- # plt.yticks(vals, names)
- parallel_coordinates_bo(X, Y, names=names)
+ print(Benchmark.data)
+ X = Benchmark.data.values[0:8000, 0:3]
+ Y = Benchmark.data.values[0:8000, 5].reshape(-1, 1)
+ print(X.shape, Y.shape)
+ names = ["P1", "P2", "P3"]
+ # plt.yticks(vals, names)
+ parallel_coordinates_bo(X, Y, names=names)
- plt.show()
+ plt.show()
diff --git a/stpy/test_functions/protein_benchmark.py b/stpy/test_functions/protein_benchmark.py
index 1dc4189..008bfe4 100755
--- a/stpy/test_functions/protein_benchmark.py
+++ b/stpy/test_functions/protein_benchmark.py
@@ -3,407 +3,489 @@
import pandas as pd
import torch
-#import stpy.helpers.helper as helper
-#from stpy.test_functions.benchmarks import BenchmarkFunction
-
-
-class ProteinOperator():
-
- def __init__(self):
-
- self.real_names = {'A': 'Ala', 'R': 'Arg', 'N': 'Asn', 'D': 'Asp', 'C': 'Cys', 'Q': 'Gln', 'E': 'Glu',
- 'G': 'Gly',
- 'H': 'His', 'I': 'Iso', 'L': 'Leu', 'K': 'Lys', 'M': 'Met', 'F': 'Phe',
- 'P': 'Pro', 'S': 'Ser', 'T': 'Thr', 'W': 'Trp', 'Y': 'Tyr', 'V': 'Val', 'B': 'Asx'}
-
- self.dictionary = {'A': 0, 'R': 1, 'N': 2, 'D': 3, 'C': 4, 'Q': 5, 'E': 6, 'G': 7,
- 'H': 8, 'I': 9, 'L': 10, 'K': 11, 'M': 12, 'F': 13,
- 'P': 14, 'S': 15, 'T': 16, 'W': 17, 'Y': 18, 'V': 19, 'B': 3}
-
- self.inv_dictionary = {v: k for k, v in self.dictionary.items()}
-
- self.inv_real_names = {v: k for k, v in self.real_names.items()}
-
- self.Negative = ['D', 'E']
- self.Positive = ['R', 'K', 'H']
- self.Aromatic = ['F', 'W', 'Y', 'H']
- self.Polar = ['N', 'Q', 'S', 'T', 'Y']
- self.Aliphatic = ['A', 'G', 'I', 'L', 'V']
- self.Amide = ['N', 'Q']
- self.Sulfur = ['C', 'M']
- self.Hydroxil = ['S', 'T']
- self.Small = ['A', 'S', 'T', 'P', 'G', 'V']
- self.Medium = ['M', 'L', 'I', 'C', 'N', 'Q', 'K', 'D', 'E']
- self.Large = ['R', 'H', 'W', 'F', 'Y']
- self.Hydro = ['M', 'L', 'I', 'V', 'A']
- self.Cyclic = ['P']
- self.Random = ['F', 'W', 'L', 'S', 'D']
-
- def translate(self, X):
- f = lambda x: self.dictionary[x]
- Y = np.zeros(shape=X.shape).astype(int)
- for i in range(X.shape[0]):
- for j in range(X.shape[1]):
- Y[i, j] = f(X[i, j])
- return Y
-
- def remove_wild_type_mutations(self, mutation):
- mutation_split = mutation.split("+")
- output = []
- for mut in mutation_split:
- if mut[0] != mut[-1]:
- output.append(mut)
- return "+".join(output)
-
- def get_variant_code(self, mutation):
- mutation_split = mutation.split("+")
- return "".join([mut[-1] for mut in mutation_split])
-
- def get_substitutes_from_mutation(self, mutation):
- mutation_split = mutation.split("+")
- original = []
- new = []
- positions = []
-
- for mut in mutation_split:
- original.append(mut[0])
- new.append(mut[-1])
- positions.append(int(mut[1:-1]))
-
- return (original, new, positions)
-
- def mutation(self, original_seq, positions, new_seq):
- old_seq = list(original_seq)
- new_seq = list(new_seq)
- identifier = []
- for old, new, position in zip(old_seq, new_seq, positions):
- if old != new:
- identifier.append(old + str(position) + new)
- return '+'.join(identifier)
-
- def interval_number(self, dim=None):
- if dim is None:
- dim = self.dim
- arr = self.interval_letters(dim=dim)
- out = self.translate(arr)
- return out
-
- def interval_onehot(self, dim=None):
- if dim is None:
- dim = self.dim
- arr = self.interval_letters(dim=dim)
- out = self.translate_one_hot(arr)
- return out
-
- def interval_letters(self, dim=None):
- if dim is None:
- dim = self.dim
-
- names = list(self.dictionary.keys())
- names.remove('B')
- arr = []
- for i in range(dim):
- arr.append(names)
- out = helper.cartesian(arr)
- return out
-
- def translate_amino_acid(self, letter):
- return self.dictionary[letter]
-
- def translate_mutation_series(self, series):
- f = lambda x: np.array(list(map(int, [self.dictionary[a] for a in list(str(x))]))).reshape(-1, 1)
- xtest = np.concatenate(series.apply(f).values, axis=1).T
- return xtest
-
- def translate_one_hot(self, X):
- try:
- Y = self.translate(X)
- except:
- Y = X
- n, d = list(X.shape)
- Z = np.zeros(shape=(n, d * self.total))
- for i in range(n):
- for j in range(d):
- Z[i, Y[i, j] + j * self.total] = 1.0
-
- return Z
-
- def get_real_name(self, name):
- out = []
- for i in name:
- out.append(self.real_names[i])
- return out
-
-
-class ProteinBenchmark():
-
- def __init__(self, fname, dim=1, ref=['D', 'D', 'D', 'D'], avg=False, scale=True):
- """
- initialize the protein benchmark
-
- fname : dataset name
- dim : dimension of the dataset
- ref : for smaller dimensions what is the reference in the 4 dim space?
- avg : average the effect over other combinations in lower dimensions
- """
-
- """
+# import stpy.helpers.helper as helper
+# from stpy.test_functions.benchmarks import BenchmarkFunction
+
+
+class ProteinOperator:
+
+ def __init__(self):
+
+ self.real_names = {
+ "A": "Ala",
+ "R": "Arg",
+ "N": "Asn",
+ "D": "Asp",
+ "C": "Cys",
+ "Q": "Gln",
+ "E": "Glu",
+ "G": "Gly",
+ "H": "His",
+ "I": "Iso",
+ "L": "Leu",
+ "K": "Lys",
+ "M": "Met",
+ "F": "Phe",
+ "P": "Pro",
+ "S": "Ser",
+ "T": "Thr",
+ "W": "Trp",
+ "Y": "Tyr",
+ "V": "Val",
+ "B": "Asx",
+ }
+
+ self.dictionary = {
+ "A": 0,
+ "R": 1,
+ "N": 2,
+ "D": 3,
+ "C": 4,
+ "Q": 5,
+ "E": 6,
+ "G": 7,
+ "H": 8,
+ "I": 9,
+ "L": 10,
+ "K": 11,
+ "M": 12,
+ "F": 13,
+ "P": 14,
+ "S": 15,
+ "T": 16,
+ "W": 17,
+ "Y": 18,
+ "V": 19,
+ "B": 3,
+ }
+
+ self.inv_dictionary = {v: k for k, v in self.dictionary.items()}
+
+ self.inv_real_names = {v: k for k, v in self.real_names.items()}
+
+ self.Negative = ["D", "E"]
+ self.Positive = ["R", "K", "H"]
+ self.Aromatic = ["F", "W", "Y", "H"]
+ self.Polar = ["N", "Q", "S", "T", "Y"]
+ self.Aliphatic = ["A", "G", "I", "L", "V"]
+ self.Amide = ["N", "Q"]
+ self.Sulfur = ["C", "M"]
+ self.Hydroxil = ["S", "T"]
+ self.Small = ["A", "S", "T", "P", "G", "V"]
+ self.Medium = ["M", "L", "I", "C", "N", "Q", "K", "D", "E"]
+ self.Large = ["R", "H", "W", "F", "Y"]
+ self.Hydro = ["M", "L", "I", "V", "A"]
+ self.Cyclic = ["P"]
+ self.Random = ["F", "W", "L", "S", "D"]
+
+ def translate(self, X):
+ f = lambda x: self.dictionary[x]
+ Y = np.zeros(shape=X.shape).astype(int)
+ for i in range(X.shape[0]):
+ for j in range(X.shape[1]):
+ Y[i, j] = f(X[i, j])
+ return Y
+
+ def remove_wild_type_mutations(self, mutation):
+ mutation_split = mutation.split("+")
+ output = []
+ for mut in mutation_split:
+ if mut[0] != mut[-1]:
+ output.append(mut)
+ return "+".join(output)
+
+ def get_variant_code(self, mutation):
+ mutation_split = mutation.split("+")
+ return "".join([mut[-1] for mut in mutation_split])
+
+ def get_substitutes_from_mutation(self, mutation):
+ mutation_split = mutation.split("+")
+ original = []
+ new = []
+ positions = []
+
+ for mut in mutation_split:
+ original.append(mut[0])
+ new.append(mut[-1])
+ positions.append(int(mut[1:-1]))
+
+ return (original, new, positions)
+
+ def mutation(self, original_seq, positions, new_seq):
+ old_seq = list(original_seq)
+ new_seq = list(new_seq)
+ identifier = []
+ for old, new, position in zip(old_seq, new_seq, positions):
+ if old != new:
+ identifier.append(old + str(position) + new)
+ return "+".join(identifier)
+
+ def interval_number(self, dim=None):
+ if dim is None:
+ dim = self.dim
+ arr = self.interval_letters(dim=dim)
+ out = self.translate(arr)
+ return out
+
+ def interval_onehot(self, dim=None):
+ if dim is None:
+ dim = self.dim
+ arr = self.interval_letters(dim=dim)
+ out = self.translate_one_hot(arr)
+ return out
+
+ def interval_letters(self, dim=None):
+ if dim is None:
+ dim = self.dim
+
+ names = list(self.dictionary.keys())
+ names.remove("B")
+ arr = []
+ for i in range(dim):
+ arr.append(names)
+ out = helper.cartesian(arr)
+ return out
+
+ def translate_amino_acid(self, letter):
+ return self.dictionary[letter]
+
+ def translate_mutation_series(self, series):
+ f = lambda x: np.array(
+ list(map(int, [self.dictionary[a] for a in list(str(x))]))
+ ).reshape(-1, 1)
+ xtest = np.concatenate(series.apply(f).values, axis=1).T
+ return xtest
+
+ def translate_one_hot(self, X):
+ try:
+ Y = self.translate(X)
+ except:
+ Y = X
+ n, d = list(X.shape)
+ Z = np.zeros(shape=(n, d * self.total))
+ for i in range(n):
+ for j in range(d):
+ Z[i, Y[i, j] + j * self.total] = 1.0
+
+ return Z
+
+ def get_real_name(self, name):
+ out = []
+ for i in name:
+ out.append(self.real_names[i])
+ return out
+
+
+class ProteinBenchmark:
+
+ def __init__(self, fname, dim=1, ref=["D", "D", "D", "D"], avg=False, scale=True):
+ """
+ initialize the protein benchmark
+
+ fname : dataset name
+ dim : dimension of the dataset
+ ref : for smaller dimensions what is the reference in the 4 dim space?
+ avg : average the effect over other combinations in lower dimensions
+ """
+
+ """
Convention of the following dictionary is to map B->D as B can stand for N and D.
"""
- self.dictionary = {'A': 0, 'R': 1, 'N': 2, 'D': 3, 'C': 4, 'Q': 5, 'E': 6, 'G': 7,
- 'H': 8, 'I': 9, 'L': 10, 'K': 11, 'M': 12, 'F': 13,
- 'P': 14, 'S': 15, 'T': 16, 'W': 17, 'Y': 18, 'V': 19, 'B': 3}
-
- f = lambda x: self.dictionary[x]
-
- self.total = 20
- self.dim = dim
- self.ref = ref
- self.ref_translated = [f(x) for x in self.ref]
-
- dset = pd.read_hdf(fname)
-
- # average the effect over others
- if avg == False:
- mask = np.full(dset.shape[0], True, dtype=bool)
- for j in range(4 - dim):
- mask = np.logical_and(mask, dset["P" + str(4 - j)] == ref[3 - j])
- self.data = dset[mask]
- else:
- # avg. not implemented
- pass
-
- if scale == True:
- maximum = np.max(self.data[:]['Fitness'])
- self.data[:]['Fitness'] = self.data[:]['Fitness'] / maximum
- else:
- pass
-
- self.real_names = {'A': 'Ala', 'R': 'Arg', 'N': 'Asn', 'D': 'Asp', 'C': 'Cys', 'Q': 'Gln', 'E': 'Glu',
- 'G': 'Gly',
- 'H': 'His', 'I': 'Iso', 'L': 'Leu', 'K': 'Lys', 'M': 'Met', 'F': 'Phe',
- 'P': 'Pro', 'S': 'Ser', 'T': 'Thr', 'W': 'Trp', 'Y': 'Tyr', 'V': 'Val', 'B': 'Asx'}
-
- self.inv_real_names = {v: k for k, v in self.real_names.items()}
-
- self.Negative = ['D', 'E']
- self.Positive = ['R', 'K', 'H']
- self.Aromatic = ['F', 'W', 'Y', 'H']
- self.Polar = ['N', 'Q', 'S', 'T', 'Y']
- self.Aliphatic = ['A', 'G', 'I', 'L', 'V']
- self.Amide = ['N', 'Q']
- self.Sulfur = ['C', 'M']
- self.Hydroxil = ['S', 'T']
- self.Small = ['A', 'S', 'T', 'P', 'G', 'V']
- self.Medium = ['M', 'L', 'I', 'C', 'N', 'Q', 'K', 'D', 'E']
- self.Large = ['R', 'H', 'W', 'F', 'Y']
- self.Hydro = ['M', 'L', 'I', 'V', 'A']
- self.Cyclic = ['P']
- self.Random = ['F', 'W', 'L', 'S', 'D']
-
- def get_real_name(self, name):
- out = []
- for i in name:
- out.append(self.real_names[i])
- return out
-
- def data_summary(self):
- y = self.data['Fitness'].values
- maximum = np.max(y)
- minimum = np.min(y)
- return (maximum, minimum)
-
- def translate(self, X):
- f = lambda x: self.dictionary[x]
- Y = np.zeros(shape=X.shape).astype(int)
- for i in range(X.shape[0]):
- for j in range(X.shape[1]):
- Y[i, j] = f(X[i, j])
- return Y
-
- def translate_one_hot(self, X):
- try:
- Y = self.translate(X)
- except:
- Y = X
- n, d = list(X.shape)
- Z = np.zeros(shape=(n, d * self.total))
- for i in range(n):
- for j in range(d):
- Z[i, Y[i, j] + j * self.total] = 1.0
-
- return Z
-
- def self_translate(self):
- """
- self translate from
- :return:
- """
- f = lambda x: self.dictionary[x]
- for j in range(4):
- self.data['P' + str(j + 1)] = self.data['P' + str(j + 1)].apply(f)
-
- def set_fidelity(self, F):
- self.Fidelity = F
-
- def scale(self):
- self.scale = 1
-
- def eval_noiseless(self, X):
- """
- evaluate depends on the dimension
- """
- res = []
-
- # append
- n = X.shape[0]
- C = np.tile(self.ref_translated[self.dim:4], (n, 1))
- X_ = np.concatenate((X, C), axis=1)
- for i in range(n):
- x = X_[i, :]
- mask = np.full(self.data.shape[0], True, dtype=bool)
- for j in range(4):
- # print (x[j],self.data["P" + str(j + 1)])
- mask = np.logical_and(mask, self.data["P" + str(j + 1)] == x[j])
- res.append(self.data[mask]['Fitness'].values)
- return np.array(res).reshape(-1, 1)
-
- # def actions(self):
- # number_of_actions = self.dim*(20**(self.dim-1))
- #
- # actions = []
- #
- # ## this includes (20,d) actions
- # one_dim = self.interval_onehot(dim = 1)
- # #print (one_dim)
- # #print ("one dim",one_dim.shape)
- # if self.dim - 1>0:
- # # this includes (20**(d-1), d) actions
- # others = self.interval_onehot(dim = self.dim - 1)
- # #print ("others:", others.shape)
- # for fix_dim in range(self.dim):
- # #print (fix_dim)
- # action = np.zeros(shape=(20 ** (self.dim - 1), 20 * self.dim))
- # for elem in one_dim:
- # #print (fix_dim*20+(fix_dim+1)*20)
- # action[:,fix_dim*20:(fix_dim+1)*20]=elem
- # action[:,0:fix_dim*20] = others[:,0:fix_dim*20]
- # action[:,(fix_dim+1) * 20:] = others[:,fix_dim*20:]
- # actions.append(action)
- # return actions
- # else:
- # return one_dim
-
- def actions(self):
- number_of_actions = self.dim * (20 ** (self.dim - 1))
-
- actions = []
-
- ## this includes (20,d) actions
- one_dim = self.interval_onehot(dim=1)
- # print (one_dim)
- # print ("one dim",one_dim.shape)
- if self.dim - 1 > 0:
- # this includes (20**(d-1), d) actions
- others = self.interval_onehot(dim=self.dim - 1)
- # print ("others:", others.shape)
- for elem in others:
- for fix_dim in range(self.dim):
- action = np.zeros(shape=(20, 20 * self.dim))
- action[:, fix_dim * 20:(fix_dim + 1) * 20] = one_dim
- j = 0
- for i in range(self.dim):
- if i != fix_dim:
- action[:, i * 20:(i + 1) * 20] = elem[j * 20:(j + 1) * 20]
- j = j + 1
-
- actions.append(action)
- return actions
- else:
- return one_dim
-
- def subsample_dts_indice_only(self, N, split=0.9):
- self.self_translate()
- xtest = self.interval_onehot()
-
- indices = np.arange(0, N, 1)
- sample = indices
- np.random.shuffle(indices)
-
- train = sample[0:int(np.round(split * N))]
- test = sample[int(np.round(split * N)):N]
-
- return (train, test)
-
- def subsample_dts(self, N, split=0.90):
- self.self_translate()
- xtest = self.interval_onehot()
- indices = np.arange(0, N, 1)
-
- indices = np.random.shuffle(indices)
- sample = xtest[indices, :]
-
- y_sample = self.eval_one_hot(sample)
-
- x_train = sample[0:int(np.round(split * N)), :]
- y_train = y_sample[0:int(np.round(split * N)), :]
- x_test = sample[int(np.round(split * N)):N, :]
- y_test = y_sample[int(np.round(split * N)):N, :]
-
- return (x_train, y_train, x_test, y_test)
-
- def eval_fidelity(self, X):
- return self.Fidelity(X)
-
- def eval(self, X):
- z = self.eval_noiseless(X)
- return z
-
- def eval_one_hot(self, X):
- n, d = list(X.shape)
- Z = np.zeros(shape=(n, self.dim))
- for i in range(n):
- for j in range(d):
- if X[i, j] > 0:
- Z[i, j // self.total] = j % self.total
- Z = Z.astype(int)
- Y = self.eval(Z)
- return Y
-
- def plot_one_site_map(self, kernel, save=None, dim=1):
- plt.figure()
- names = list(self.dictionary.keys())
- names.remove('B')
- real_names = self.get_real_name(names)
- real_names = helper.cartesian([real_names for i in range(dim)])
-
- xtest = torch.from_numpy(self.interval_onehot(dim=dim))
- real_names = [','.join(list(i)) for i in real_names]
- ax = plt.imshow(kernel(xtest, xtest).detach().numpy())
- plt.colorbar()
- plt.xticks(range(xtest.shape[0]), real_names, fontsize=10, rotation=60)
- plt.yticks(range(xtest.shape[0]), real_names, fontsize=10)
- plt.margins(0.2)
- if save is not None:
- plt.savefig(save)
- else:
- plt.show()
+ self.dictionary = {
+ "A": 0,
+ "R": 1,
+ "N": 2,
+ "D": 3,
+ "C": 4,
+ "Q": 5,
+ "E": 6,
+ "G": 7,
+ "H": 8,
+ "I": 9,
+ "L": 10,
+ "K": 11,
+ "M": 12,
+ "F": 13,
+ "P": 14,
+ "S": 15,
+ "T": 16,
+ "W": 17,
+ "Y": 18,
+ "V": 19,
+ "B": 3,
+ }
+
+ f = lambda x: self.dictionary[x]
+
+ self.total = 20
+ self.dim = dim
+ self.ref = ref
+ self.ref_translated = [f(x) for x in self.ref]
+
+ dset = pd.read_hdf(fname)
+
+ # without averaging: keep only rows whose last (4 - dim) positions equal the reference
+ if avg == False:
+ mask = np.full(dset.shape[0], True, dtype=bool)
+ for j in range(4 - dim):
+ mask = np.logical_and(mask, dset["P" + str(4 - j)] == ref[3 - j])
+ self.data = dset[mask]
+ else:
+ # avg. not implemented
+ pass
+
+ if scale == True:
+ maximum = np.max(self.data[:]["Fitness"])
+ self.data[:]["Fitness"] = self.data[:]["Fitness"] / maximum
+ else:
+ pass
+
+ self.real_names = {
+ "A": "Ala",
+ "R": "Arg",
+ "N": "Asn",
+ "D": "Asp",
+ "C": "Cys",
+ "Q": "Gln",
+ "E": "Glu",
+ "G": "Gly",
+ "H": "His",
+ "I": "Iso",
+ "L": "Leu",
+ "K": "Lys",
+ "M": "Met",
+ "F": "Phe",
+ "P": "Pro",
+ "S": "Ser",
+ "T": "Thr",
+ "W": "Trp",
+ "Y": "Tyr",
+ "V": "Val",
+ "B": "Asx",
+ }
+
+ self.inv_real_names = {v: k for k, v in self.real_names.items()}
+
+ self.Negative = ["D", "E"]
+ self.Positive = ["R", "K", "H"]
+ self.Aromatic = ["F", "W", "Y", "H"]
+ self.Polar = ["N", "Q", "S", "T", "Y"]
+ self.Aliphatic = ["A", "G", "I", "L", "V"]
+ self.Amide = ["N", "Q"]
+ self.Sulfur = ["C", "M"]
+ self.Hydroxil = ["S", "T"]
+ self.Small = ["A", "S", "T", "P", "G", "V"]
+ self.Medium = ["M", "L", "I", "C", "N", "Q", "K", "D", "E"]
+ self.Large = ["R", "H", "W", "F", "Y"]
+ self.Hydro = ["M", "L", "I", "V", "A"]
+ self.Cyclic = ["P"]
+ self.Random = ["F", "W", "L", "S", "D"]
+
+ def get_real_name(self, name):
+ out = []
+ for i in name:
+ out.append(self.real_names[i])
+ return out
+
+ def data_summary(self):
+ y = self.data["Fitness"].values
+ maximum = np.max(y)
+ minimum = np.min(y)
+ return (maximum, minimum)
+
+ def translate(self, X):
+ f = lambda x: self.dictionary[x]
+ Y = np.zeros(shape=X.shape).astype(int)
+ for i in range(X.shape[0]):
+ for j in range(X.shape[1]):
+ Y[i, j] = f(X[i, j])
+ return Y
+
+ def translate_one_hot(self, X):
+ try:
+ Y = self.translate(X)
+ except:
+ Y = X
+ n, d = list(X.shape)
+ Z = np.zeros(shape=(n, d * self.total))
+ for i in range(n):
+ for j in range(d):
+ Z[i, Y[i, j] + j * self.total] = 1.0
+
+ return Z
+
+ def self_translate(self):
+ """
+ Translate columns P1..P4 of self.data from amino-acid letters to integer codes, in place.
+ :return:
+ """
+ f = lambda x: self.dictionary[x]
+ for j in range(4):
+ self.data["P" + str(j + 1)] = self.data["P" + str(j + 1)].apply(f)
+
+ def set_fidelity(self, F):
+ self.Fidelity = F
+
+ def scale(self):
+ self.scale = 1
+
+ def eval_noiseless(self, X):
+ """
+ Look up the fitness for each row of X; positions dim..3 are filled in from the translated reference.
+ """
+ res = []
+
+ # append
+ n = X.shape[0]
+ C = np.tile(self.ref_translated[self.dim : 4], (n, 1))
+ X_ = np.concatenate((X, C), axis=1)
+ for i in range(n):
+ x = X_[i, :]
+ mask = np.full(self.data.shape[0], True, dtype=bool)
+ for j in range(4):
+ # print (x[j],self.data["P" + str(j + 1)])
+ mask = np.logical_and(mask, self.data["P" + str(j + 1)] == x[j])
+ res.append(self.data[mask]["Fitness"].values)
+ return np.array(res).reshape(-1, 1)
+
+ # def actions(self):
+ # number_of_actions = self.dim*(20**(self.dim-1))
+ #
+ # actions = []
+ #
+ # ## this includes (20,d) actions
+ # one_dim = self.interval_onehot(dim = 1)
+ # #print (one_dim)
+ # #print ("one dim",one_dim.shape)
+ # if self.dim - 1>0:
+ # # this includes (20**(d-1), d) actions
+ # others = self.interval_onehot(dim = self.dim - 1)
+ # #print ("others:", others.shape)
+ # for fix_dim in range(self.dim):
+ # #print (fix_dim)
+ # action = np.zeros(shape=(20 ** (self.dim - 1), 20 * self.dim))
+ # for elem in one_dim:
+ # #print (fix_dim*20+(fix_dim+1)*20)
+ # action[:,fix_dim*20:(fix_dim+1)*20]=elem
+ # action[:,0:fix_dim*20] = others[:,0:fix_dim*20]
+ # action[:,(fix_dim+1) * 20:] = others[:,fix_dim*20:]
+ # actions.append(action)
+ # return actions
+ # else:
+ # return one_dim
+
+ def actions(self):
+ number_of_actions = self.dim * (20 ** (self.dim - 1))
+
+ actions = []
+
+ ## this includes (20,d) actions
+ one_dim = self.interval_onehot(dim=1)
+ # print (one_dim)
+ # print ("one dim",one_dim.shape)
+ if self.dim - 1 > 0:
+ # this includes (20**(d-1), d) actions
+ others = self.interval_onehot(dim=self.dim - 1)
+ # print ("others:", others.shape)
+ for elem in others:
+ for fix_dim in range(self.dim):
+ action = np.zeros(shape=(20, 20 * self.dim))
+ action[:, fix_dim * 20 : (fix_dim + 1) * 20] = one_dim
+ j = 0
+ for i in range(self.dim):
+ if i != fix_dim:
+ action[:, i * 20 : (i + 1) * 20] = elem[
+ j * 20 : (j + 1) * 20
+ ]
+ j = j + 1
+
+ actions.append(action)
+ return actions
+ else:
+ return one_dim
+
+ def subsample_dts_indice_only(self, N, split=0.9):
+ self.self_translate()
+ xtest = self.interval_onehot()
+
+ indices = np.arange(0, N, 1)
+ sample = indices
+ np.random.shuffle(indices)
+
+ train = sample[0 : int(np.round(split * N))]
+ test = sample[int(np.round(split * N)) : N]
+
+ return (train, test)
+
+ def subsample_dts(self, N, split=0.90):
+ self.self_translate()
+ xtest = self.interval_onehot()
+ indices = np.arange(0, N, 1)
+
+ indices = np.random.shuffle(indices)
+ sample = xtest[indices, :]
+
+ y_sample = self.eval_one_hot(sample)
+
+ x_train = sample[0 : int(np.round(split * N)), :]
+ y_train = y_sample[0 : int(np.round(split * N)), :]
+ x_test = sample[int(np.round(split * N)) : N, :]
+ y_test = y_sample[int(np.round(split * N)) : N, :]
+
+ return (x_train, y_train, x_test, y_test)
+
+ def eval_fidelity(self, X):
+ return self.Fidelity(X)
+
+ def eval(self, X):
+ z = self.eval_noiseless(X)
+ return z
+
+ def eval_one_hot(self, X):
+ n, d = list(X.shape)
+ Z = np.zeros(shape=(n, self.dim))
+ for i in range(n):
+ for j in range(d):
+ if X[i, j] > 0:
+ Z[i, j // self.total] = j % self.total
+ Z = Z.astype(int)
+ Y = self.eval(Z)
+ return Y
+
+ def plot_one_site_map(self, kernel, save=None, dim=1):
+ plt.figure()
+ names = list(self.dictionary.keys())
+ names.remove("B")
+ real_names = self.get_real_name(names)
+ real_names = helper.cartesian([real_names for i in range(dim)])
+
+ xtest = torch.from_numpy(self.interval_onehot(dim=dim))
+ real_names = [",".join(list(i)) for i in real_names]
+ ax = plt.imshow(kernel(xtest, xtest).detach().numpy())
+ plt.colorbar()
+ plt.xticks(range(xtest.shape[0]), real_names, fontsize=10, rotation=60)
+ plt.yticks(range(xtest.shape[0]), real_names, fontsize=10)
+ plt.margins(0.2)
+ if save is not None:
+ plt.savefig(save)
+ else:
+ plt.show()
if __name__ == "__main__":
- Benchmark = ProteinBenchmark("protein_data_gb1.h5", dim=2, ref=['A', 'B', 'C', 'D'])
- # print (Benchmark.data)
- Benchmark.self_translate()
- Benchmark.data.plot.scatter(x='P1', y='P2', c=Benchmark.data['Fitness'], s=200)
- # print (Benchmark.data)
- X = np.array([['F', 'C'], ['D', 'C']])
- X_ = Benchmark.translate(X)
- print(X, X_)
- X__ = Benchmark.translate_one_hot(X)
+ Benchmark = ProteinBenchmark("protein_data_gb1.h5", dim=2, ref=["A", "B", "C", "D"])
+ # print (Benchmark.data)
+ Benchmark.self_translate()
+ Benchmark.data.plot.scatter(x="P1", y="P2", c=Benchmark.data["Fitness"], s=200)
+ # print (Benchmark.data)
+ X = np.array([["F", "C"], ["D", "C"]])
+ X_ = Benchmark.translate(X)
+ print(X, X_)
+ X__ = Benchmark.translate_one_hot(X)
- print(Benchmark.translate_one_hot(X))
+ print(Benchmark.translate_one_hot(X))
- print(Benchmark.eval(X_))
+ print(Benchmark.eval(X_))
- print(Benchmark.eval_one_hot(X__))
+ print(Benchmark.eval_one_hot(X__))
diff --git a/stpy/test_functions/swissfel_simulator.py b/stpy/test_functions/swissfel_simulator.py
index f7eaec1..f82c51d 100755
--- a/stpy/test_functions/swissfel_simulator.py
+++ b/stpy/test_functions/swissfel_simulator.py
@@ -5,116 +5,123 @@
from stpy.helpers.helper import *
-class FelSimulator():
-
- def __init__(self, d, sigma, name):
- self.d = d
- self.sigma = sigma
- self.exp_name = name
-
- def help(self, reload=False):
- print("Help for the FelSimulator")
-
- def load_pickle(self, file_name):
-
- self.GP = pickle.load(open(file_name, "rb"))
- self.d = self.GP.d
- self.exp_name = self.GP.exp_name
-
- def save(self, file_name):
- self.GP.exp_name = self.exp_name
- pickle.dump(self.GP, open(file_name, "wb"), -1)
-
- def load_fresh(self, file_name, dts='1'):
- f = File(file_name, 'r')
- dset = f[dts]
- print(dset)
- n = dset[str("x")].shape[0]
- mask = np.full(n, False, dtype=bool)
- for j in range(self.d):
- maskNew = dset["line_id"] == j
- mask = np.logical_or(mask, maskNew)
- print("Using ", np.sum(mask), "points to fit the model.")
- self.x = dset["x"][mask, 0:self.d].reshape(-1, self.d)
- self.y = dset["y"][mask].reshape(-1, 1)
- # y response and scale, x scale to [-0.5,0.5]
- scale = np.max(np.abs(self.y))
- self.y = self.y / scale
- for j in range(self.d):
- a = np.min(self.x[:, j])
- b = np.max(self.x[:, j])
- self.x[:, j] = (self.x[:, j] / (b - a)) - 0.5 - a / (b - a)
- # noise structure
- self.s = np.max(dset["y_std"][mask] / scale)
- print("The noise level estimated to be:", self.s)
- self.x = torch.from_numpy(self.x)
- self.y = torch.from_numpy(self.y)
-
- f.close()
-
- def fit_simulator(self, GP, optimize="bandwidth", restarts=10):
- self.GP = GP
- self.GP.s = self.s
- self.GP.fit(self.x, self.y)
- print("Model fitted.")
- self.GP.optimize_params(type=optimize, restarts=restarts)
- self.GP.back_prop = True
-
- def bounds(self, N, n):
- x = torch.from_numpy(np.random.uniform(-0.5, 0.5, size=(N, self.GP.d)))
- if n == None:
- xtest = None
- else:
- arrays = [np.linspace(-0.5, 0.5, n).reshape(n, 1) for i in range(self.GP.d)]
- xtest = cartesian(arrays)
- xtest = torch.from_numpy(xtest)
- return (x, xtest, self.GP.d, None)
-
- def opt_bounds(self):
- bounds = tuple([(-0.5, 0.5) for i in range(self.GP.d)])
- return bounds
-
- def constraint(self, X):
- return True
-
- def eval(self, X, sigma=None):
- if sigma is None:
- sigma = self.sigma
- [mu, _] = self.GP.mean_std(X)
- return mu + sigma * torch.randn(X.size()[0], 1, dtype=torch.float64)
-
- def eval_sample(self, X, sigma=None):
- if sigma is None:
- sigma = self.sigma
- f = self.GP.sample(X)
- self.x = torch.cat((self.x, X), dim=0)
- self.y = torch.cat((self.y, f), dim=0)
- self.GP.fit(self.x, self.y)
- return f
-
- def optimum(self):
- ## find optimum using backpropagation optimize eval_sample given X
- x = torch.randn(self.d, 1, requires_grad=True)
- x0 = x
-
- from scipy.optimize import minimize
-
- def fun(x):
- x = np.array([x])
- return -self.eval(torch.from_numpy(x)).numpy()[0][0]
-
- def grad(x):
- z = torch.from_numpy(np.array([x]))
- z.requires_grad_(True)
- y = -self.eval(z)
- y.backward()
- return z.grad.numpy()[0]
-
- mybounds = self.opt_bounds()
- res = minimize(fun, x0.detach().numpy(), method="L-BFGS-B", jac=grad, tol=0.0001, bounds=mybounds)
- solution = res.x
-
- val = self.eval(torch.from_numpy(solution).unsqueeze(0))
- loc = torch.from_numpy(solution).unsqueeze(0)
-
- return (val, loc)
+class FelSimulator:
+
+ def __init__(self, d, sigma, name):
+ self.d = d
+ self.sigma = sigma
+ self.exp_name = name
+
+ def help(self, reload=False):
+ print("Help for the FelSimulator")
+
+ def load_pickle(self, file_name):
+
+ self.GP = pickle.load(open(file_name, "rb"))
+ self.d = self.GP.d
+ self.exp_name = self.GP.exp_name
+
+ def save(self, file_name):
+ self.GP.exp_name = self.exp_name
+ pickle.dump(self.GP, open(file_name, "wb"), -1)
+
+ def load_fresh(self, file_name, dts="1"):
+ f = File(file_name, "r")
+ dset = f[dts]
+ print(dset)
+ n = dset[str("x")].shape[0]
+ mask = np.full(n, False, dtype=bool)
+ for j in range(self.d):
+ maskNew = dset["line_id"] == j
+ mask = np.logical_or(mask, maskNew)
+ print("Using ", np.sum(mask), "points to fit the model.")
+ self.x = dset["x"][mask, 0 : self.d].reshape(-1, self.d)
+ self.y = dset["y"][mask].reshape(-1, 1)
+ # scale y by its maximum absolute value; rescale each x column to [-0.5, 0.5]
+ scale = np.max(np.abs(self.y))
+ self.y = self.y / scale
+ for j in range(self.d):
+ a = np.min(self.x[:, j])
+ b = np.max(self.x[:, j])
+ self.x[:, j] = (self.x[:, j] / (b - a)) - 0.5 - a / (b - a)
+ # noise structure
+ self.s = np.max(dset["y_std"][mask] / scale)
+ print("The noise level estimated to be:", self.s)
+ self.x = torch.from_numpy(self.x)
+ self.y = torch.from_numpy(self.y)
+
+ f.close()
+
+ def fit_simulator(self, GP, optimize="bandwidth", restarts=10):
+ self.GP = GP
+ self.GP.s = self.s
+ self.GP.fit(self.x, self.y)
+ print("Model fitted.")
+ self.GP.optimize_params(type=optimize, restarts=restarts)
+ self.GP.back_prop = True
+
+ def bounds(self, N, n):
+ x = torch.from_numpy(np.random.uniform(-0.5, 0.5, size=(N, self.GP.d)))
+ if n == None:
+ xtest = None
+ else:
+ arrays = [np.linspace(-0.5, 0.5, n).reshape(n, 1) for i in range(self.GP.d)]
+ xtest = cartesian(arrays)
+ xtest = torch.from_numpy(xtest)
+ return (x, xtest, self.GP.d, None)
+
+ def opt_bounds(self):
+ bounds = tuple([(-0.5, 0.5) for i in range(self.GP.d)])
+ return bounds
+
+ def constraint(self, X):
+ return True
+
+ def eval(self, X, sigma=None):
+ if sigma is None:
+ sigma = self.sigma
+ [mu, _] = self.GP.mean_std(X)
+ return mu + sigma * torch.randn(X.size()[0], 1, dtype=torch.float64)
+
+ def eval_sample(self, X, sigma=None):
+ if sigma is None:
+ sigma = self.sigma
+ f = self.GP.sample(X)
+ self.x = torch.cat((self.x, X), dim=0)
+ self.y = torch.cat((self.y, f), dim=0)
+ self.GP.fit(self.x, self.y)
+ return f
+
+ def optimum(self):
+ ## find the optimum of eval with L-BFGS-B, using gradients obtained by backpropagation
+ x = torch.randn(self.d, 1, requires_grad=True)
+ x0 = x
+
+ from scipy.optimize import minimize
+
+ def fun(x):
+ x = np.array([x])
+ return -self.eval(torch.from_numpy(x)).numpy()[0][0]
+
+ def grad(x):
+ z = torch.from_numpy(np.array([x]))
+ z.requires_grad_(True)
+ y = -self.eval(z)
+ y.backward()
+ return z.grad.numpy()[0]
+
+ mybounds = self.opt_bounds()
+ res = minimize(
+ fun,
+ x0.detach().numpy(),
+ method="L-BFGS-B",
+ jac=grad,
+ tol=0.0001,
+ bounds=mybounds,
+ )
+ solution = res.x
+
+ val = self.eval(torch.from_numpy(solution).unsqueeze(0))
+ loc = torch.from_numpy(solution).unsqueeze(0)
+
+ return (val, loc)
diff --git a/stpy/test_functions/test_functions.py b/stpy/test_functions/test_functions.py
index 11e6fed..84121ab 100755
--- a/stpy/test_functions/test_functions.py
+++ b/stpy/test_functions/test_functions.py
@@ -4,677 +4,792 @@
import stpy
import stpy.continuous_processes.gauss_procc
+
# from tensorflow.examples.tutorials.mnist import input_data
from stpy.helpers.helper import *
from stpy.test_functions.neural_net import train_network
def isin(element, test_elements, assume_unique=False):
- (n, d) = element.shape
- (m, d) = test_elements.shape
- maskFull = np.full((n), False, dtype=bool)
- for j in range(m):
- mask = np.full((n), True, dtype=bool)
- for i in range(d):
- # mask = np.logical_and(mask,np.in1d(element[:,i],test_elements[j,i], assume_unique=assume_unique))
- mask = np.logical_and(mask, np.isclose(element[:, i], test_elements[j, i], atol=1e-01))
- # print (j, i, mask)
- maskFull = np.logical_or(mask, maskFull)
- # print (maskFull)
- return maskFull
+ (n, d) = element.shape
+ (m, d) = test_elements.shape
+ maskFull = np.full((n), False, dtype=bool)
+ for j in range(m):
+ mask = np.full((n), True, dtype=bool)
+ for i in range(d):
+ # mask = np.logical_and(mask,np.in1d(element[:,i],test_elements[j,i], assume_unique=assume_unique))
+ mask = np.logical_and(
+ mask, np.isclose(element[:, i], test_elements[j, i], atol=1e-01)
+ )
+ # print (j, i, mask)
+ maskFull = np.logical_or(mask, maskFull)
+ # print (maskFull)
+ return maskFull
class test_function:
- def __init__(self):
- "nothing"
- self.sampled = False
- self.init = False
- self.scale = 1.0
-
- ## General F
- def f(self, X, sigma=0.00001, a=0.5):
- # in X rows are points, cols are features
- X = X * 8
- y = -np.sin(a * np.sum(X ** 2, axis=1)).reshape(X.shape[0], 1)
- y = y + sigma * np.random.randn(X.shape[0], 1)
- return y
-
- def f_bounds(self, N, n, d=1, L_infinity_ball=1.):
- x = np.random.uniform(-L_infinity_ball, L_infinity_ball, size=(N, d))
- # grid
- if n == None:
- xtest = None
- else:
- arrays = [np.linspace(-L_infinity_ball, L_infinity_ball, n).reshape(n, 1) for i in range(d)]
- xtest = cartesian(arrays)
- return (d, xtest, x, 0.15)
-
- def f_opt_bounds(self, d=1, L_infinity_ball=1):
- b = tuple([(-L_infinity_ball, L_infinity_ball) for i in range(d)])
- return b
-
- def optimize_f(self, d=1, a=0.5, L_infinity_ball=1):
- from scipy.optimize import minimize
-
- grad = lambda x: np.cos(np.sum(x ** 2) / 2) * x
- fun = lambda x: np.sin(np.sum(x ** 2) / 2) + 1
-
- bounds = self.f_opt_bounds(d=d, L_infinity_ball=L_infinity_ball)
- r = []
- for _ in range(500):
- (d, _, x, _) = self.f_bounds(1, None, d=d, L_infinity_ball=L_infinity_ball)
- x0 = x[0, :]
- res = minimize(fun, x0, method="SLSQP", jac=grad, tol=0.0001, bounds=bounds)
- r.append(fun(res.x))
-
- print(d, max(r))
-
- def sample_ss(self, X, sigma=0.001, gamma=1.0, GP=None):
- # in X rows are points, cols are features
- if self.sampled == False:
- # print ("sampling")
- if GP == None:
- GP = stpy.continuous_processes.gauss_procc.GaussianProcess(s=sigma, gamma=gamma)
- self.sample = GP.sample(torch.from_numpy(self.xtest)).numpy()
- mask = isin(self.xtest, X)
- self.sampled = True
- return self.sample[mask, :].numpy() + np.random.randn(X.shape[0], 1) * sigma
- else:
- self.sample = GP.sample(torch.from_numpy(self.xtest)).numpy()
- mask = isin(self.xtest, X)
- self.sampled = True
- return self.sample[mask, :] + np.random.randn(X.shape[0], 1) * sigma
- else:
- mask = isin(self.xtest, X)
- return self.sample[mask, :] + np.random.randn(X.shape[0], 1) * sigma
-
- def sample_ss_bounds(self, N, n, d=1, L_infinity_ball=1., gamma=1.0):
- # self.sampled = False
- # grid
- arrays = [np.linspace(-L_infinity_ball, L_infinity_ball, n).reshape(n, 1) for i in range(d)]
- xtest = cartesian(arrays)
- self.xtest = xtest
- self.n = n
- # x = self.xtest[np.random.randint(0,n,size = N),:]
- x = self.xtest[np.random.permutation(np.arange(0, self.xtest.shape[0], 1))[0:N], :]
- x = np.sort(x, axis=0)
- return (d, xtest, x, gamma)
-
- def sample_ss_reset(self):
- self.samples = False
-
- def optimize(self, xtest, ytest, groups, s):
- (n, d) = xtest.size()
- kernel = stpy.kernels.KernelFunction(kernel_name="ard", gamma=torch.ones(d, dtype=torch.float64) * 0.1,
- groups=groups)
- GP = stpy.continuous_processes.gauss_procc.GaussianProcess(kernel_custom=kernel, s=s, d=d)
- GP.fit_gp(xtest, ytest)
- GP.optimize_params(type="bandwidth")
- print("Optimized")
- return torch.min(kernel.gamma)
-
- ## Branin Function
- def branin(self, X, sigma=0.1):
- if X.shape[1] != 2:
- raise AssertionError("Invalid dimension of grid with Branin Function")
- else:
- xx = X[:, 0]
- yy = X[:, 1]
- y = ((yy - (5.1 / (4. * np.pi)) * (xx ** 2) + 5. / np.pi - 6.) ** 2 + 10. * (
- 1. - 1. / (8. * np.pi)) * np.cos(xx) + 10.) / 150
- y = -y.reshape(X.shape[0], 1)
- return y
-
- def branin_bounds(self, N, n):
- x = np.random.uniform(0, 10, size=(N, 2))
- # grid
- if n == None:
- xtest = None
- else:
- arrays = [np.linspace(-5, 10, n).reshape(n, 1), np.linspace(0, 15, n).reshape(n, 1)]
- xtest = cartesian(arrays)
- return (2, xtest, x, 2.5)
-
- def branin_opt_bounds(self):
- b = tuple([(-5, 10), (0, 15)])
- return b
-
- ## Camelback Function
- def camelback(self, X, sigma=0.1):
- if X.shape[1] != 2:
- raise AssertionError("Invalid dimension of grid with Branin Function")
- else:
- xx = X[:, 0] * 4
- yy = X[:, 1] * 2
- y = (4. - 2.1 * xx ** 2 + (xx ** 4) / 3.) * (xx ** 2) + xx * yy + (-4. + 4 * (yy ** 2)) * (yy ** 2)
- y = -y.reshape(X.shape[0], 1)
- # y = np.tanh(y)
- y = y / 5.
- return y / self.scale + sigma * np.random.randn(X.shape[0], 1)
-
- def camelback_bounds(self, N, n, adv_inv=False):
- if adv_inv == False:
- x = np.random.uniform(-0.5, 0.5, size=(N, 2))
- else:
- x = np.random.uniform(-0.5, -0.4, size=(N, 2))
- # grid
- if n == None:
- xtest = None
- else:
- arrays = [np.linspace(-0.5, 0.5, n).reshape(n, 1), np.linspace(-0.5, 0.5, n).reshape(n, 1)]
- xtest = cartesian(arrays)
- return (2, xtest, x, 0.1)
-
- def camelback_opt_bounds(self):
- b = tuple([(-0.5, 0.5), (-0.5, 0.5)])
- return b
-
- def camelback_scale(self, xtest):
- self.scale = np.max((self.camelback(xtest, sigma=0)))
- print("Scaling:", self.scale)
-
- ## Hartmann 6
- def hartmann6(self, X, sigma=0.1):
- if X.shape[1] != 6:
- raise AssertionError("Invalid dimension of grid with Branin Function")
- else:
- # opt = np.array([[0.20169, 0.150011, 0.476874, 0.275332, 0.311652, 0.6573]])
- # fopt = np.array([[-3.32237]])
-
- alpha = [1.00, 1.20, 3.00, 3.20]
- A = np.array([[10.00, 3.00, 17.00, 3.50, 1.70, 8.00],
- [0.05, 10.00, 17.00, 0.10, 8.00, 14.00],
- [3.00, 3.50, 1.70, 10.00, 17.00, 8.00],
- [17.00, 8.00, 0.05, 10.00, 0.10, 14.00]])
- P = 0.0001 * np.array([[1312, 1696, 5569, 124, 8283, 5886],
- [2329, 4135, 8307, 3736, 1004, 9991],
- [2348, 1451, 3522, 2883, 3047, 6650],
- [4047, 8828, 8732, 5743, 1091, 381]])
-
- """6d Hartmann test function
+ def __init__(self):
+ "nothing"
+ self.sampled = False
+ self.init = False
+ self.scale = 1.0
+
+ ## General F
+ def f(self, X, sigma=0.00001, a=0.5):
+ # in X rows are points, cols are features
+ X = X * 8
+ y = -np.sin(a * np.sum(X**2, axis=1)).reshape(X.shape[0], 1)
+ y = y + sigma * np.random.randn(X.shape[0], 1)
+ return y
+
+ def f_bounds(self, N, n, d=1, L_infinity_ball=1.0):
+ x = np.random.uniform(-L_infinity_ball, L_infinity_ball, size=(N, d))
+ # grid
+ if n == None:
+ xtest = None
+ else:
+ arrays = [
+ np.linspace(-L_infinity_ball, L_infinity_ball, n).reshape(n, 1)
+ for i in range(d)
+ ]
+ xtest = cartesian(arrays)
+ return (d, xtest, x, 0.15)
+
+ def f_opt_bounds(self, d=1, L_infinity_ball=1):
+ b = tuple([(-L_infinity_ball, L_infinity_ball) for i in range(d)])
+ return b
+
+ def optimize_f(self, d=1, a=0.5, L_infinity_ball=1):
+ from scipy.optimize import minimize
+
+ grad = lambda x: np.cos(np.sum(x**2) / 2) * x
+ fun = lambda x: np.sin(np.sum(x**2) / 2) + 1
+
+ bounds = self.f_opt_bounds(d=d, L_infinity_ball=L_infinity_ball)
+ r = []
+ for _ in range(500):
+ (d, _, x, _) = self.f_bounds(1, None, d=d, L_infinity_ball=L_infinity_ball)
+ x0 = x[0, :]
+ res = minimize(fun, x0, method="SLSQP", jac=grad, tol=0.0001, bounds=bounds)
+ r.append(fun(res.x))
+
+ print(d, max(r))
+
+ def sample_ss(self, X, sigma=0.001, gamma=1.0, GP=None):
+ # in X rows are points, cols are features
+ if self.sampled == False:
+ # print ("sampling")
+ if GP == None:
+ GP = stpy.continuous_processes.gauss_procc.GaussianProcess(
+ s=sigma, gamma=gamma
+ )
+ self.sample = GP.sample(torch.from_numpy(self.xtest)).numpy()
+ mask = isin(self.xtest, X)
+ self.sampled = True
+ return (
+ self.sample[mask, :].numpy()
+ + np.random.randn(X.shape[0], 1) * sigma
+ )
+ else:
+ self.sample = GP.sample(torch.from_numpy(self.xtest)).numpy()
+ mask = isin(self.xtest, X)
+ self.sampled = True
+ return self.sample[mask, :] + np.random.randn(X.shape[0], 1) * sigma
+ else:
+ mask = isin(self.xtest, X)
+ return self.sample[mask, :] + np.random.randn(X.shape[0], 1) * sigma
+
+ def sample_ss_bounds(self, N, n, d=1, L_infinity_ball=1.0, gamma=1.0):
+     """Build the n-per-axis grid on [-L_infinity_ball, L_infinity_ball]^d and pick N of its rows.
+
+     Stores the grid in self.xtest and n in self.n. The N rows are taken from
+     a random permutation of the grid's row indices and are then sorted
+     column by column (np.sort with axis=0).
+
+     Returns (d, xtest, x, gamma).
+     """
+     nodes = [np.linspace(-L_infinity_ball, L_infinity_ball, n).reshape(n, 1) for _ in range(d)]
+     xtest = cartesian(nodes)
+     self.xtest = xtest
+     self.n = n
+     shuffled = np.random.permutation(np.arange(0, self.xtest.shape[0], 1))
+     x = np.sort(self.xtest[shuffled[0:N], :], axis=0)
+     return (d, xtest, x, gamma)
+
+ def sample_ss_reset(self):
+     self.samples = self.sampled = False  # sample_ss reads `sampled`; the old code only set the stray `samples`
+
+ def optimize(self, xtest, ytest, groups, s):
+     """Fit an ARD-kernel GP to (xtest, ytest), run its bandwidth optimisation and return torch.min of the kernel gamma.
+
+     The kernel is created with gamma = 0.1 in each of the d input dimensions
+     (d is the second entry of xtest.size()). Prints "Optimized" when done.
+     """
+     (_, d) = xtest.size()
+     start = torch.ones(d, dtype=torch.float64) * 0.1
+     kernel = stpy.kernels.KernelFunction(kernel_name="ard", gamma=start, groups=groups)
+     GP = stpy.continuous_processes.gauss_procc.GaussianProcess(kernel_custom=kernel, s=s, d=d)
+     GP.fit_gp(xtest, ytest)
+     GP.optimize_params(type="bandwidth")
+     print("Optimized")
+     return torch.min(kernel.gamma)
+
+ ## Branin Function
+ def branin(self, X, sigma=0.1):
+     """Negated Branin-style surface divided by 150: one (n, 1) value per row of the (n, 2) input.
+
+     NOTE(review): sigma is unused (no noise is added), and the inner term uses
+     5.1 / (4 pi) and a constant 5 / pi where the textbook Branin has
+     5.1 / (4 pi^2) and (5 / pi) * x1 -- confirm this variant is intended.
+     """
+     if X.shape[1] != 2:
+         raise AssertionError("Invalid dimension of grid with Branin Function")
+     x1, x2 = X[:, 0], X[:, 1]
+     bowl = (x2 - (5.1 / (4.0 * np.pi)) * (x1**2) + 5.0 / np.pi - 6.0) ** 2
+     ripple = 10.0 * (1.0 - 1.0 / (8.0 * np.pi)) * np.cos(x1)
+     return -((bowl + ripple + 10.0) / 150).reshape(X.shape[0], 1)
+
+ def branin_bounds(self, N, n):
+     """Draw N uniform points in [0, 10]^2 and build the n x n grid on [-5, 10] x [0, 15].
+
+     Returns (2, xtest, x, 2.5); xtest is None when n is None.
+     NOTE(review): the random points do not cover the same box as the grid -- confirm.
+     """
+     x = np.random.uniform(0, 10, size=(N, 2))
+     if n is None:  # identity test instead of `== None`
+         return (2, None, x, 2.5)
+     first = np.linspace(-5, 10, n).reshape(n, 1)
+     second = np.linspace(0, 15, n).reshape(n, 1)
+     return (2, cartesian([first, second]), x, 2.5)
+
+ def branin_opt_bounds(self):
+     """Return the optimisation box as per-coordinate (low, high) pairs: ((-5, 10), (0, 15))."""
+     return ((-5, 10), (0, 15))
+
+ ## Camelback Function
+ def camelback(self, X, sigma=0.1):
+     """Negated six-hump camelback on rescaled inputs, plus sigma * N(0, 1) noise.
+
+     X is (n, 2); its columns are multiplied by 4 and 2 before evaluation.
+     The negated value is divided by 5.0 and then by self.scale.
+     Returns an (n, 1) array. Raises AssertionError unless X has two columns.
+     """
+     if X.shape[1] != 2:
+         # The message used to name the Branin function (copy-paste slip).
+         raise AssertionError("Invalid dimension of grid with Camelback Function")
+     xx = X[:, 0] * 4
+     yy = X[:, 1] * 2
+     y = (4.0 - 2.1 * xx**2 + (xx**4) / 3.0) * (xx**2) + xx * yy + (-4.0 + 4 * (yy**2)) * (yy**2)
+     y = -y.reshape(X.shape[0], 1) / 5.0
+     return y / self.scale + sigma * np.random.randn(X.shape[0], 1)
+
+ def camelback_bounds(self, N, n, adv_inv=False):
+     """Draw N uniform starting points and build the n x n grid on [-0.5, 0.5]^2.
+
+     With adv_inv set, the points are confined to [-0.5, -0.4]^2 instead of
+     the whole box.
+     Returns (2, xtest, x, 0.1); xtest is None when n is None.
+     """
+     upper = -0.4 if adv_inv else 0.5
+     x = np.random.uniform(-0.5, upper, size=(N, 2))
+     if n is None:  # identity test instead of `== None`
+         return (2, None, x, 0.1)
+     # one freshly built node array per coordinate
+     first = np.linspace(-0.5, 0.5, n).reshape(n, 1)
+     second = np.linspace(-0.5, 0.5, n).reshape(n, 1)
+     return (2, cartesian([first, second]), x, 0.1)
+
+ def camelback_opt_bounds(self):
+     """Return the optimisation box as per-coordinate (low, high) pairs: two copies of (-0.5, 0.5)."""
+     return ((-0.5, 0.5),) * 2
+
+ def camelback_scale(self, xtest):
+     self.scale = np.max(self.camelback(xtest, sigma=0))  # largest noiseless value on xtest, under the current scale
+     print("Scaling:", self.scale)
+
+ ## Hartmann 6
+ def hartmann6(self, X, sigma=0.1):
+ if X.shape[1] != 6:
+ raise AssertionError("Invalid dimension of grid with Branin Function")
+ else:
+ # opt = np.array([[0.20169, 0.150011, 0.476874, 0.275332, 0.311652, 0.6573]])
+ # fopt = np.array([[-3.32237]])
+
+ alpha = [1.00, 1.20, 3.00, 3.20]
+ A = np.array(
+ [
+ [10.00, 3.00, 17.00, 3.50, 1.70, 8.00],
+ [0.05, 10.00, 17.00, 0.10, 8.00, 14.00],
+ [3.00, 3.50, 1.70, 10.00, 17.00, 8.00],
+ [17.00, 8.00, 0.05, 10.00, 0.10, 14.00],
+ ]
+ )
+ P = 0.0001 * np.array(
+ [
+ [1312, 1696, 5569, 124, 8283, 5886],
+ [2329, 4135, 8307, 3736, 1004, 9991],
+ [2348, 1451, 3522, 2883, 3047, 6650],
+ [4047, 8828, 8732, 5743, 1091, 381],
+ ]
+ )
+
+ """6d Hartmann test function
input bounds: 0 <= xi <= 1, i = 1..6
global optimum: (0.20169, 0.150011, 0.476874, 0.275332, 0.311652, 0.6573),
min function value = -3.32237
"""
- external_sum = 0
- for i in range(4):
- internal_sum = 0
- for j in range(6):
- internal_sum = internal_sum + A[i, j] * (X[:, j] - P[i, j]) ** 2
- external_sum = external_sum + alpha[i] * np.exp(-internal_sum)
-
- return external_sum[:, np.newaxis]
-
- def hartmann6_bounds(self, N, n):
- x = np.random.uniform(0, 1, size=(N, 6))
- # grid
- if n == None:
- xtest = None
- else:
- arrays = [np.linspace(0, 1, n).reshape(n, 1) for i in range(6)]
- xtest = cartesian(arrays)
- return (6, xtest, x, 0.5)
-
- def hartmann6_opt_bounds(self):
- b = tuple([(0, 1) for i in range(6)])
- return b
-
- ## Hartmann 4
- def hartmann4(self, X, sigma=0.1):
- if X.shape[1] != 4:
- raise AssertionError("Invalid dimension of grid with Branin Function")
- else:
-
- alpha = [1.00, 1.20, 3.00, 3.20]
-
- A = np.array([[10.00, 3.00, 17.00, 3.50, 1.70, 8.00],
- [0.05, 10.00, 17.00, 0.10, 8.00, 14.00],
- [3.00, 3.50, 1.70, 10.00, 17.00, 8.00],
- [17.00, 8.00, 0.05, 10.00, 0.10, 14.00]])
-
- P = 0.0001 * np.array([[1312, 1696, 5569, 124, 8283, 5886],
- [2329, 4135, 8307, 3736, 1004, 9991],
- [2348, 1451, 3522, 2883, 3047, 6650],
- [4047, 8828, 8732, 5743, 1091, 381]])
-
- """6d Hartmann test function
+ external_sum = 0
+ for i in range(4):
+ internal_sum = 0
+ for j in range(6):
+ internal_sum = internal_sum + A[i, j] * (X[:, j] - P[i, j]) ** 2
+ external_sum = external_sum + alpha[i] * np.exp(-internal_sum)
+
+ return external_sum[:, np.newaxis]
+
+ def hartmann6_bounds(self, N, n):
+     """Draw N uniform points in [0, 1]^6 and build the n-per-axis test grid.
+
+     Returns (6, xtest, x, 0.5); xtest is None when n is None.
+     """
+     x = np.random.uniform(0, 1, size=(N, 6))
+     if n is None:  # identity test instead of `== None`
+         return (6, None, x, 0.5)
+     return (6, cartesian([np.linspace(0, 1, n).reshape(n, 1) for _ in range(6)]), x, 0.5)
+
+ def hartmann6_opt_bounds(self):
+     """Return the unit-box bounds: six (0, 1) pairs, one per coordinate."""
+     return ((0, 1),) * 6
+
+ ## Hartmann 4
+ def hartmann4(self, X, sigma=0.1):
+ if X.shape[1] != 4:
+ raise AssertionError("Invalid dimension of grid with Branin Function")
+ else:
+
+ alpha = [1.00, 1.20, 3.00, 3.20]
+
+ A = np.array(
+ [
+ [10.00, 3.00, 17.00, 3.50, 1.70, 8.00],
+ [0.05, 10.00, 17.00, 0.10, 8.00, 14.00],
+ [3.00, 3.50, 1.70, 10.00, 17.00, 8.00],
+ [17.00, 8.00, 0.05, 10.00, 0.10, 14.00],
+ ]
+ )
+
+ P = 0.0001 * np.array(
+ [
+ [1312, 1696, 5569, 124, 8283, 5886],
+ [2329, 4135, 8307, 3736, 1004, 9991],
+ [2348, 1451, 3522, 2883, 3047, 6650],
+ [4047, 8828, 8732, 5743, 1091, 381],
+ ]
+ )
+
+ """6d Hartmann test function
input bounds: 0 <= xi <= 1, i = 1..6
global optimum: (0.20169, 0.150011, 0.476874, 0.275332, 0.311652, 0.6573),
min function value = -3.32237
"""
- external_sum = 0
- for i in range(4):
- internal_sum = 0
- for j in range(4):
- internal_sum = internal_sum + A[i, j] * (X[:, j] - P[i, j]) ** 2
- external_sum = external_sum + alpha[i] * np.exp(-internal_sum)
-
- return external_sum[:, np.newaxis]
-
- def hartmann4_bounds(self, N, n):
- x = np.random.uniform(0, 1, size=(N, 4))
- # grid
- if n == None:
- xtest = None
- else:
- arrays = [np.linspace(0, 1, n).reshape(n, 1) for i in range(4)]
- xtest = cartesian(arrays)
- return (4, xtest, x, 0.5)
-
- def hartmann4_opt_bounds(self):
- b = tuple([(0, 1) for i in range(4)])
- return b
-
- def hartmann3(self, X, sigma=0.1):
-
- X_lower = np.array([0, 0, 0])
- X_upper = np.array([1, 1, 1])
- # opt = np.array([[0.114614, 0.555649, 0.852547]])
- # fopt = np.array([[-3.86278]])
- alpha = [1.0, 1.2, 3.0, 3.2]
- A = np.array([[3.0, 10.0, 30.0],
- [0.1, 10.0, 35.0],
- [3.0, 10.0, 30.0],
- [0.1, 10.0, 35.0]])
- P = 0.0001 * np.array([[3689, 1170, 2673],
- [4699, 4387, 7470],
- [1090, 8732, 5547],
- [381, 5743, 8828]])
-
- external_sum = 0
- for i in range(4):
- internal_sum = 0
- for j in range(3):
- internal_sum = internal_sum + A[i, j] * (X[:, j] - P[i, j]) ** 2
-
- external_sum = external_sum + alpha[i] * np.exp(-internal_sum)
-
- return external_sum[:, np.newaxis]
-
- def hartmann3_bounds(self, N, n):
- x = np.random.uniform(0, 1, size=(N, 3))
- # grid
- if n == None:
- xtest = None
- else:
- arrays = [np.linspace(0, 1, n).reshape(n, 1) for i in range(3)]
- xtest = cartesian(arrays)
-
- return (3, xtest, x, 0.4)
-
- def hartmann3_opt_bounds(self):
- b = tuple([(0, 1) for i in range(3)])
- return b
-
- def michal_old(self, X, sigma=0.1):
- (n, d) = X.shape
- sum_ = np.zeros(shape=(X.shape[0], 1))
-
- for ii in range(d):
- xi = X[:, ii]
- # print ("xi",xi)
- i = ii + 1
- new = np.sin(xi) * np.power((np.sin(i * np.power(xi, 2) / np.pi)), (2 * d))
- sum_ += new.reshape(n, 1)
- return -0.5 * sum_ + np.random.randn(X.shape[0], 1) * sigma
-
- def stang_old(self, X, sigma=0.1):
- (n, d) = X.shape
- sum_ = np.zeros(shape=(X.shape[0], 1))
-
- for ii in range(d):
- xi = X[:, ii]
- new = xi ** 4 - 16. * xi ** 2 + 5 * xi
- sum_ += new.reshape(n, 1)
-
- sum_ = sum_ / (38.7122 * d)
- # sum_ = sum_/d
-
- return -0.5 * sum_ + np.random.randn(X.shape[0], 1) * sigma
-
- def michal_un(self, X, sigma=0.1):
- (n, d) = X.shape
- X = (X + 0.5) * np.pi
- ar = np.arange(1, d + 1, 1)
- sum_ = np.sin(X) * np.power((np.sin(ar * X / np.pi)), (2 * d))
- sum_ = np.sum(sum_, axis=1).reshape(-1, 1)
- return sum_ + np.random.randn(X.shape[0], 1) * sigma
-
- def michal(self, X, sigma=0.1):
- (n, d) = X.shape
- X = (X + 0.5) * np.pi
- ar = np.arange(1, d + 1, 1)
- sum_ = np.sin(X) * np.power((np.sin(ar * X / np.pi)), (2 * d))
- sum_ = np.sum(sum_, axis=1).reshape(-1, 1)
- sum_ = sum_ / self.michal_optimum(d)[1]
- return sum_ + np.random.randn(X.shape[0], 1) * sigma
-
- def michal_bounds(self, N, n, d=1, adv_inv=False):
- if adv_inv == False:
- x = np.random.uniform(-0.5, 0.5, size=(N, d))
- else:
- x = np.random.uniform(-0.5, 0., size=(N, d))
-
- if n == None:
- xtest = None
- else:
- arrays = [np.linspace(-0.5, 0.5, n).reshape(n, 1) for i in range(d)]
- xtest = cartesian(arrays)
-
- return (d, xtest, x, 0.3)
-
- def michal_opt_bounds(self, d):
- b = tuple([(-0.5, 0.5) for i in range(d)])
- return b
-
- def michal_optimum(self, d):
- q = 20
- opt = np.ones(shape=(q))
- opt[0] = 2.93254
- opt[1] = 2.34661
- opt[2] = 1.64107
- opt[3] = 1.24415
- opt[4] = 0.999643
- opt[5] = 0.834879
- opt[6] = 2.1089
- opt[7] = 1.84835
- opt[8] = 1.64448
- opt[9] = 1.48089
- opt[10] = 1.34678
- opt[11] = 1.2349
- opt[12] = 1.89701
- opt[13] = 1.76194
- opt[14] = 1.64477
- opt[15] = 1.54218
- opt[16] = 1.45162
- opt[17] = 1.37109
- opt[18] = 1.81774
- opt = opt[0:d].reshape(1, -1)
- opt = (opt / np.pi) - 0.5
- value = self.michal_un(opt, sigma=0)
- return (opt, value[0][0])
-
- def stang_un(self, X, sigma=0.1):
- (n, d) = X.shape
- X = X * 8
- Y = X ** 2
- sum_ = np.sum(Y ** 2 - 16. * Y + 5 * X, axis=1).reshape(-1, 1)
- sum_ = sum_
- return -0.5 * sum_ + np.random.randn(X.shape[0], 1) * sigma
-
- def stang(self, X, sigma=0.1):
- (n, d) = X.shape
- X = X * 8
- Y = X ** 2
- sum_ = np.sum(Y ** 2 - 16. * Y + 5 * X, axis=1).reshape(-1, 1)
- sum_ = sum_ / self.stang_optimum(d)[1]
- return -0.5 * sum_ + np.random.randn(X.shape[0], 1) * sigma
-
- def stang_bounds(self, N, n, d=1, adv_inv=False):
- if adv_inv == False:
- x = np.random.uniform(-0.5, 0.5, size=(N, d))
- else:
- print("Adversarially initiallized")
- x = np.random.uniform(0.4, 0.5, size=(N, d))
-
- if n == None:
- xtest = None
- else:
- arrays = [np.linspace(-0.5, 0.5, n).reshape(n, 1) for i in range(d)]
- xtest = cartesian(arrays)
-
- return (d, xtest, x, 0.6)
-
- def stang_opt_bounds(self, d):
- b = tuple([(-0.5, 0.5) for i in range(d)])
- return b
-
- def stang_optimum(self, d):
- opt = np.ones(shape=(d)) * (-2.9035)
- opt = opt / 8
- opt = opt.reshape(1, -1)
-
- value = self.stang_un(opt, sigma=0.0)
- return (opt, value[0][0])
-
- def double_group_un(self, X, sigma=0.1):
- sum_ = np.sum(np.exp(-(np.diff(X, axis=1) / 0.25) ** 2), axis=1).reshape(-1, 1)
- return 0.5 * sum_ + np.random.randn(X.shape[0], 1) * sigma
-
- def double_group(self, X, sigma=0.1):
- (n, d) = X.shape
- sum_ = np.sum(np.exp(-(np.diff(X, axis=1) / 0.25) ** 2), axis=1).reshape(-1, 1)
- sum_ = sum_ / self.double_group_optimum(d)[1]
- return 0.5 * sum_ + np.random.randn(X.shape[0], 1) * sigma
-
- def double_group_bounds(self, N, n, d=1, adv_inv=False):
- if adv_inv == False:
- x = np.random.uniform(-0.5, 0.5, size=(N, d))
- else:
- print("Adversarially initiallized")
- x = np.random.uniform(-0.5, -0.4, size=(N, d))
-
- if n == None:
- xtest = None
- else:
- arrays = [np.linspace(-0.5, 0.5, n).reshape(n, 1) for i in range(d)]
- xtest = cartesian(arrays)
-
- return (d, xtest, x, 0.6)
-
- def double_group_opt_bounds(self, d):
- b = tuple([(-0.5, 0.5) for i in range(d)])
- return b
-
- def double_group_optimum(self, d):
- opt = np.zeros(shape=(1, d))
- value = self.double_group_un(opt, 0)[0][0]
- return (opt, value)
-
- def swissfel(self, X, sigma=0.1):
- if self.init == False:
- raise AssertionError("Need to run bounds first.")
- else:
- if sigma == 0.0:
- return self.model.predict(X)[0]
- else:
- return self.model.predict(X)[0] + np.random.randn(X.shape[0], 1) * self.noise
-
- def swissfel_bounds(self, N, n):
- if self.init == False:
- import os.path
- fname = "/home/mojko/Documents/PhD/RFFinBO/code/test_problems/swissfel_model.p"
- if not os.path.isfile(fname):
- f = File('/home/mojko/Documents/PhD/RFFinBO/code/test_problems/evaluations.hdf5')
- dset = f['1']
- X = dset["x"][:].reshape(-1, 5)
-
- # y response and scale
- Y = dset["y"][:].reshape(-1, 1)
- Y = Y / np.max(np.abs(Y))
-
- # noise structure
- Yerr = dset["y_std"] / np.max(np.abs(Y))
- self.noise = np.std(Yerr)
- print("Estimated noise level", self.noise)
-
- # data scale to [-0.5,0.5]
- X = dset["x"][:].reshape(-1, 5)
- for j in range(5):
- a = np.min(X[:, j])
- b = np.max(X[:, j])
- X[:, j] = (X[:, j] / (b - a)) - 0.5 - a / (b - a)
-
- ## fully additive kernel s
- self.kernel = GPy.kern.RBF(1, active_dims=[0]) + GPy.kern.RBF(1, active_dims=[1]) \
- + GPy.kern.RBF(1, active_dims=[2]) + GPy.kern.RBF(1, active_dims=[3]) \
- + GPy.kern.RBF(1, active_dims=[4])
- self.model = GPy.models.GPRegression(X, Y, self.kernel)
- print("Model fit")
- self.model.optimize(messages=True)
- print("ML likelihood fit")
- self.init = True
- # save pickle
- pickle.dump(self.model,
- open("/home/mojko/Documents/PhD/RFFinBO/code/test_problems/swissfel_model.p", "wb"))
- pickle.dump(self.noise,
- open("/home/mojko/Documents/PhD/RFFinBO/code/test_problems/swissfel_noise.p", "wb"))
- else:
- self.init = True
- self.model = pickle.load(
- open("/home/mojko/Documents/PhD/RFFinBO/code/test_problems/swissfel_model.p", "rb"))
- self.noise = pickle.load(
- open("/home/mojko/Documents/PhD/RFFinBO/code/test_problems/swissfel_noise.p", "rb"))
-
- if n == None:
- xtest = None
- else:
- arrays = [np.linspace(-0.5, 0.5, n).reshape(n, 1) for i in range(5)]
- xtest = cartesian(arrays)
-
- # bw = np.min(self.kernel.lengthscale)
- x = np.random.uniform(-0.5, 0.5, size=(N, 5))
- return (5, xtest, x, 0.1)
-
- def swissfel_opt_bounds(self):
- b = tuple([(-0.5, 0.5) for i in range(5)])
- return b
-
- def swissfel_optimum(self):
- from scipy.optimize import minimize
- # maximize the function
- mybounds = self.swissfel_opt_bounds()
- fun = lambda x: -self.swissfel(x.reshape(1, -1), sigma=0.0)[0][0]
-
- best = -10.
- repeats = 10
- for i in range(repeats):
- x0 = np.random.uniform(-0.5, 0.5, size=(5,))
- res = minimize(fun, x0, method="L-BFGS-B", tol=0.0001, bounds=mybounds)
- value = self.swissfel(res.x.reshape(1, -1), sigma=0)
- if value > best:
- best = value
- self.opt_loc = res.x.reshape(1, -1)
- return (self.opt_loc, best)
-
- def neural_net(self, X, sigma=0.1):
- (n, d) = X.shape
- res = []
- val_size = 400
- if self.sampled == False:
- self.sampled = True
- try:
- self.mnist = input_data.read_data_sets("~/.", one_hot=True, validation_size=val_size)
- except:
- self.mnist = input_data.read_data_sets("~/.", one_hot=True)
-
- for x in X:
- (it, acc) = train_network(self.mnist, dropout=x[0], verbose=False,
- val_size=val_size, maxiter=300, initialization_params=x[1:], no_filters_1=self.NN,
- no_filters_2=self.NN2, val_count=30)
- res.append(acc)
-
- return np.array(acc).reshape(n, 1)
-
- def neural_net_bounds(self, N, n, NN=16, NN2=22):
- self.NN = NN
- self.NN2 = NN2
- d = self.NN + self.NN2
-
- x = np.random.uniform(0, 10, size=(N, d))
- dropout = np.random.uniform(0, 1, size=(N, 1))
- x = np.concatenate((x, dropout), axis=1)
-
- if n == None:
- xtest = None
- else:
- arrays = [np.linspace(0, 1, n).reshape(n, 1)] + [np.linspace(0, 10, n).reshape(n, 1) for i in range(d)]
- xtest = cartesian(arrays)
-
- return (d + 1, xtest, x, 0.9)
-
- def neural_net_opt_bounds(self):
- d = self.NN + self.NN2
- b = tuple([(0, 1)] + [(0, 10) for i in range(d)])
- return b
+ external_sum = 0
+ for i in range(4):
+ internal_sum = 0
+ for j in range(4):
+ internal_sum = internal_sum + A[i, j] * (X[:, j] - P[i, j]) ** 2
+ external_sum = external_sum + alpha[i] * np.exp(-internal_sum)
+
+ return external_sum[:, np.newaxis]
+
+ def hartmann4_bounds(self, N, n):
+     """Draw N uniform points in [0, 1]^4 and build the n-per-axis test grid.
+
+     Returns (4, xtest, x, 0.5); xtest is None when n is None.
+     """
+     x = np.random.uniform(0, 1, size=(N, 4))
+     if n is None:  # identity test instead of `== None`
+         return (4, None, x, 0.5)
+     return (4, cartesian([np.linspace(0, 1, n).reshape(n, 1) for _ in range(4)]), x, 0.5)
+
+ def hartmann4_opt_bounds(self):
+     """Return the unit-box bounds: four (0, 1) pairs, one per coordinate."""
+     return ((0, 1),) * 4
+
+ def hartmann3(self, X, sigma=0.1):
+     """Evaluate sum_i alpha_i * exp(-sum_j A_ij * (x_j - P_ij)^2) for every row x of X.
+
+     Only the first three columns of X are read; the result has shape (n, 1).
+     The sum is returned with a positive sign, whereas the reference value noted
+     below is negative. sigma is accepted but unused: no noise is added.
+     """
+     # opt = np.array([[0.114614, 0.555649, 0.852547]])
+     # fopt = np.array([[-3.86278]])
+     alpha = [1.0, 1.2, 3.0, 3.2]
+     A = np.array(
+         [[3.0, 10.0, 30.0], [0.1, 10.0, 35.0], [3.0, 10.0, 30.0], [0.1, 10.0, 35.0]]
+     )
+     P = 0.0001 * np.array(
+         [
+             [3689, 1170, 2673],
+             [4699, 4387, 7470],
+             [1090, 8732, 5547],
+             [381, 5743, 8828],
+         ]
+     )
+     total = 0
+     for weight, a_row, p_row in zip(alpha, A, P):
+         exponent = 0
+         for j in range(3):
+             exponent = exponent + a_row[j] * (X[:, j] - p_row[j]) ** 2
+         total = total + weight * np.exp(-exponent)
+     return total[:, np.newaxis]
+
+ def hartmann3_bounds(self, N, n):
+     """Draw N uniform points in [0, 1]^3 and build the n-per-axis test grid.
+
+     Returns (3, xtest, x, 0.4); xtest is None when n is None.
+     """
+     x = np.random.uniform(0, 1, size=(N, 3))
+     if n is None:  # identity test instead of `== None`
+         return (3, None, x, 0.4)
+     nodes = [np.linspace(0, 1, n).reshape(n, 1) for _ in range(3)]
+     return (3, cartesian(nodes), x, 0.4)
+
+ def hartmann3_opt_bounds(self):
+     """Return the unit-box bounds: three (0, 1) pairs, one per coordinate."""
+     return ((0, 1),) * 3
+
+ def michal_old(self, X, sigma=0.1):
+     """Legacy Michalewicz variant: -0.5 * sum_i sin(x_i) * sin(i * x_i^2 / pi)^(2d), plus sigma * N(0, 1) noise.
+
+     X is (n, d) and i runs from 1 to d; the result is (n, 1).
+     """
+     (n, d) = X.shape
+     acc = np.zeros(shape=(n, 1))
+     for i, column in enumerate(X.T, start=1):
+         term = np.sin(column) * np.power((np.sin(i * np.power(column, 2) / np.pi)), (2 * d))
+         acc += term.reshape(n, 1)
+     return -0.5 * acc + np.random.randn(n, 1) * sigma
+
+ def stang_old(self, X, sigma=0.1):
+     """Legacy Styblinski-Tang variant: -0.5 * sum_i (x_i^4 - 16 x_i^2 + 5 x_i) / (38.7122 * d), plus noise.
+
+     X is (n, d); the result is (n, 1). The noise is sigma * N(0, 1) per row.
+     """
+     (n, d) = X.shape
+     acc = np.zeros(shape=(n, 1))
+     for column in X.T:
+         # accumulate coordinate by coordinate, in column order
+         acc += (column**4 - 16.0 * column**2 + 5 * column).reshape(n, 1)
+     acc = acc / (38.7122 * d)
+     noise = np.random.randn(n, 1) * sigma
+     return -0.5 * acc + noise
+
+ def michal_un(self, X, sigma=0.1):
+     """Unnormalised sum_i sin(t_i) * sin(i * t_i / pi)^(2d) with t = (X + 0.5) * pi, plus noise; returns (n, 1)."""
+     (n, d) = X.shape
+     angles = (X + 0.5) * np.pi
+     index = np.arange(1, d + 1, 1)
+     terms = np.sin(angles) * np.power((np.sin(index * angles / np.pi)), (2 * d))
+     return np.sum(terms, axis=1).reshape(-1, 1) + np.random.randn(n, 1) * sigma
+
+ def michal(self, X, sigma=0.1):
+     """Same sum as michal_un, divided by the value self.michal_optimum(d) reports, plus sigma * N(0, 1) noise."""
+     (n, d) = X.shape
+     angles = (X + 0.5) * np.pi
+     index = np.arange(1, d + 1, 1)
+     terms = np.sin(angles) * np.power((np.sin(index * angles / np.pi)), (2 * d))
+     scaled = np.sum(terms, axis=1).reshape(-1, 1) / self.michal_optimum(d)[1]
+     return scaled + np.random.randn(n, 1) * sigma
+
+ def michal_bounds(self, N, n, d=1, adv_inv=False):
+     """Draw N uniform starting points in [-0.5, 0.5]^d and build the n-per-axis test grid.
+
+     With adv_inv set, the points are confined to [-0.5, 0.0]^d.
+     Returns (d, xtest, x, 0.3); xtest is None when n is None.
+     """
+     upper = 0.0 if adv_inv else 0.5
+     x = np.random.uniform(-0.5, upper, size=(N, d))
+     if n is None:  # identity test instead of `== None`
+         return (d, None, x, 0.3)
+     # one freshly built node array per coordinate
+     nodes = [np.linspace(-0.5, 0.5, n).reshape(n, 1) for _ in range(d)]
+     return (d, cartesian(nodes), x, 0.3)
+
+ def michal_opt_bounds(self, d):
+     """Return d copies of the (-0.5, 0.5) bound, one pair per coordinate."""
+     return ((-0.5, 0.5),) * d
+
+ def michal_optimum(self, d):
+     """Return (opt, value): the tabulated point for dimension d and the noiseless michal_un value there.
+
+     The table has 20 entries: 19 listed coordinates and a final 1.0. Its
+     first d entries are mapped to the [-0.5, 0.5] parametrisation via
+     x / pi - 0.5, giving opt with shape (1, min(d, 20)).
+     """
+     table = np.array(
+         [
+             2.93254, 2.34661,
+             1.64107, 1.24415,
+             0.999643, 0.834879,
+             2.1089, 1.84835,
+             1.64448, 1.48089,
+             1.34678, 1.2349,
+             1.89701, 1.76194,
+             1.64477, 1.54218,
+             1.45162, 1.37109,
+             1.81774,
+             1.0,  # index 19 keeps the np.ones default of the original table
+         ]
+     )
+     opt = table[0:d].reshape(1, -1)
+     opt = (opt / np.pi) - 0.5
+     value = self.michal_un(opt, sigma=0)
+     return (opt, value[0][0])
+
+ def stang_un(self, X, sigma=0.1):
+     """Unnormalised Styblinski-Tang on z = 8 * X: -0.5 * sum_i (z_i^4 - 16 z_i^2 + 5 z_i), plus noise; returns (n, 1)."""
+     (n, d) = X.shape
+     z = X * 8
+     sq = z**2
+     total = np.sum(sq**2 - 16.0 * sq + 5 * z, axis=1).reshape(-1, 1)
+     return -0.5 * total + np.random.randn(n, 1) * sigma
+
+ def stang(self, X, sigma=0.1):
+     """Styblinski-Tang sum on z = 8 * X divided by the value self.stang_optimum(d) reports, times -0.5, plus noise."""
+     (n, d) = X.shape
+     z = X * 8
+     sq = z**2
+     total = np.sum(sq**2 - 16.0 * sq + 5 * z, axis=1).reshape(-1, 1) / self.stang_optimum(d)[1]
+     return -0.5 * total + np.random.randn(n, 1) * sigma
+
+ def stang_bounds(self, N, n, d=1, adv_inv=False):
+     """Draw N uniform starting points in [-0.5, 0.5]^d and build the n-per-axis test grid.
+
+     With adv_inv set, a notice is printed and the points are confined to
+     [0.4, 0.5]^d. Returns (d, xtest, x, 0.6); xtest is None when n is None.
+     """
+     lower = -0.5
+     if adv_inv:
+         print("Adversarially initiallized")
+         lower = 0.4
+     x = np.random.uniform(lower, 0.5, size=(N, d))
+     if n is None:  # identity test instead of `== None`
+         return (d, None, x, 0.6)
+     return (d, cartesian([np.linspace(-0.5, 0.5, n).reshape(n, 1) for _ in range(d)]), x, 0.6)
+
+ def stang_opt_bounds(self, d):
+     """Return d copies of the (-0.5, 0.5) bound, one pair per coordinate."""
+     return ((-0.5, 0.5),) * d
+
+ def stang_optimum(self, d):
+     """Return (opt, value): the point with every coordinate -2.9035 / 8 and the noiseless stang_un value there.
+
+     opt has shape (1, d); value is the [0][0] entry of self.stang_un(opt, sigma=0.0).
+     """
+     opt = (np.ones(shape=(d)) * (-2.9035) / 8).reshape(1, -1)
+     return (opt, self.stang_un(opt, sigma=0.0)[0][0])
+
+ def double_group_un(self, X, sigma=0.1):
+     """Return 0.5 * sum_i exp(-((x_{i+1} - x_i) / 0.25)^2) per row of X as (n, 1), plus sigma * N(0, 1) noise."""
+     gaps = np.diff(X, axis=1) / 0.25
+     bumps = np.exp(-(gaps**2)).sum(axis=1).reshape(-1, 1)
+     return 0.5 * bumps + np.random.randn(X.shape[0], 1) * sigma
+
+ def double_group(self, X, sigma=0.1):
+     """Same sum as double_group_un, divided by the value self.double_group_optimum(d) reports, times 0.5, plus noise."""
+     (n, d) = X.shape
+     gaps = np.diff(X, axis=1) / 0.25
+     bumps = np.exp(-(gaps**2)).sum(axis=1).reshape(-1, 1)
+     bumps = bumps / self.double_group_optimum(d)[1]
+     return 0.5 * bumps + np.random.randn(n, 1) * sigma
+
+ def double_group_bounds(self, N, n, d=1, adv_inv=False):
+     """Draw N uniform starting points in [-0.5, 0.5]^d and build the n-per-axis test grid.
+
+     With adv_inv set, a notice is printed and the points are confined to
+     [-0.5, -0.4]^d. Returns (d, xtest, x, 0.6); xtest is None when n is None.
+     """
+     upper = 0.5
+     if adv_inv:
+         print("Adversarially initiallized")
+         upper = -0.4
+     x = np.random.uniform(-0.5, upper, size=(N, d))
+     if n is None:  # identity test instead of `== None`
+         return (d, None, x, 0.6)
+     return (d, cartesian([np.linspace(-0.5, 0.5, n).reshape(n, 1) for _ in range(d)]), x, 0.6)
+
+ def double_group_opt_bounds(self, d):
+     """Return d copies of the (-0.5, 0.5) bound, one pair per coordinate."""
+     return ((-0.5, 0.5),) * d
+
+ def double_group_optimum(self, d):
+     """Return (opt, value): the all-zero (1, d) point and the noiseless double_group_un value there."""
+     origin = np.zeros(shape=(1, d))
+     return (origin, self.double_group_un(origin, 0)[0][0])
+
+ def swissfel(self, X, sigma=0.1):
+     """Return the first output of self.model.predict(X), plus self.noise-scaled Gaussian noise unless sigma == 0.0.
+
+     Only whether sigma is zero matters; the noise level is self.noise. Raises AssertionError while self.init is False.
+     """
+     if self.init == False:
+         raise AssertionError("Need to run bounds first.")
+     mean = self.model.predict(X)[0]
+     if sigma == 0.0:
+         return mean
+     return mean + np.random.randn(X.shape[0], 1) * self.noise
+
+ def swissfel_bounds(self, N, n):
+     """Initialise the SwissFEL surrogate on first use, then return (5, xtest, x, 0.1).
+
+     While self.init is False the method either fits a GPy regression model
+     with a fully additive RBF kernel to the recorded evaluations and pickles
+     the model and the estimated noise level, or -- if the model pickle is
+     already on disk -- loads both pickles. Either way self.model,
+     self.noise and self.init are set.
+
+     x holds N uniform points in [-0.5, 0.5]^5; xtest is the n-per-axis grid
+     on the same box, or None when n is None.
+
+     NOTE(review): every path below is an absolute path on the original
+     author's machine, so this only runs where those files exist.
+     NOTE(review): the pickles are read with pickle.load; only use files
+     from a trusted source.
+     """
+     if self.init == False:
+         import os.path
+
+         fname = (
+             "/home/mojko/Documents/PhD/RFFinBO/code/test_problems/swissfel_model.p"
+         )
+         noise_fname = (
+             "/home/mojko/Documents/PhD/RFFinBO/code/test_problems/swissfel_noise.p"
+         )
+         if not os.path.isfile(fname):
+             f = File(
+                 "/home/mojko/Documents/PhD/RFFinBO/code/test_problems/evaluations.hdf5"
+             )
+             dset = f["1"]
+             # y response and scale
+             Y = dset["y"][:].reshape(-1, 1)
+             Y = Y / np.max(np.abs(Y))
+
+             # noise structure
+             # NOTE(review): Y was normalised just above, so this divisor is 1 for
+             # non-degenerate data -- confirm the raw maximum was not intended.
+             Yerr = dset["y_std"] / np.max(np.abs(Y))
+             self.noise = np.std(Yerr)
+             print("Estimated noise level", self.noise)
+
+             # data scale to [-0.5,0.5]
+             X = dset["x"][:].reshape(-1, 5)
+             for j in range(5):
+                 a = np.min(X[:, j])
+                 b = np.max(X[:, j])
+                 X[:, j] = (X[:, j] / (b - a)) - 0.5 - a / (b - a)
+
+             ## fully additive kernel s
+             self.kernel = (
+                 GPy.kern.RBF(1, active_dims=[0])
+                 + GPy.kern.RBF(1, active_dims=[1])
+                 + GPy.kern.RBF(1, active_dims=[2])
+                 + GPy.kern.RBF(1, active_dims=[3])
+                 + GPy.kern.RBF(1, active_dims=[4])
+             )
+             self.model = GPy.models.GPRegression(X, Y, self.kernel)
+             print("Model fit")
+             self.model.optimize(messages=True)
+             print("ML likelihood fit")
+             self.init = True
+             # save pickle; `with` closes the handles the old code left open
+             with open(fname, "wb") as model_file:
+                 pickle.dump(self.model, model_file)
+             with open(noise_fname, "wb") as noise_file:
+                 pickle.dump(self.noise, noise_file)
+         else:
+             self.init = True
+             with open(fname, "rb") as model_file:
+                 self.model = pickle.load(model_file)
+             with open(noise_fname, "rb") as noise_file:
+                 self.noise = pickle.load(noise_file)
+
+     if n is None:
+         xtest = None
+     else:
+         arrays = [np.linspace(-0.5, 0.5, n).reshape(n, 1) for i in range(5)]
+         xtest = cartesian(arrays)
+
+     x = np.random.uniform(-0.5, 0.5, size=(N, 5))
+     return (5, xtest, x, 0.1)
+
+ def swissfel_opt_bounds(self):
+     """Return five copies of the (-0.5, 0.5) bound, one pair per coordinate."""
+     return ((-0.5, 0.5),) * 5
+
+ def swissfel_optimum(self):
+     """Search for the maximiser of the noiseless surrogate with 10 random-restart L-BFGS-B runs.
+
+     Stores the best location found in self.opt_loc and returns (self.opt_loc, best).
+     NOTE(review): best starts at -10.0, so self.opt_loc is only assigned once a run exceeds that value.
+     """
+     from scipy.optimize import minimize
+     box = self.swissfel_opt_bounds()
+     negated = lambda x: -self.swissfel(x.reshape(1, -1), sigma=0.0)[0][0]  # minimise -f to maximise f
+     best = -10.0
+     for _ in range(10):
+         start = np.random.uniform(-0.5, 0.5, size=(5,))
+         found = minimize(negated, start, method="L-BFGS-B", tol=0.0001, bounds=box).x.reshape(1, -1)
+         value = self.swissfel(found, sigma=0)
+         if value > best:
+             best, self.opt_loc = value, found
+     return (self.opt_loc, best)
+
+ def neural_net(self, X, sigma=0.1):
+     """Train one network per row of X and return the reported accuracies as an (n, 1) array.
+
+     Entry 0 of each row is passed as the dropout rate and the remaining
+     entries as initialization_params. The data set is read once, on the
+     first call (while self.sampled is False). sigma is unused.
+     """
+     (n, d) = X.shape
+     val_size = 400
+     if self.sampled == False:
+         self.sampled = True
+         try:
+             self.mnist = input_data.read_data_sets(
+                 "~/.", one_hot=True, validation_size=val_size
+             )
+         except Exception:  # narrowed from a bare `except:`; any loader error still triggers the fallback
+             self.mnist = input_data.read_data_sets("~/.", one_hot=True)
+
+     res = []
+     for x in X:
+         (it, acc) = train_network(
+             self.mnist, dropout=x[0], verbose=False, val_size=val_size, maxiter=300,
+             initialization_params=x[1:], no_filters_1=self.NN, no_filters_2=self.NN2, val_count=30,
+         )
+         res.append(acc)
+     # Return every collected accuracy. The old code reshaped only the last
+     # `acc`, which cannot fill n rows when acc is a scalar and n > 1.
+     return np.array(res).reshape(n, 1)
+
+ def neural_net_bounds(self, N, n, NN=16, NN2=22):
+     """Store the filter counts and draw N starting points with the dropout coordinate in column 0.
+
+     Returns (d + 1, xtest, x, 0.9) with d = NN + NN2; xtest is None when n is None.
+     Dropout is uniform on [0, 1], the other d coordinates on [0, 10].
+     """
+     self.NN = NN
+     self.NN2 = NN2
+     d = self.NN + self.NN2
+
+     params = np.random.uniform(0, 10, size=(N, d))
+     dropout = np.random.uniform(0, 1, size=(N, 1))
+     # Dropout goes first, as in the grid, neural_net_opt_bounds and neural_net (x[0]); it used to be appended last.
+     x = np.concatenate((dropout, params), axis=1)
+     if n is None:  # identity test instead of `== None`
+         return (d + 1, None, x, 0.9)
+     axes = [np.linspace(0, 1, n).reshape(n, 1)] + [np.linspace(0, 10, n).reshape(n, 1) for _ in range(d)]
+     return (d + 1, cartesian(axes), x, 0.9)
+
+ def neural_net_opt_bounds(self):
+     """Return box bounds: (0, 1) for the leading coordinate, then (0, 10) for each of the self.NN + self.NN2 others."""
+     width = self.NN + self.NN2
+     return ((0, 1),) + ((0, 10),) * width
if __name__ == "__main__":
- s = 0
- TT = test_function()
- Fs = [lambda x: TT.f(x, sigma=s), lambda x: TT.branin(x, sigma=s), lambda x: TT.camelback(x, sigma=s),
- lambda x: TT.hartmann3(x, sigma=s), lambda x: TT.hartmann4(x, sigma=s), lambda x: TT.hartmann6(x, sigma=s)]
- Fbounds = [lambda n: TT.f_bounds(1, n), lambda n: TT.branin_bounds(1, n), lambda n: TT.camelback_bounds(1, n),
- lambda n: TT.hartmann3_bounds(1, n), lambda n: TT.hartmann4_bounds(1, n),
- lambda n: TT.hartmann6_bounds(1, n)]
- ns = [4000, 200, 200, 100, 50, 10]
- tests = ["1D", "Branin", "Camelback", "Hartmann3", "Hartmann4", "Hartmann6"]
- z = []
- for i in range(6):
- (d, xtest, x, _) = Fbounds[i](ns[i])
- z.append(np.max(Fs[i](xtest)))
- print(tests[i], np.max(Fs[i](xtest)))
- print(z)
-
- for d, n in zip([1, 2, 3, 4], [900, 100, 50, 3]):
- G = lambda x: TT.stang(x, sigma=s)
- (q, xtest, x, _) = TT.stang_bounds(1, n, d=d)
- print(d, np.max(G(xtest)), np.max(G(xtest)) / d)
-
- # G = lambda x: TT.michal(x, sigma = s)
- # (d,xtest,x,_) = TT.michal_bounds(1,5, d = 10)
- # print (d, np.max(G(xtest)), np.max(G(xtest))/d)
-
- # for d in np.arange(1,31,1):
- # TT.optimize_f(d = d)
-
- print("==== Optimized vs Non-Optimized ==== ")
- print("Michal")
- multistart = 400
- d = 10
- G1 = lambda x: TT.michal(x, sigma=0.)
- fun = lambda x: -TT.michal(x.reshape(-1, 1), sigma=0.)[0][0]
- (d, xtest, x, _) = TT.michal_bounds(20, None, d=d)
- mybounds = TT.michal_opt_bounds(d=d)
-
- from scipy.optimize import minimize
-
- results = []
- for i in range(multistart):
- x0 = np.random.randn(d)
- for i in range(d):
- x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1])
- res = minimize(fun, x0, method="L-BFGS-B", jac=None, tol=0.00001, bounds=mybounds)
- # res = minimize(fun, x0, method = "SLSQP", jac = None, tol = 0.00001, bounds=mybounds)
- solution = res.x
- results.append([solution, -fun(solution)])
- results = np.array(results)
- print(np.max(results[:, 1]))
-
- print("Stybtang")
- for d in [10, 20]:
- multistart = 400
- G1 = lambda x: TT.stang(x, sigma=0.)
- fun = lambda x: -TT.stang(x.reshape(-1, 1), sigma=0.)[0][0]
- (d, xtest, x, _) = TT.stang_bounds(20, None, d=d)
- mybounds = TT.stang_opt_bounds(d=d)
- from scipy.optimize import minimize
-
- results = []
- for i in range(multistart):
- x0 = np.random.randn(d)
- for i in range(d):
- x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1])
- res = minimize(fun, x0, method="L-BFGS-B", jac=None, tol=0.00001, bounds=mybounds)
- # res = minimize(fun, x0, method = "SLSQP", jac = None, tol = 0.00001, bounds=mybounds)
- solution = res.x
- results.append([solution, -fun(solution)])
-
- results = np.array(results)
- print(d, np.max(results[:, 1]))
+ s = 0
+ TT = test_function()
+ Fs = [
+ lambda x: TT.f(x, sigma=s),
+ lambda x: TT.branin(x, sigma=s),
+ lambda x: TT.camelback(x, sigma=s),
+ lambda x: TT.hartmann3(x, sigma=s),
+ lambda x: TT.hartmann4(x, sigma=s),
+ lambda x: TT.hartmann6(x, sigma=s),
+ ]
+ Fbounds = [
+ lambda n: TT.f_bounds(1, n),
+ lambda n: TT.branin_bounds(1, n),
+ lambda n: TT.camelback_bounds(1, n),
+ lambda n: TT.hartmann3_bounds(1, n),
+ lambda n: TT.hartmann4_bounds(1, n),
+ lambda n: TT.hartmann6_bounds(1, n),
+ ]
+ ns = [4000, 200, 200, 100, 50, 10]
+ tests = ["1D", "Branin", "Camelback", "Hartmann3", "Hartmann4", "Hartmann6"]
+ z = []
+ for i in range(6):
+ (d, xtest, x, _) = Fbounds[i](ns[i])
+ z.append(np.max(Fs[i](xtest)))
+ print(tests[i], np.max(Fs[i](xtest)))
+ print(z)
+
+ for d, n in zip([1, 2, 3, 4], [900, 100, 50, 3]):
+ G = lambda x: TT.stang(x, sigma=s)
+ (q, xtest, x, _) = TT.stang_bounds(1, n, d=d)
+ print(d, np.max(G(xtest)), np.max(G(xtest)) / d)
+
+ # G = lambda x: TT.michal(x, sigma = s)
+ # (d,xtest,x,_) = TT.michal_bounds(1,5, d = 10)
+ # print (d, np.max(G(xtest)), np.max(G(xtest))/d)
+
+ # for d in np.arange(1,31,1):
+ # TT.optimize_f(d = d)
+
+ print("==== Optimized vs Non-Optimized ==== ")
+ print("Michal")
+ multistart = 400
+ d = 10
+ G1 = lambda x: TT.michal(x, sigma=0.0)
+ fun = lambda x: -TT.michal(x.reshape(-1, 1), sigma=0.0)[0][0]
+ (d, xtest, x, _) = TT.michal_bounds(20, None, d=d)
+ mybounds = TT.michal_opt_bounds(d=d)
+
+ from scipy.optimize import minimize
+
+ results = []
+ for i in range(multistart):
+ x0 = np.random.randn(d)
+ for i in range(d):
+ x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1])
+ res = minimize(
+ fun, x0, method="L-BFGS-B", jac=None, tol=0.00001, bounds=mybounds
+ )
+ # res = minimize(fun, x0, method = "SLSQP", jac = None, tol = 0.00001, bounds=mybounds)
+ solution = res.x
+ results.append([solution, -fun(solution)])
+ results = np.array(results)
+ print(np.max(results[:, 1]))
+
+ print("Stybtang")
+ for d in [10, 20]:
+ multistart = 400
+ G1 = lambda x: TT.stang(x, sigma=0.0)
+ fun = lambda x: -TT.stang(x.reshape(-1, 1), sigma=0.0)[0][0]
+ (d, xtest, x, _) = TT.stang_bounds(20, None, d=d)
+ mybounds = TT.stang_opt_bounds(d=d)
+ from scipy.optimize import minimize
+
+ results = []
+ for i in range(multistart):
+ x0 = np.random.randn(d)
+ for i in range(d):
+ x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1])
+ res = minimize(
+ fun, x0, method="L-BFGS-B", jac=None, tol=0.00001, bounds=mybounds
+ )
+ # res = minimize(fun, x0, method = "SLSQP", jac = None, tol = 0.00001, bounds=mybounds)
+ solution = res.x
+ results.append([solution, -fun(solution)])
+
+ results = np.array(results)
+ print(d, np.max(results[:, 1]))
# print (G1(x))
# print (G2(x))
diff --git a/tests/SRI_test.py b/tests/SRI_test.py
index 2598ca0..e6b97ae 100755
--- a/tests/SRI_test.py
+++ b/tests/SRI_test.py
@@ -4,89 +4,93 @@
def get_angle(R):
- v = torch.Tensor([1.0,1.0]).double()
- a1 = np.arccos((torch.dot(v,R@v)/torch.dot(v,v)).numpy())
- a2 = np.arccos(-(torch.dot(v,R@v)/torch.dot(v,v)).numpy())
- return np.min([a1,a2])
+ v = torch.tensor([1.0, 1.0]).double()
+ a1 = np.arccos((torch.dot(v, R @ v) / torch.dot(v, v)).numpy())
+ a2 = np.arccos(-(torch.dot(v, R @ v) / torch.dot(v, v)).numpy())
+ return np.min([a1, a2])
if __name__ == "__main__":
- from stpy.embeddings.embedding import HermiteEmbedding
- N = 1
- s = 0.0001
- n = 20
- L_infinity_ball = 0.5
-
- d = 2
-
- thetae = np.radians(35.)
- ce, se = np.cos(thetae), np.sin(thetae)
- R = torch.from_numpy(np.array(((ce, -se), (se, ce))))
-
- BenchmarkFunc = MichalBenchmark(d = d, R = R)
-
- x = BenchmarkFunc.initial_guess(N)
- xtest = BenchmarkFunc.interval(n)
- gamma = BenchmarkFunc.bandwidth()
- bounds = BenchmarkFunc.bounds()
- BenchmarkFunc.scale_max(xtest=xtest)
-
- print ("Gamma:",gamma)
-
- F = lambda x: BenchmarkFunc.eval(x, sigma=s)
- F0 = lambda x: BenchmarkFunc.eval(x, sigma=0)
-
-
- rot_out = open("rotOut.txt",'w')
-
-
- m = 64
- GP = GaussianProcessFF(d=d, s=s, m = torch.ones(d)*m, gamma=gamma*torch.ones(d), bounds=bounds, groups = stpy.helpers.helper.full_group(d))
- #GP = GaussianProcess(d =d ,s = s, gamma = gamma*torch.ones(d) ,groups = stpy.helper.full_group(d))
- #GP = GaussianProcess(d=d, s=s, gamma=gamma, groups=None)
-
- m = 512
- embedding = HermiteEmbedding(gamma=gamma, m=m, d=d, diameter=1, approx = "hermite")
- Map = lambda x: embedding.embed(x)
-
-
-
- x0 = torch.Tensor([0., 0.]).double().view(-1, d)
-# Bandit = OPPR_TS_GP(x0, F, GP, Map, finite_dim=False, s = 10e-8)
- Bandit = OPPR_TS_GP(x0, F, GP, Map, finite_dim=True, s = s, GPMap = True)
-
- Rep = 2
- Bandit.decolerate(x0,10e-9,Rep)
-
- print ("True:",thetae)
- print (R)
- print("Angle:",get_angle(R))
-
- rot_out.write(str(get_angle(R))+"\n")
-
- print ("E design:\n",Bandit.Q)
- print("Angle:",get_angle(Bandit.Q.detach()))
- rot_out.write(str(get_angle(Bandit.Q.detach()))+"\n")
-
- # Gaussian Design
- #Design = torch.randn(size = (Nd,d),dtype = torch.float64)*0.1
-
- Design = Bandit.design
- y = Bandit.value_design
- for repeats in range(5):
- B = Bandit.inverse_sliced_regression(Design,y,slices = Rep)
- print ("Recovered from SRI:\n",B)
- print (get_angle(B))
- rot_out.write(str(get_angle(B)) + " ")
-
- rot_out.write("\n")
- BB = Bandit.bootstrap_inverse_sliced_regression(Design,y,slices = Rep,repeats = 20)
- print ("Bootstrap",BB)
- rot_out.write(str(get_angle(torch.from_numpy(BB)))+"\n")
-
- for _ in range(5):
- Bandit.GP2.optimize_params(type="rots", restarts=1)
- print (Bandit.GP2.Rot)
- rot_out.write(str(get_angle(Bandit.GP2.Rot))+" ")
- rot_out.write("\n")
- rot_out.close()
+ from stpy.embeddings.embedding import HermiteEmbedding
+
+ N = 1
+ s = 0.0001
+ n = 20
+ L_infinity_ball = 0.5
+
+ d = 2
+
+ thetae = np.radians(35.0)
+ ce, se = np.cos(thetae), np.sin(thetae)
+ R = torch.from_numpy(np.array(((ce, -se), (se, ce))))
+
+ BenchmarkFunc = MichalBenchmark(d=d, R=R)
+
+ x = BenchmarkFunc.initial_guess(N)
+ xtest = BenchmarkFunc.interval(n)
+ gamma = BenchmarkFunc.bandwidth()
+ bounds = BenchmarkFunc.bounds()
+ BenchmarkFunc.scale_max(xtest=xtest)
+
+ print("Gamma:", gamma)
+
+ F = lambda x: BenchmarkFunc.eval(x, sigma=s)
+ F0 = lambda x: BenchmarkFunc.eval(x, sigma=0)
+
+ rot_out = open("rotOut.txt", "w")
+
+ m = 64
+ GP = GaussianProcessFF(
+ d=d,
+ s=s,
+ m=torch.ones(d) * m,
+ gamma=gamma * torch.ones(d),
+ bounds=bounds,
+ groups=stpy.helpers.helper.full_group(d),
+ )
+ # GP = GaussianProcess(d =d ,s = s, gamma = gamma*torch.ones(d) ,groups = stpy.helper.full_group(d))
+ # GP = GaussianProcess(d=d, s=s, gamma=gamma, groups=None)
+
+ m = 512
+ embedding = HermiteEmbedding(gamma=gamma, m=m, d=d, diameter=1, approx="hermite")
+ Map = lambda x: embedding.embed(x)
+
+ x0 = torch.tensor([0.0, 0.0]).double().view(-1, d)
+ # Bandit = OPPR_TS_GP(x0, F, GP, Map, finite_dim=False, s = 10e-8)
+ Bandit = OPPR_TS_GP(x0, F, GP, Map, finite_dim=True, s=s, GPMap=True)
+
+ Rep = 2
+ Bandit.decolerate(x0, 10e-9, Rep)
+
+ print("True:", thetae)
+ print(R)
+ print("Angle:", get_angle(R))
+
+ rot_out.write(str(get_angle(R)) + "\n")
+
+ print("E design:\n", Bandit.Q)
+ print("Angle:", get_angle(Bandit.Q.detach()))
+ rot_out.write(str(get_angle(Bandit.Q.detach())) + "\n")
+
+ # Gaussian Design
+ # Design = torch.randn(size = (Nd,d),dtype = torch.float64)*0.1
+
+ Design = Bandit.design
+ y = Bandit.value_design
+ for repeats in range(5):
+ B = Bandit.inverse_sliced_regression(Design, y, slices=Rep)
+ print("Recovered from SRI:\n", B)
+ print(get_angle(B))
+ rot_out.write(str(get_angle(B)) + " ")
+
+ rot_out.write("\n")
+ BB = Bandit.bootstrap_inverse_sliced_regression(Design, y, slices=Rep, repeats=20)
+ print("Bootstrap", BB)
+ rot_out.write(str(get_angle(torch.from_numpy(BB))) + "\n")
+
+ for _ in range(5):
+ Bandit.GP2.optimize_params(type="rots", restarts=1)
+ print(Bandit.GP2.Rot)
+ rot_out.write(str(get_angle(Bandit.GP2.Rot)) + " ")
+ rot_out.write("\n")
+ rot_out.close()
diff --git a/tests/clenshaw_curtis_test.py b/tests/clenshaw_curtis_test.py
index b3f96d1..c684070 100644
--- a/tests/clenshaw_curtis_test.py
+++ b/tests/clenshaw_curtis_test.py
@@ -3,35 +3,40 @@
if __name__ == "__main__":
- ### Generate data - a sample from a Gaussian process
- n = 1024
- N = 5
- gamma = 0.09
- #gamma = 1.
- s = 0.2
- # benchmark = stpy.test_functions.benchmarks.GaussianProcessSample(d =1, gamma = gamma, sigma = s, n = n)
- benchmark = stpy.test_functions.benchmarks.Simple1DFunction(d=1, sigma=s)
- for j in range(10):
- m = (2*(j+1)) ** 2
- #m = 64
- x = benchmark.initial_guess(N, adv_inv=False)
- y = benchmark.eval(x)
- xtest = benchmark.interval(1024)
+ ### Generate data - a sample from a Gaussian process
+ n = 1024
+ N = 5
+ gamma = 0.09
+ # gamma = 1.
+ s = 0.2
+ # benchmark = stpy.test_functions.benchmarks.GaussianProcessSample(d =1, gamma = gamma, sigma = s, n = n)
+ benchmark = stpy.test_functions.benchmarks.Simple1DFunction(d=1, sigma=s)
+ for j in range(10):
+ m = (2 * (j + 1)) ** 2
+ # m = 64
+ x = benchmark.initial_guess(N, adv_inv=False)
+ y = benchmark.eval(x)
+ xtest = benchmark.interval(1024)
- #print (x)
- CFF = stpy.continuous_processes.fourier_fea.GaussianProcessFF(gamma=gamma, approx="ccff", m=m, s=s)
- QFF = stpy.continuous_processes.fourier_fea.GaussianProcessFF(gamma=gamma, approx="hermite", m=m, s=s)
- TFF = stpy.continuous_processes.fourier_fea.GaussianProcessFF(gamma=gamma, approx="trapezoidal", m=m, s=s)
+ # print (x)
+ CFF = stpy.continuous_processes.fourier_fea.GaussianProcessFF(
+ gamma=gamma, approx="ccff", m=m, s=s
+ )
+ QFF = stpy.continuous_processes.fourier_fea.GaussianProcessFF(
+ gamma=gamma, approx="hermite", m=m, s=s
+ )
+ TFF = stpy.continuous_processes.fourier_fea.GaussianProcessFF(
+ gamma=gamma, approx="trapezoidal", m=m, s=s
+ )
- K1 = TFF.embed(x)@TFF.embed(x).T
- K2 = QFF.embed(x) @ QFF.embed(x).T
- K3 = CFF.embed(x) @ CFF.embed(x).T
- # print(K2)
- # print("----------------")
- #print(K3)
- # print("----------------")
- print(m, torch.norm(K1 - K2), torch.norm(K2 -K3))
-
- #CFF.fit_gp(x,y)
- #CFF.visualize(xtest)
+ K1 = TFF.embed(x) @ TFF.embed(x).T
+ K2 = QFF.embed(x) @ QFF.embed(x).T
+ K3 = CFF.embed(x) @ CFF.embed(x).T
+ # print(K2)
+ # print("----------------")
+ # print(K3)
+ # print("----------------")
+ print(m, torch.norm(K1 - K2), torch.norm(K2 - K3))
+ # CFF.fit_gp(x,y)
+ # CFF.visualize(xtest)
diff --git a/tests/constrained_mean.py b/tests/constrained_mean.py
index 1bec7ee..7491a48 100644
--- a/tests/constrained_mean.py
+++ b/tests/constrained_mean.py
@@ -5,19 +5,19 @@
import matplotlib.pyplot as plt
if __name__ == "__main__":
- d = 1
- p = 4
- embed_p = ChebyschevEmbedding(d=d, p=p)
- m = embed_p.size
- GP = KernelizedFeatures(embeding=embed_p, m=m, d=d)
+ d = 1
+ p = 4
+ embed_p = ChebyschevEmbedding(d=d, p=p)
+ m = embed_p.size
+ GP = KernelizedFeatures(embeding=embed_p, m=m, d=d)
- x = torch.from_numpy(interval(10,d))
- xtest = torch.from_numpy(interval(1024, d))
- GP.fit_gp(x, x**8)
+ x = torch.from_numpy(interval(10, d))
+ xtest = torch.from_numpy(interval(1024, d))
+ GP.fit_gp(x, x**8)
- mu = GP.mean_constrained(xtest, B = 0.5)
+ mu = GP.mean_constrained(xtest, B=0.5)
- GP.visualize(xtest, show = False)
- #plt.plot(x, x**8,'o')
- plt.plot(xtest,mu)
- plt.show()
\ No newline at end of file
+ GP.visualize(xtest, show=False)
+ # plt.plot(x, x**8,'o')
+ plt.plot(xtest, mu)
+ plt.show()
diff --git a/tests/continous_processes/psd_minimization/eigenvector_constraint.py b/tests/continous_processes/psd_minimization/eigenvector_constraint.py
index 5cadc87..cad3c1f 100644
--- a/tests/continous_processes/psd_minimization/eigenvector_constraint.py
+++ b/tests/continous_processes/psd_minimization/eigenvector_constraint.py
@@ -4,7 +4,12 @@
import torch
-from stpy.embeddings.embedding import HermiteEmbedding, RFFEmbedding, ConcatEmbedding, MaskedEmbedding
+from stpy.embeddings.embedding import (
+ HermiteEmbedding,
+ RFFEmbedding,
+ ConcatEmbedding,
+ MaskedEmbedding,
+)
from stpy.kernels import KernelFunction
from stpy.helpers.helper import interval, interval_torch
from stpy.probability.gaussian_likelihood import GaussianLikelihood
@@ -21,12 +26,14 @@
m = 32
def stable_rank(A):
- return np.trace(A)/np.max(np.linalg.eigh(A)[0])
-
+ return np.trace(A) / np.max(np.linalg.eigh(A)[0])
V = torch.linalg.qr(torch.randn(size=(m, m)).double())[0]
- f = lambda x: 0.5*torch.sin(x * 20) * (x > 0).double() + 0.5*torch.sin(x * 30) * (x > 0).double()
+ f = (
+ lambda x: 0.5 * torch.sin(x * 20) * (x > 0).double()
+ + 0.5 * torch.sin(x * 30) * (x > 0).double()
+ )
Xtrain = interval_torch(n=N, d=1)
ytrain = f(Xtrain)
@@ -45,8 +52,8 @@ def stable_rank(A):
A1 = cp.Variable((m // 2, m // 2), PSD=True)
A2 = cp.Variable((m // 2, m // 2), PSD=True)
A3 = cp.Variable((m // 2, m // 2))
- l = cp.Variable((1,1))
- s = cp.Parameter((1, 1), nonneg = True)
+ l = cp.Variable((1, 1))
+ s = cp.Parameter((1, 1), nonneg=True)
likelihood = GaussianLikelihood(sigma=s)
estimator = RegularizedDictionary(embedding, likelihood)
@@ -55,43 +62,61 @@ def stable_rank(A):
likelihood = estimator.likelihood
likelihood.load_data(data)
- total_trace = 2.
+ total_trace = 2.0
objective = likelihood.get_objective_cvxpy()(theta)
A = cp.bmat([[A1, A3], [A3, A2]])
- s.value = np.array([[1.]])
- constraints = [cp.matrix_frac(theta, A) <= 1, cp.trace(A) <= total_trace*l, A >> 0,cp.lambda_max(A)<=l]
+ s.value = np.array([[1.0]])
+ constraints = [
+ cp.matrix_frac(theta, A) <= 1,
+ cp.trace(A) <= total_trace * l,
+ A >> 0,
+ cp.lambda_max(A) <= l,
+ ]
prob = cp.Problem(cp.Minimize(objective), constraints)
prob.solve(solver=cp.MOSEK, verbose=True)
estimator.theta_fit = theta.value
estimator.fitted = True
- print (prob.value)
- print (np.max(np.linalg.eigh(A.value)[0]))
- print (l.value)
+ print(prob.value)
+ print(np.max(np.linalg.eigh(A.value)[0]))
+ print(l.value)
print("--------------")
if theta.value is not None:
mu = estimator.mean(xtest)
- plt.plot(xtest,mu, 'b', lw = 3, label = 'opt')
-
- plt.plot(Xtrain,ytrain,'ko', lw = 3)
- plt.plot(xtest,f(xtest),'k--', lw = 3)
-
- constraints = [cp.matrix_frac(theta, A) <= 1, cp.trace(A) <= total_trace*l, A >> 0,cp.lambda_max(A)<=l, l<=s]
+ plt.plot(xtest, mu, "b", lw=3, label="opt")
+
+ plt.plot(Xtrain, ytrain, "ko", lw=3)
+ plt.plot(xtest, f(xtest), "k--", lw=3)
+
+ constraints = [
+ cp.matrix_frac(theta, A) <= 1,
+ cp.trace(A) <= total_trace * l,
+ A >> 0,
+ cp.lambda_max(A) <= l,
+ l <= s,
+ ]
prob = cp.Problem(cp.Minimize(objective), constraints)
prob.solve(solver=cp.MOSEK, verbose=True)
def cost(z):
s.value = z
prob.solve()
- return prob.value, total_trace * l.value, l.value, (np.max(np.linalg.eigh(A.value)[0])), np.trace(A.value), stable_rank(A.value)
-
- z_vals = np.logspace(-5,5,20, base = 2)
+ return (
+ prob.value,
+ total_trace * l.value,
+ l.value,
+ (np.max(np.linalg.eigh(A.value)[0])),
+ np.trace(A.value),
+ stable_rank(A.value),
+ )
+
+ z_vals = np.logspace(-5, 5, 20, base=2)
l_vals = []
eigvals = []
differences = []
for z in z_vals:
- prob_val, _, l_val, eigv, _ , _ = cost(np.array([[z]]))
+ prob_val, _, l_val, eigv, _, _ = cost(np.array([[z]]))
estimator.theta_fit = theta.value
estimator.fitted = True
mu = estimator.mean(xtest)
@@ -99,19 +124,18 @@ def cost(z):
eigvals.append(float(eigv))
differences.append(float(l_val) - float(eigv))
- print (z, float(l_val) - float(eigv))
+ print(z, float(l_val) - float(eigv))
- if float(l_val) - float(eigv) <= 1e-2 and float(l_val) - float(eigv)>=0:
- plt.plot(xtest,mu, 'g--', lw = 3, label = 'stable-rank')
+ if float(l_val) - float(eigv) <= 1e-2 and float(l_val) - float(eigv) >= 0:
+ plt.plot(xtest, mu, "g--", lw=3, label="stable-rank")
plt.show()
- plt.plot(z_vals.reshape(-1),l_vals, label = 'lvals')
- plt.plot(z_vals.reshape(-1),eigvals, label = 'eig')
+ plt.plot(z_vals.reshape(-1), l_vals, label="lvals")
+ plt.plot(z_vals.reshape(-1), eigvals, label="eig")
# plt.plot(z_vals.reshape(-1), differences, label='diff')
plt.legend()
plt.show()
-
#
# # Fix an eigenvector
# v_init = np.zeros(shape=(m, 1))
@@ -173,4 +197,4 @@ def cost(z):
# # grad = euclidean_gradient(w)
# # w = w - eta * grad
# # w = proj(w)
- # # print (i, value(w))
\ No newline at end of file
+ # # print (i, value(w))
diff --git a/tests/continous_processes/psd_minimization/psd_minimization.py b/tests/continous_processes/psd_minimization/psd_minimization.py
index 80c9e70..686ccdf 100644
--- a/tests/continous_processes/psd_minimization/psd_minimization.py
+++ b/tests/continous_processes/psd_minimization/psd_minimization.py
@@ -1,11 +1,21 @@
-from stpy.embeddings.embedding import HermiteEmbedding, RFFEmbedding, ConcatEmbedding, MaskedEmbedding
+from stpy.embeddings.embedding import (
+ HermiteEmbedding,
+ RFFEmbedding,
+ ConcatEmbedding,
+ MaskedEmbedding,
+)
import pymanopt
import cvxpy as cp
import numpy as np
import torch
from cvxpylayers.torch import CvxpyLayer
-from stpy.embeddings.embedding import HermiteEmbedding, RFFEmbedding, ConcatEmbedding, MaskedEmbedding
+from stpy.embeddings.embedding import (
+ HermiteEmbedding,
+ RFFEmbedding,
+ ConcatEmbedding,
+ MaskedEmbedding,
+)
from stpy.kernels import KernelFunction
from stpy.helpers.helper import interval, interval_torch
from stpy.probability.gaussian_likelihood import GaussianLikelihood
@@ -15,7 +25,6 @@
if __name__ == "__main__":
-
N = 10
n = 256
d = 1
@@ -32,71 +41,71 @@
xtest = torch.from_numpy(interval(n, d, L_infinity_ball=1))
kernel_object = KernelFunction(gamma=0.05, d=1)
- #embedding = HermiteEmbedding(m=m, gamma = 1.)
+ # embedding = HermiteEmbedding(m=m, gamma = 1.)
- embedding1 = NystromFeatures(kernel_object=kernel_object, m=m//2)
+ embedding1 = NystromFeatures(kernel_object=kernel_object, m=m // 2)
embedding1.fit_gp(xtest / 2 - 0.5, None)
- embedding2 = NystromFeatures(kernel_object=kernel_object, m=m//2)
+ embedding2 = NystromFeatures(kernel_object=kernel_object, m=m // 2)
embedding2.fit_gp(xtest / 2 + 0.5, None)
embedding = ConcatEmbedding([embedding1, embedding2])
-
- theta = cp.Variable((m,1))
- A1 = cp.Variable((m//2,m//2), PSD = True)
- A2 = cp.Variable((m//2, m//2), PSD=True)
- A3 = cp.Variable((m//2, m//2))
+ theta = cp.Variable((m, 1))
+ A1 = cp.Variable((m // 2, m // 2), PSD=True)
+ A2 = cp.Variable((m // 2, m // 2), PSD=True)
+ A3 = cp.Variable((m // 2, m // 2))
t = cp.Variable()
likelihood = GaussianLikelihood(sigma=s)
estimator = RegularizedDictionary(embedding, likelihood)
- data = (embedding.embed(Xtrain),ytrain)
+ data = (embedding.embed(Xtrain), ytrain)
estimator.load_data(data)
likelihood = estimator.likelihood
likelihood.load_data(data)
- total_trace = 5.
+ total_trace = 5.0
objective = likelihood.get_objective_cvxpy()(theta)
- A = cp.bmat([[A1,A3],[A3,A2]])
- constraints = [cp.matrix_frac(theta, A) <= 1, cp.trace(A) <= total_trace, A >> 0]
+ A = cp.bmat([[A1, A3], [A3, A2]])
+ constraints = [cp.matrix_frac(theta, A) <= 1, cp.trace(A) <= total_trace, A >> 0]
prob = cp.Problem(cp.Minimize(objective), constraints)
- prob.solve(solver = cp.MOSEK, verbose = True)
+ prob.solve(solver=cp.MOSEK, verbose=True)
estimator.theta_fit = theta.value
estimator.fitted = True
- print (prob.value)
- #plt.imshow(A.value)
- #plt.show()
+ print(prob.value)
+ # plt.imshow(A.value)
+ # plt.show()
if theta.value is not None:
mu = estimator.mean(xtest)
- plt.plot(xtest,mu, 'b', lw = 3, label = 'opt')
-
- plt.plot(Xtrain,ytrain,'ko', lw = 3)
- plt.plot(xtest,f(xtest),'k--', lw = 3)
+ plt.plot(xtest, mu, "b", lw=3, label="opt")
+ plt.plot(Xtrain, ytrain, "ko", lw=3)
+ plt.plot(xtest, f(xtest), "k--", lw=3)
- theta = cp.Variable((m,1))
+ theta = cp.Variable((m, 1))
V = cp.Parameter((m, m))
objective = likelihood.get_objective_cvxpy()(theta)
a = cp.Variable(m)
- A = cp.Variable((m,m))
+ A = cp.Variable((m, m))
- constraints = [cp.matrix_frac(V.T@theta, cp.diag(a)) <= 1., a>=0, cp.sum(a)<=total_trace]
+ constraints = [
+ cp.matrix_frac(V.T @ theta, cp.diag(a)) <= 1.0,
+ a >= 0,
+ cp.sum(a) <= total_trace,
+ ]
prob = cp.Problem(cp.Minimize(objective), constraints)
-
-
- manifold = pymanopt.manifolds.Stiefel(m,m)
+ manifold = pymanopt.manifolds.Stiefel(m, m)
def opt(V_val):
V.value = V_val
- prob.solve(solver = cp.MOSEK, verbose = False)
+ prob.solve(solver=cp.MOSEK, verbose=False)
return theta.value
@pymanopt.function.numpy(manifold)
def cost(V_val):
V.value = V_val
- prob.solve(requires_grad=True, solver = cp.SCS)
+ prob.solve(requires_grad=True, solver=cp.SCS)
return prob.value
@pymanopt.function.numpy(manifold)
@@ -106,56 +115,53 @@ def euclidean_gradient(V_val):
prob.backward()
return V.gradient
- print ("INITIAL COST:", cost(np.eye(m)))
+ print("INITIAL COST:", cost(np.eye(m)))
problem = pymanopt.Problem(manifold, cost, euclidean_gradient=euclidean_gradient)
optimizer = pymanopt.optimizers.SteepestDescent(min_step_size=1e-15)
- result = optimizer.run(problem, initial_point = np.eye(m))
+ result = optimizer.run(problem, initial_point=np.eye(m))
V_val = result.point
- #V_val = np.eye(m)
- #print (result)
- print (V_val@V_val.T)
- print ("END COST:", cost(V_val))
+ # V_val = np.eye(m)
+ # print (result)
+ print(V_val @ V_val.T)
+ print("END COST:", cost(V_val))
estimator.theta_fit = opt(V_val)
estimator.fitted = True
mu = estimator.mean(xtest)
- plt.plot(xtest,mu, 'r--', lw = 3, label = 'ortho opt')
-
-
-
-
-
-
+ plt.plot(xtest, mu, "r--", lw=3, label="ortho opt")
estimator.theta_fit = opt(np.eye(m))
mu = estimator.mean(xtest)
- plt.plot(xtest,mu, 'g--', lw = 3, label = 'A identity')
-
+ plt.plot(xtest, mu, "g--", lw=3, label="A identity")
# simplified objective
- theta = cp.Variable((m,1))
+ theta = cp.Variable((m, 1))
objective = likelihood.get_objective_cvxpy()(theta)
- constraints = [cp.sum_squares(theta) <= total_trace/m]
+ constraints = [cp.sum_squares(theta) <= total_trace / m]
prob_simple = cp.Problem(cp.Minimize(objective), constraints)
prob_simple.solve()
- print ("SIMPLE COST:",prob_simple.value)
+ print("SIMPLE COST:", prob_simple.value)
estimator.theta_fit = theta.value
mu = estimator.mean(xtest)
- plt.plot(xtest,mu, 'tab:purple', lw = 3, label = 'simple solution')
+ plt.plot(xtest, mu, "tab:purple", lw=3, label="simple solution")
- theta = cp.Variable((m,1))
+ theta = cp.Variable((m, 1))
V = cp.Parameter((m, m))
objective = likelihood.get_objective_cvxpy()(theta)
a = cp.Variable(m)
- A = cp.Variable((m,m), PSD=True)
- constraints = [cp.matrix_frac(theta, cp.diag(a)) <= 1., a>=0, cp.sum(a)<=total_trace]
+ A = cp.Variable((m, m), PSD=True)
+ constraints = [
+ cp.matrix_frac(theta, cp.diag(a)) <= 1.0,
+ a >= 0,
+ cp.sum(a) <= total_trace,
+ ]
prob_complicated = cp.Problem(cp.Minimize(objective), constraints)
- prob_complicated.solve(solver = cp.MOSEK , verbose = True)
+ prob_complicated.solve(solver=cp.MOSEK, verbose=True)
estimator.theta_fit = theta.value
mu = estimator.mean(xtest)
- plt.plot(xtest,mu, 'tab:brown', lw = 3, label = 'soln')
+ plt.plot(xtest, mu, "tab:brown", lw=3, label="soln")
plt.legend()
- plt.show()
\ No newline at end of file
+ plt.show()
diff --git a/tests/continous_processes/test_estimators/domain_non_stationarity.py b/tests/continous_processes/test_estimators/domain_non_stationarity.py
index 035d8be..b772dfb 100644
--- a/tests/continous_processes/test_estimators/domain_non_stationarity.py
+++ b/tests/continous_processes/test_estimators/domain_non_stationarity.py
@@ -13,19 +13,23 @@
m = 200
d = 1
sigma = 0.005
-lam = 1.
+lam = 1.0
n = 256
I = torch.eye(m).double()
-budget = m*1
-kernel_object = KernelFunction(gamma = 0.1, d = 1)
+budget = m * 1
+kernel_object = KernelFunction(gamma=0.1, d=1)
-embedding1 = TriangleEmbedding(m = m, d = 1, kernel_object=kernel_object, interval=[-1,0], offset=0.0)
-embedding2 = TriangleEmbedding(m = m, d = 1, kernel_object=kernel_object, interval=[0,1], offset=0.0)
+embedding1 = TriangleEmbedding(
+ m=m, d=1, kernel_object=kernel_object, interval=[-1, 0], offset=0.0
+)
+embedding2 = TriangleEmbedding(
+ m=m, d=1, kernel_object=kernel_object, interval=[0, 1], offset=0.0
+)
-embedding = ConcatEmbedding([embedding1,embedding2])
+embedding = ConcatEmbedding([embedding1, embedding2])
-likelihood_base = GaussianLikelihood(sigma = sigma)
+likelihood_base = GaussianLikelihood(sigma=sigma)
# for w,g in zip(weights,new_groups):
@@ -38,17 +42,19 @@
N = 20
torch.manual_seed(2)
+
def zeroing(X):
Y = X.clone()
- Y[ X < 0.] = 0.
+ Y[X < 0.0] = 0.0
return Y
-F = lambda X: (np.cos(X*10.)+np.sin(X*10.))*zeroing(X)
+
+F = lambda X: (np.cos(X * 10.0) + np.sin(X * 10.0)) * zeroing(X)
# X = torch.rand(size = (N,d)).double()*0.25+0.5
# y = F(X)
#
-# Xpoint = torch.Tensor([[0.],[0.5]]).double()
-# ypoint = torch.Tensor([[0.],[0.]]).double()
+# Xpoint = torch.tensor([[0.],[0.5]]).double()
+# ypoint = torch.tensor([[0.],[0.]]).double()
#
# X = torch.vstack([X,Xpoint])
# y = torch.vstack([y,ypoint])
@@ -58,73 +64,99 @@ def zeroing(X):
# F = lambda X: estimator.mean(X)
-Xtrain = torch.rand(size=(10, d)).double()/2
+Xtrain = torch.rand(size=(10, d)).double() / 2
ytrain = F(Xtrain) + sigma * torch.randn(size=(Xtrain.size()[0], 1))
-
def update():
pass
-alphas = [5,10]#,0.01,0.001]
-lams_uns = [0.01,0.05,0.1]
+
+
+alphas = [5, 10] # ,0.01,0.001]
+lams_uns = [0.01, 0.05, 0.1]
# alphas = [0.01]
# lams_uns = [0.1]
fig, axs = plt.subplots(len(alphas), len(lams_uns))
for index1, alpha in enumerate(alphas):
- lams = [la/alpha for la in lams_uns]#, 0.01/alpha]#,16.,32.,64.,128.]
+ lams = [la / alpha for la in lams_uns] # , 0.01/alpha]#,16.,32.,64.,128.]
for index2, lam in enumerate(lams):
- print ("Regularizer:", alpha, lam)
+ print("Regularizer:", alpha, lam)
- xtest = interval_torch(n = n,d = 1)
+ xtest = interval_torch(n=n, d=1)
groups = [list(range(m)), list(range(m, 2 * m, 1))]
new_groups = groups.copy()
weights = [alpha**2 for g in groups]
for j in range(len(groups)):
for i in range(j + 1, len(groups), 1):
new_groups.append(groups[j] + groups[i])
- weights.append(1.)
+ weights.append(1.0)
- regularizer = NestedGroupL1L2Regularizer(lam = lam, groups = new_groups, weights = weights)
+ regularizer = NestedGroupL1L2Regularizer(
+ lam=lam, groups=new_groups, weights=weights
+ )
constraint = regularizer.get_constraint_object(budget)
likelihood = GaussianLikelihood(sigma=sigma)
- estimator_train = RegularizedDictionary(embedding, likelihood, regularizer, constraints = constraint, use_constraint=True)
-
- estimator_train.load_data((Xtrain,ytrain))
+ estimator_train = RegularizedDictionary(
+ embedding,
+ likelihood,
+ regularizer,
+ constraints=constraint,
+ use_constraint=True,
+ )
+
+ estimator_train.load_data((Xtrain, ytrain))
estimator_train.fit()
mean = estimator_train.mean(xtest)
-
-
-
- if max(len(alphas),len(lams_uns))>1:
- #axs[index1,index2].subplot(len(lams),len(alphas),index1+1, index2+1)
- axs[index1,index2].plot(Xtrain, ytrain, 'ro', ms=15)
- axs[index1,index2].plot(xtest, F(xtest), lw = 4)
- p = axs[index1,index2].plot(xtest, mean, lw = 4, label = "$\\lambda = "+str(lam)+", \\alpha ="+str(alpha)+" $")
+ if max(len(alphas), len(lams_uns)) > 1:
+ # axs[index1,index2].subplot(len(lams),len(alphas),index1+1, index2+1)
+ axs[index1, index2].plot(Xtrain, ytrain, "ro", ms=15)
+ axs[index1, index2].plot(xtest, F(xtest), lw=4)
+ p = axs[index1, index2].plot(
+ xtest,
+ mean,
+ lw=4,
+ label="$\\lambda = " + str(lam) + ", \\alpha =" + str(alpha) + " $",
+ )
# xtest1 = torch.linspace(0.0,0.5,n//4).double().view(-1,1)
# xtest2 = torch.linspace(-1.0,-0.5,n//4).double().view(-1,1)
# conf_xtest = torch.vstack([xtest1,xtest2])
- ucb = estimator_train.ucb(xtest, type = "LR_static")
- lcb = estimator_train.lcb(xtest, type = "LR_static")
- axs[index1,index2].fill_between(xtest.view(-1), lcb.view(-1), ucb.view(-1), alpha = 0.1, color = p[0].get_color())
- #axs[index1,index2].legend(fontsize = 15)
+ ucb = estimator_train.ucb(xtest, type="LR_static")
+ lcb = estimator_train.lcb(xtest, type="LR_static")
+ axs[index1, index2].fill_between(
+ xtest.view(-1),
+ lcb.view(-1),
+ ucb.view(-1),
+ alpha=0.1,
+ color=p[0].get_color(),
+ )
+ # axs[index1,index2].legend(fontsize = 15)
else:
- axs.plot(Xtrain, ytrain, 'ro', ms=15)
+ axs.plot(Xtrain, ytrain, "ro", ms=15)
axs.plot(xtest, F(xtest), lw=4)
- p = axs.plot(xtest, mean, lw=4,
- label="$\\lambda = " + str(lam) + ", \\alpha =" + str(alpha) + " $")
+ p = axs.plot(
+ xtest,
+ mean,
+ lw=4,
+ label="$\\lambda = " + str(lam) + ", \\alpha =" + str(alpha) + " $",
+ )
# xtest1 = torch.linspace(0.0,0.5,n//4).double().view(-1,1)
# xtest2 = torch.linspace(-1.0,-0.5,n//4).double().view(-1,1)
# conf_xtest = torch.vstack([xtest1,xtest2])
ucb = estimator_train.ucb(xtest, type="LR_static")
lcb = estimator_train.lcb(xtest, type="LR_static")
- axs.fill_between(xtest.view(-1), lcb.view(-1), ucb.view(-1), alpha=0.1,
- color=p[0].get_color())
- #axs.legend(fontsize=15)
-plt.savefig("image.png", dpi = 300)
-plt.show()
\ No newline at end of file
+ axs.fill_between(
+ xtest.view(-1),
+ lcb.view(-1),
+ ucb.view(-1),
+ alpha=0.1,
+ color=p[0].get_color(),
+ )
+ # axs.legend(fontsize=15)
+plt.savefig("image.png", dpi=300)
+plt.show()
diff --git a/tests/continous_processes/test_estimators/group l_q_estimator.py b/tests/continous_processes/test_estimators/group l_q_estimator.py
index 86a72e1..d431303 100644
--- a/tests/continous_processes/test_estimators/group l_q_estimator.py
+++ b/tests/continous_processes/test_estimators/group l_q_estimator.py
@@ -16,48 +16,48 @@
m = 128
d = 1
sigma = 0.01
-lam = 1.
+lam = 1.0
n = 256
N = 10
-kernel_object = KernelFunction(gamma = 0.05, d = 1)
-#embedding = HermiteEmbedding(m = m, d = 1)
-xtest = interval_torch(n = n,d = 1)
+kernel_object = KernelFunction(gamma=0.05, d=1)
+# embedding = HermiteEmbedding(m = m, d = 1)
+xtest = interval_torch(n=n, d=1)
-embedding1 = NystromFeatures(kernel_object = kernel_object, m = m )
-embedding1.fit_gp(xtest/2-0.5,None)
-embedding2 = NystromFeatures(kernel_object = kernel_object, m = m )
-embedding2.fit_gp(xtest/2+0.5,None)
-embedding = ConcatEmbedding([embedding1,embedding2])
+embedding1 = NystromFeatures(kernel_object=kernel_object, m=m)
+embedding1.fit_gp(xtest / 2 - 0.5, None)
+embedding2 = NystromFeatures(kernel_object=kernel_object, m=m)
+embedding2.fit_gp(xtest / 2 + 0.5, None)
+embedding = ConcatEmbedding([embedding1, embedding2])
-qs = [0.01, 0.2,0.5,0.8]
+qs = [0.01, 0.2, 0.5, 0.8]
groups = [list(range(m)), list(range(m, 2 * m, 1))]
-print (groups)
+print(groups)
regularizers = []
-#regularizers += [L1Regularizer(lam = lam), L2Regularizer(lam = lam)]
-#regularizers += [NonConvexLqRegularizer(lam = lam, q = q) for q in qs]
-regularizers += [GroupNonCovexLqRegularizer(lam = lam, q = q, groups=groups) for q in qs]
+# regularizers += [L1Regularizer(lam = lam), L2Regularizer(lam = lam)]
+# regularizers += [NonConvexLqRegularizer(lam = lam, q = q) for q in qs]
+regularizers += [GroupNonCovexLqRegularizer(lam=lam, q=q, groups=groups) for q in qs]
likelihood = GaussianLikelihood(sigma=sigma)
names = []
-#names += ["L1", "L2"]
-#names += ["L"+str(q) for q in qs]
-names += ["group L"+str(q) for q in qs]
+# names += ["L1", "L2"]
+# names += ["L"+str(q) for q in qs]
+names += ["group L" + str(q) for q in qs]
-f = lambda x: torch.sin(x*20)*(x>0).double()
-Xtrain = interval_torch(n = N, d= 1)
+f = lambda x: torch.sin(x * 20) * (x > 0).double()
+Xtrain = interval_torch(n=N, d=1)
ytrain = f(Xtrain)
-for name,regularizer in zip(names,regularizers):
+for name, regularizer in zip(names, regularizers):
estimator = RegularizedDictionary(embedding, likelihood, regularizer)
- estimator.load_data((Xtrain,ytrain))
+ estimator.load_data((Xtrain, ytrain))
estimator.fit()
mean = estimator.mean(xtest)
print(name, "support:", torch.sum(estimator.theta_fit > 1e-8))
- plt.plot(xtest, mean, label = name, lw = 3, alpha = 0.5)
+ plt.plot(xtest, mean, label=name, lw=3, alpha=0.5)
-plt.plot(Xtrain,ytrain,'ko', lw = 3)
-plt.plot(xtest,f(xtest),'k--', lw = 3)
+plt.plot(Xtrain, ytrain, "ko", lw=3)
+plt.plot(xtest, f(xtest), "k--", lw=3)
plt.legend()
-plt.show()
\ No newline at end of file
+plt.show()
diff --git a/tests/continous_processes/test_estimators/group_l_q_estimator_budget.py b/tests/continous_processes/test_estimators/group_l_q_estimator_budget.py
index c22ffc5..0798487 100644
--- a/tests/continous_processes/test_estimators/group_l_q_estimator_budget.py
+++ b/tests/continous_processes/test_estimators/group_l_q_estimator_budget.py
@@ -6,6 +6,7 @@
from stpy.kernels import KernelFunction
from stpy.regularization.simplex_regularizer import SupRegularizer
from stpy.continuous_processes.mkl_estimator import MultipleKernelLearner
+
"""
This script test and compares Lq estimators
compare L1, L2 and Lq estimators
@@ -14,35 +15,41 @@
m = 128
d = 1
sigma = 0.01
-lam = 1.
+lam = 1.0
n = 128
N = 10
-kernel_object = KernelFunction(gamma = 0.05, d = 1)
-#embedding = HermiteEmbedding(m = m, d = 1)
-xtest = interval_torch(n = n,d = 1)
+kernel_object = KernelFunction(gamma=0.05, d=1)
+# embedding = HermiteEmbedding(m = m, d = 1)
+xtest = interval_torch(n=n, d=1)
+
+embedding1 = NystromFeatures(kernel_object=kernel_object, m=m)
+embedding1.fit_gp(xtest / 2 - 0.7, None)
+embedding2 = NystromFeatures(kernel_object=kernel_object, m=m)
+embedding2.fit_gp(xtest / 2 + 0.7, None)
+embedding = ConcatEmbedding([embedding1, embedding2])
+
+
+def k1(x, y, **kwagrs):
+ return (embedding1.embed(x) @ embedding1.embed(y).T).T
-embedding1 = NystromFeatures(kernel_object = kernel_object, m = m )
-embedding1.fit_gp(xtest/2-0.7,None)
-embedding2 = NystromFeatures(kernel_object = kernel_object, m = m )
-embedding2.fit_gp(xtest/2+0.7,None)
-embedding = ConcatEmbedding([embedding1,embedding2])
-def k1(x,y,**kwagrs):
- return (embedding1.embed(x)@embedding1.embed(y).T).T
+def k2(x, y, **kwagrs):
+ return (embedding2.embed(x) @ embedding2.embed(y).T).T
-def k2(x,y,**kwagrs):
- return (embedding2.embed(x)@embedding2.embed(y).T).T
-kernel_object_1 = KernelFunction(kernel_function = k1)
-kernel_object_2 = KernelFunction(kernel_function = k2)
+kernel_object_1 = KernelFunction(kernel_function=k1)
+kernel_object_2 = KernelFunction(kernel_function=k2)
kernels = [kernel_object_1, kernel_object_2]
regularizer = SupRegularizer(d=len(kernels), lam=0.99, constrained=True)
mkl = MultipleKernelLearner(kernels, regularizer=regularizer)
-f = lambda x: torch.sin(x*20)*(x<0).double() + (1e-5)*torch.sin(x*20)*(x>0).double()
-Xtrain = interval_torch(n = N, d= 1, L_infinity_ball=0.25) - 0.75
+f = (
+ lambda x: torch.sin(x * 20) * (x < 0).double()
+ + (1e-5) * torch.sin(x * 20) * (x > 0).double()
+)
+Xtrain = interval_torch(n=N, d=1, L_infinity_ball=0.25) - 0.75
ytrain = f(Xtrain)
#
@@ -85,9 +92,9 @@ def k2(x,y,**kwagrs):
mkl.load_data((Xtrain, ytrain))
mkl.fit()
mean = mkl.mean(xtest)
-p = plt.plot(xtest, mean, label="MKL", linestyle="-", lw=3, color='tab:purple')
+p = plt.plot(xtest, mean, label="MKL", linestyle="-", lw=3, color="tab:purple")
-plt.plot(Xtrain,ytrain,'ko', lw = 3)
-plt.plot(xtest,f(xtest),'k--', lw = 3)
+plt.plot(Xtrain, ytrain, "ko", lw=3)
+plt.plot(xtest, f(xtest), "k--", lw=3)
plt.legend()
-plt.show()
\ No newline at end of file
+plt.show()
diff --git a/tests/continous_processes/test_estimators/l_q_estimator.py b/tests/continous_processes/test_estimators/l_q_estimator.py
index f04484d..931aed5 100644
--- a/tests/continous_processes/test_estimators/l_q_estimator.py
+++ b/tests/continous_processes/test_estimators/l_q_estimator.py
@@ -16,54 +16,69 @@
m = 64
d = 1
sigma = 0.01
-lam = 1.
+lam = 1.0
n = 4
N = 3
-total_norm = 1.
-xtest = interval_torch(n = n,d = 1)
-kernel_object = KernelFunction(gamma = 0.05, d = 1)
-embedding = HermiteEmbedding(m = m, d = 1)
+total_norm = 1.0
+xtest = interval_torch(n=n, d=1)
+kernel_object = KernelFunction(gamma=0.05, d=1)
+embedding = HermiteEmbedding(m=m, d=1)
total_norms = [1]
-for pos,total_norm in enumerate(total_norms):
- lasso_regularizer = L1Regularizer(lam = lam)
- l2_regularizer = L2Regularizer(lam = lam)
+for pos, total_norm in enumerate(total_norms):
+ lasso_regularizer = L1Regularizer(lam=lam)
+ l2_regularizer = L2Regularizer(lam=lam)
qs = [0.1]
- regularizers = [l2_regularizer,l2_regularizer]
- #regularizers += [NonConvexLqRegularizer(lam = lam, q = q) for q in qs]
- constraints = [lasso_regularizer.get_constraint_object(total_norm), l2_regularizer.get_constraint_object(total_norm)]
- #constraints=+ [None for q in qs]
+ regularizers = [l2_regularizer, l2_regularizer]
+ # regularizers += [NonConvexLqRegularizer(lam = lam, q = q) for q in qs]
+ constraints = [
+ lasso_regularizer.get_constraint_object(total_norm),
+ l2_regularizer.get_constraint_object(total_norm),
+ ]
+ # constraints=+ [None for q in qs]
-
- constraints += [ NonConvexNormConstraint(0.5, total_norm, m)]
- regularizers += [L2Regularizer(lam = lam)]
+ constraints += [NonConvexNormConstraint(0.5, total_norm, m)]
+ regularizers += [L2Regularizer(lam=lam)]
likelihood = GaussianLikelihood(sigma=sigma)
names = ["L1", "L2"]
- #names += ["L"+str(q) for q in qs]
+ # names += ["L"+str(q) for q in qs]
names += ["Lspecial"]
- f = lambda x: torch.sin(x*20)
- Xtrain = interval_torch(n = N, d= 1)
+ f = lambda x: torch.sin(x * 20)
+ Xtrain = interval_torch(n=N, d=1)
ytrain = f(Xtrain)
- linestyles = ['-.','-','--']
- #plt.subplot(2,len(total_norms)//2,pos+1)
- for name,regularizer,constraint, linestyle in zip(names,regularizers,constraints,linestyles):
- print (name)
- estimator = RegularizedDictionary(embedding, likelihood, regularizer, constraints=constraint, use_constraint=True)
- estimator.load_data((Xtrain,ytrain))
+ linestyles = ["-.", "-", "--"]
+ # plt.subplot(2,len(total_norms)//2,pos+1)
+ for name, regularizer, constraint, linestyle in zip(
+ names, regularizers, constraints, linestyles
+ ):
+ print(name)
+ estimator = RegularizedDictionary(
+ embedding,
+ likelihood,
+ regularizer,
+ constraints=constraint,
+ use_constraint=True,
+ )
+ estimator.load_data((Xtrain, ytrain))
estimator.fit()
mean = estimator.mean(xtest)
lcb = estimator.lcb(xtest)
ucb = estimator.ucb(xtest)
- p = plt.plot(xtest, mean, label=name, linestyle = linestyle)
- plt.fill_between(xtest.view(-1),lcb.view(-1),ucb.view(-1), alpha = 0.1, color = p[0].get_color())
+ p = plt.plot(xtest, mean, label=name, linestyle=linestyle)
+ plt.fill_between(
+ xtest.view(-1),
+ lcb.view(-1),
+ ucb.view(-1),
+ alpha=0.1,
+ color=p[0].get_color(),
+ )
print(name, "support:", torch.sum(estimator.theta_fit > 0.01))
- print (estimator.theta_fit.T)
-
+ print(estimator.theta_fit.T)
- plt.plot(Xtrain,ytrain,'o')
- plt.plot(xtest,f(xtest),'k--')
+ plt.plot(Xtrain, ytrain, "o")
+ plt.plot(xtest, f(xtest), "k--")
plt.legend()
-plt.show()
\ No newline at end of file
+plt.show()
diff --git a/tests/continous_processes/test_estimators/qff_nonstationary.py b/tests/continous_processes/test_estimators/qff_nonstationary.py
index 19407fc..97f0fb3 100644
--- a/tests/continous_processes/test_estimators/qff_nonstationary.py
+++ b/tests/continous_processes/test_estimators/qff_nonstationary.py
@@ -12,92 +12,105 @@
m = 128
d = 1
sigma = 0.01
-lam = 1.
+lam = 1.0
n = 256
n_small = 16
I = torch.eye(m).double()
budget = 100
-kernel_object = KernelFunction(gamma = 0.05, d = 1)
+kernel_object = KernelFunction(gamma=0.05, d=1)
-embedding_base = HermiteEmbedding(m = m, d = 1)
+embedding_base = HermiteEmbedding(m=m, d=1)
-def zero_out_interval(x,interval):
- mask1 = x[:,0] > interval[0]
- mask2 = x[:,0] < interval[1]
- #return torch.from_numpy(gaussian_filter(torch.logical_and(mask1,mask2).double(),sigma=10))
- return torch.logical_and(mask1,mask2).double()
+def zero_out_interval(x, interval):
+ mask1 = x[:, 0] > interval[0]
+ mask2 = x[:, 0] < interval[1]
+ # return torch.from_numpy(gaussian_filter(torch.logical_and(mask1,mask2).double(),sigma=10))
+ return torch.logical_and(mask1, mask2).double()
-xtest = interval_torch(n = n,d = 1)
+xtest = interval_torch(n=n, d=1)
-embedding1 = NystromFeatures(kernel_object = kernel_object, m = m )
-embedding1.fit_gp((xtest-1)/2-0.5,None)
-embedding2 = NystromFeatures(kernel_object = kernel_object, m = m )
-embedding2.fit_gp((xtest-1)/2,None)
-embedding3 = NystromFeatures(kernel_object = kernel_object, m = m )
-embedding3.fit_gp((xtest+1)/2,None)
-embedding4 = NystromFeatures(kernel_object = kernel_object, m = m )
-embedding4.fit_gp((xtest+1)/2+0.5,None)
-embedding = ConcatEmbedding([embedding1,embedding2,embedding3,embedding4])
+embedding1 = NystromFeatures(kernel_object=kernel_object, m=m)
+embedding1.fit_gp((xtest - 1) / 2 - 0.5, None)
+embedding2 = NystromFeatures(kernel_object=kernel_object, m=m)
+embedding2.fit_gp((xtest - 1) / 2, None)
+embedding3 = NystromFeatures(kernel_object=kernel_object, m=m)
+embedding3.fit_gp((xtest + 1) / 2, None)
+embedding4 = NystromFeatures(kernel_object=kernel_object, m=m)
+embedding4.fit_gp((xtest + 1) / 2 + 0.5, None)
-likelihood_base = GaussianLikelihood(sigma = sigma)
-groups = [list(range(m)),list(range(m,2*m,1)),list(range(2*m,3*m,1)),list(range(3*m,4*m,1))]
+embedding = ConcatEmbedding([embedding1, embedding2, embedding3, embedding4])
-regularizer_base = GroupL1L2Regularizer(lam=1., groups=groups)
+likelihood_base = GaussianLikelihood(sigma=sigma)
+groups = [
+ list(range(m)),
+ list(range(m, 2 * m, 1)),
+ list(range(2 * m, 3 * m, 1)),
+ list(range(3 * m, 4 * m, 1)),
+]
+
+regularizer_base = GroupL1L2Regularizer(lam=1.0, groups=groups)
constraint_base = regularizer_base.get_constraint_object(budget)
-estimator = RegularizedDictionary(embedding, likelihood_base, regularizer_base, constraints=constraint_base, use_constraint=False)
+estimator = RegularizedDictionary(
+ embedding,
+ likelihood_base,
+ regularizer_base,
+ constraints=constraint_base,
+ use_constraint=False,
+)
-lams = [1.]#,16.,32.,64.,128.]
+lams = [1.0] # ,16.,32.,64.,128.]
N = 3
-v = torch.randn(size = (embedding.get_m(),1)).double()
-for i in [0,1,3]:
- v[groups[i]] = 0.
-v = (v/np.sqrt(regularizer_base.eval(v)))
+v = torch.randn(size=(embedding.get_m(), 1)).double()
+for i in [0, 1, 3]:
+ v[groups[i]] = 0.0
+v = v / np.sqrt(regularizer_base.eval(v))
-F = lambda X: embedding.embed(X)@v*np.sqrt(budget)
-X = torch.rand(size = (10,d)).double()*0.25+0.1
+F = lambda X: embedding.embed(X) @ v * np.sqrt(budget)
+X = torch.rand(size=(10, d)).double() * 0.25 + 0.1
y = F(X)
-#Xpoint = torch.Tensor([[0.],[0.5]]).double()
-#ypoint = torch.Tensor([[0.],[0.]]).double()
+# Xpoint = torch.tensor([[0.],[0.5]]).double()
+# ypoint = torch.tensor([[0.],[0.]]).double()
-#X = torch.vstack([X,Xpoint])
-#y = torch.vstack([y,ypoint])
-estimator.load_data((X,y))
+# X = torch.vstack([X,Xpoint])
+# y = torch.vstack([y,ypoint])
+estimator.load_data((X, y))
estimator.fit()
F = lambda X: estimator.mean(X)
Xtrain = torch.rand(size=(N, d)).double() * 0.5
ytrain = F(Xtrain) + sigma * torch.randn(size=(Xtrain.size()[0], 1))
-lams = [8.,16.,32.]#,16.,32.,64.,128.]
+lams = [8.0, 16.0, 32.0] # ,16.,32.,64.,128.]
##lams = [1.,128.]
epsilon = 1e-1
-#lams = [1.]
+# lams = [1.]
for index, lam in enumerate(lams):
- print (index,':',lam)
- print ("budget:",budget)
+ print(index, ":", lam)
+ print("budget:", budget)
- plt.subplot(len(lams),1,index+1)
- plt.plot(Xtrain, ytrain, 'ro', ms=25)
- plt.ylim([-3,3])
- regularizer = GroupL1L2Regularizer(lam = lam, groups = groups)
+ plt.subplot(len(lams), 1, index + 1)
+ plt.plot(Xtrain, ytrain, "ro", ms=25)
+ plt.ylim([-3, 3])
+ regularizer = GroupL1L2Regularizer(lam=lam, groups=groups)
constraint = regularizer.get_constraint_object(budget)
likelihood = GaussianLikelihood(sigma=sigma)
- estimator_train = RegularizedDictionary(embedding, likelihood, regularizer, constraints = constraint, use_constraint=True)
-
+ estimator_train = RegularizedDictionary(
+ embedding, likelihood, regularizer, constraints=constraint, use_constraint=True
+ )
- xtest = interval_torch(n = n,d = 1)
- xtest_small = interval_torch(n = n_small, d = 1)
- plt.plot(xtest, F(xtest), lw = 5)
+ xtest = interval_torch(n=n, d=1)
+ xtest_small = interval_torch(n=n_small, d=1)
+ plt.plot(xtest, F(xtest), lw=5)
- estimator_train.load_data((Xtrain,ytrain))
+ estimator_train.load_data((Xtrain, ytrain))
estimator_train.fit()
mean = estimator_train.mean(xtest)
@@ -107,15 +120,21 @@ def zero_out_interval(x,interval):
print(regularizer.eval(estimator_train.theta_fit))
print(regularizer_base.eval(estimator_train.theta_fit))
- p = plt.plot(xtest, mean, lw = 4, label = "$||f|| \leq "+str(budget/lam)+"$")
- #p2 = plt.plot(xtest_small, mean_small,'o-', ms = 25, lw = 4, label = "$||f|| \leq "+str(budget/lam)+"$")
+ p = plt.plot(xtest, mean, lw=4, label="$||f|| \leq " + str(budget / lam) + "$")
+ # p2 = plt.plot(xtest_small, mean_small,'o-', ms = 25, lw = 4, label = "$||f|| \leq "+str(budget/lam)+"$")
#
- ucb = estimator_train.ucb(xtest_small, type = "LR_static")
- lcb = estimator_train.lcb(xtest_small, type = "LR_static")
+ ucb = estimator_train.ucb(xtest_small, type="LR_static")
+ lcb = estimator_train.lcb(xtest_small, type="LR_static")
#
- #plt.errorbar(xtest_small.view(-1), mean_small.view(-1),yerr = ucb.view(-1), ms = 25,alpha = 1., color = p[0].get_color(), lw=5)
- plt.fill_between(xtest_small.view(-1),lcb.view(-1), ucb.view(-1),alpha = 0.1, color = p[0].get_color())
- plt.plot(xtest, xtest*0 + epsilon, 'k--')
- plt.legend(fontsize = 35)
+ # plt.errorbar(xtest_small.view(-1), mean_small.view(-1),yerr = ucb.view(-1), ms = 25,alpha = 1., color = p[0].get_color(), lw=5)
+ plt.fill_between(
+ xtest_small.view(-1),
+ lcb.view(-1),
+ ucb.view(-1),
+ alpha=0.1,
+ color=p[0].get_color(),
+ )
+ plt.plot(xtest, xtest * 0 + epsilon, "k--")
+ plt.legend(fontsize=35)
plt.show()
diff --git a/tests/continous_processes/test_estimators/test_regularized_dictionary_l2.py b/tests/continous_processes/test_estimators/test_regularized_dictionary_l2.py
index 82467ad..cdcbe48 100644
--- a/tests/continous_processes/test_estimators/test_regularized_dictionary_l2.py
+++ b/tests/continous_processes/test_estimators/test_regularized_dictionary_l2.py
@@ -8,73 +8,99 @@
from stpy.probability.gaussian_likelihood import GaussianLikelihood
from stpy.regularization.regularizer import L2Regularizer, L1Regularizer
from stpy.helpers.helper import interval_torch
-from stpy.regularization.constraints import QuadraticInequalityConstraint, AbsoluteValueConstraint
+from stpy.regularization.constraints import (
+ QuadraticInequalityConstraint,
+ AbsoluteValueConstraint,
+)
from stpy.kernels import KernelFunction
m = 64
d = 1
sigma = 0.1
-lam = 1.
+lam = 1.0
n = 256
I = torch.eye(m).double()
-budget = m*10e10
-kernel_object = KernelFunction(gamma = 0.1, d = 1)
-#embedding = TriangleEmbedding(m = m, d = 1, B = 10, b = -10, kernel_object=kernel_object)
+budget = m * 10e10
+kernel_object = KernelFunction(gamma=0.1, d=1)
+# embedding = TriangleEmbedding(m = m, d = 1, B = 10, b = -10, kernel_object=kernel_object)
-embedding_base = FaberSchauderEmbedding(m = m, d = 1, kernel_object=None, offset=0)
-# this defines the decay of the functions
-def decay_function(emb):
- return (emb.hierarchical_mask()+1)**(-15)
+embedding_base = FaberSchauderEmbedding(m=m, d=1, kernel_object=None, offset=0)
-print (decay_function(embedding_base))
-embedding = WeightedEmbedding(embedding_base,weight_function=decay_function)
-
-# embedding = RFFEmbeddQing(m = m, d=1, gamma = 0.1)
+# this defines the decay of the functions
+def decay_function(emb):
+ return (emb.hierarchical_mask() + 1) ** (-15)
-likelihood = GaussianLikelihood(sigma = sigma)
-regularizer_L2 = L2Regularizer(lam = lam)
-regularizer_L1 = L1Regularizer(lam = lam)
+print(decay_function(embedding_base))
-constraint_L2 = QuadraticInequalityConstraint(Q = I, c = budget)
-constraint_L1 = AbsoluteValueConstraint(c = np.sqrt(budget))
+embedding = WeightedEmbedding(embedding_base, weight_function=decay_function)
-estimator_L2_L2 = RegularizedDictionary(embedding, likelihood, regularizer_L2,
- constraints = constraint_L2, use_constraint=False)
-estimator_L1_L2 = RegularizedDictionary(embedding, likelihood, regularizer_L1,
- constraints = constraint_L2, use_constraint=False)
-estimator_L2_L1 = RegularizedDictionary(embedding, likelihood, regularizer_L2,
- constraints = constraint_L1, use_constraint=False)
-estimator_L1_L1 = RegularizedDictionary(embedding, likelihood, regularizer_L1,
- constraints = constraint_L1, use_constraint=False)
+# embedding = RFFEmbeddQing(m = m, d=1, gamma = 0.1)
-estimators = [estimator_L2_L2,estimator_L2_L1,estimator_L1_L2,estimator_L1_L1]
+likelihood = GaussianLikelihood(sigma=sigma)
+regularizer_L2 = L2Regularizer(lam=lam)
+regularizer_L1 = L1Regularizer(lam=lam)
+
+
+constraint_L2 = QuadraticInequalityConstraint(Q=I, c=budget)
+constraint_L1 = AbsoluteValueConstraint(c=np.sqrt(budget))
+
+estimator_L2_L2 = RegularizedDictionary(
+ embedding,
+ likelihood,
+ regularizer_L2,
+ constraints=constraint_L2,
+ use_constraint=False,
+)
+estimator_L1_L2 = RegularizedDictionary(
+ embedding,
+ likelihood,
+ regularizer_L1,
+ constraints=constraint_L2,
+ use_constraint=False,
+)
+estimator_L2_L1 = RegularizedDictionary(
+ embedding,
+ likelihood,
+ regularizer_L2,
+ constraints=constraint_L1,
+ use_constraint=False,
+)
+estimator_L1_L1 = RegularizedDictionary(
+ embedding,
+ likelihood,
+ regularizer_L1,
+ constraints=constraint_L1,
+ use_constraint=False,
+)
+
+estimators = [estimator_L2_L2, estimator_L2_L1, estimator_L1_L2, estimator_L1_L1]
names = ["reg:L2 con:L2", "reg:L2 con:L1", "reg:L1 con:L2", "reg:L1 con:L1"]
-styles = ["-","--","-","--"]
+styles = ["-", "--", "-", "--"]
N = 1
-v = torch.randn(size = (m,1)).double()
-F = lambda X: embedding.embed(X)@v
-X = torch.Tensor([[0.5]]).double()
+v = torch.randn(size=(m, 1)).double()
+F = lambda X: embedding.embed(X) @ v
+X = torch.tensor([[0.5]]).double()
y = F(X)
-xtest = interval_torch(n = n,d = 1)
+xtest = interval_torch(n=n, d=1)
-plt.plot(xtest, F(xtest), lw = 5)
-plt.plot(X, y, 'ro', ms = 25)
+plt.plot(xtest, F(xtest), lw=5)
+plt.plot(X, y, "ro", ms=25)
-for j,estimator in enumerate(estimators):
- print ("Calculating:",names[j])
- estimator.load_data((X,y))
+for j, estimator in enumerate(estimators):
+ print("Calculating:", names[j])
+ estimator.load_data((X, y))
estimator.fit()
mean = estimator.mean(xtest)
- #ucb = estimator.ucb(xtest, type = "LR_static")
- #lcb = estimator.lcb(xtest, type = "LR_static")
+ # ucb = estimator.ucb(xtest, type = "LR_static")
+ # lcb = estimator.lcb(xtest, type = "LR_static")
- #plt.title("Norm: "+str(torch.norm(estimator.theta_fit)**2))
- plt.plot(xtest, mean, label = names[j], lw = 4, linestyle = styles[j])
- #plt.fill_between(xtest.view(-1), lcb.view(-1), ucb.view(-1), alpha = 0.1)
+ # plt.title("Norm: "+str(torch.norm(estimator.theta_fit)**2))
+ plt.plot(xtest, mean, label=names[j], lw=4, linestyle=styles[j])
+ # plt.fill_between(xtest.view(-1), lcb.view(-1), ucb.view(-1), alpha = 0.1)
-plt.legend(fontsize = 35)
+plt.legend(fontsize=35)
plt.show()
diff --git a/tests/continous_processes/test_huber_loss.py b/tests/continous_processes/test_huber_loss.py
index c76de41..edf105e 100644
--- a/tests/continous_processes/test_huber_loss.py
+++ b/tests/continous_processes/test_huber_loss.py
@@ -10,77 +10,97 @@
d = 1
eps = 0.01
s = 1
-x = torch.rand(N,d).double()*2 - 1
-xtest = torch.from_numpy(interval(n,d,L_infinity_ball=1))
+x = torch.rand(N, d).double() * 2 - 1
+xtest = torch.from_numpy(interval(n, d, L_infinity_ball=1))
# true
GP_true = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d)
ytest = GP_true.sample(xtest)
-GP_true.fit_gp(xtest,ytest)
+GP_true.fit_gp(xtest, ytest)
-plt.plot(xtest,GP_true.mean(xtest),'b-')
+plt.plot(xtest, GP_true.mean(xtest), "b-")
y = GP_true.mean(x).clone()
-GP = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d, loss = 'huber', huber_delta=0.01, s = s)
+GP = GaussianProcess(
+ gamma=0.1,
+ kernel_name="squared_exponential",
+ d=d,
+ loss="huber",
+ huber_delta=0.01,
+ s=s,
+)
-xnew = x[0,:].view(1,1) + eps
-ynew = y[0,0].view(1,1) + 1
+xnew = x[0, :].view(1, 1) + eps
+ynew = y[0, 0].view(1, 1) + 1
-y2 = torch.vstack([y,ynew])
-x2 = torch.vstack([x,xnew])
+y2 = torch.vstack([y, ynew])
+x2 = torch.vstack([x, xnew])
-GP.fit_gp(x2,y2)
+GP.fit_gp(x2, y2)
GP2 = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d)
GP3 = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d)
-GP2.fit_gp(x2,y2)
-#GP2.visualize(xtest, show = False, fig = False)
-#plt.show()
-
+GP2.fit_gp(x2, y2)
+# GP2.visualize(xtest, show = False, fig = False)
+# plt.show()
### marginalized likelihood with normal loss_two_ways
# plot true function
-plt.plot(xtest,GP_true.mean(xtest),'b--',label = "truth", lw = 3)
+plt.plot(xtest, GP_true.mean(xtest), "b--", label="truth", lw=3)
# with noise optimize
-GP2.fit_gp(x2,y2)
-GP2.optimize_params(type="bandwidth", restarts=5, verbose = False, optimizer = 'pytorch-minimize', scale = 1.)
+GP2.fit_gp(x2, y2)
+GP2.optimize_params(
+ type="bandwidth", restarts=5, verbose=False, optimizer="pytorch-minimize", scale=1.0
+)
mu = GP2.mean(xtest)
-plt.plot(xtest,mu, 'r-', label = "squared-corupted", lw = 3)
-#GP2.visualize(xtest, show = False, fig = False, size = 0)
+plt.plot(xtest, mu, "r-", label="squared-corupted", lw=3)
+# GP2.visualize(xtest, show = False, fig = False, size = 0)
# no noise optimize
-GP2.fit_gp(x,y)
-GP2.optimize_params(type="bandwidth", restarts=5, verbose = False, optimizer = 'pytorch-minimize', scale = 1.)
+GP2.fit_gp(x, y)
+GP2.optimize_params(
+ type="bandwidth", restarts=5, verbose=False, optimizer="pytorch-minimize", scale=1.0
+)
mu = GP2.mean(xtest)
-plt.plot(xtest,mu, '--x', color ="tab:brown" , label = 'squared-uncorrupted', lw = 3)
+plt.plot(xtest, mu, "--x", color="tab:brown", label="squared-uncorrupted", lw=3)
# with huber optimize
-GP = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d, loss = 'huber', huber_delta=1.3)
-GP.fit_gp(x2,y2)
-GP.optimize_params(type="bandwidth", restarts=5, verbose = False, optimizer = 'pytorch-minimize', scale = 1., weight=1.)
+GP = GaussianProcess(
+ gamma=0.1, kernel_name="squared_exponential", d=d, loss="huber", huber_delta=1.3
+)
+GP.fit_gp(x2, y2)
+GP.optimize_params(
+ type="bandwidth",
+ restarts=5,
+ verbose=False,
+ optimizer="pytorch-minimize",
+ scale=1.0,
+ weight=1.0,
+)
mu = GP2.mean(xtest)
-plt.plot(xtest,mu, color = "tab:green", label = 'huber-corupted', lw = 3)
+plt.plot(xtest, mu, color="tab:green", label="huber-corupted", lw=3)
# GP = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d, loss = 'huber', huber_delta=1.3)
# GP.fit_gp(x2,y2)
# mu = GP2.mean(xtest)
# plt.plot(xtest,mu, 'r-', label = 'huber-true-model-corupted')
-GP = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d, loss = 'huber', huber_delta=1.3)
-GP.fit_gp(x,y)
+GP = GaussianProcess(
+ gamma=0.1, kernel_name="squared_exponential", d=d, loss="huber", huber_delta=1.3
+)
+GP.fit_gp(x, y)
mu = GP.mean(xtest)
-plt.plot(xtest,mu, '--', color = "tab:orange", label = 'huber-uncorrupted', lw = 3)
+plt.plot(xtest, mu, "--", color="tab:orange", label="huber-uncorrupted", lw=3)
plt.legend()
-plt.plot(x,y, 'ro', ms = 5)
+plt.plot(x, y, "ro", ms=5)
-plt.plot(xnew,ynew, 'ko', ms = 10)
+plt.plot(xnew, ynew, "ko", ms=10)
plt.show()
# GP.fit_gp(x,y2)
# GP.optimize_params(type="bandwidth", restarts=10, verbose = False, optimizer = 'pytorch-minimize', scale = 10.)
# GP.visualize(xtest, show = True, fig = False, color = 'yellow')
#
-
diff --git a/tests/continous_processes/test_marginalized_pytorch_minimize.py b/tests/continous_processes/test_marginalized_pytorch_minimize.py
index 8e97e47..beaeea0 100644
--- a/tests/continous_processes/test_marginalized_pytorch_minimize.py
+++ b/tests/continous_processes/test_marginalized_pytorch_minimize.py
@@ -3,27 +3,29 @@
from stpy.kernels import KernelFunction
from stpy.helpers.helper import interval
-#%%
+# %%
n = 100
d = 2
-x = torch.rand(n,d).double()*2 - 1
-xtest = torch.from_numpy(interval(50,2,L_infinity_ball=1))
+x = torch.rand(n, d).double() * 2 - 1
+xtest = torch.from_numpy(interval(50, 2, L_infinity_ball=1))
-#%%
+# %%
GP = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=2)
y = GP.sample(x)
-GP.fit_gp(x,y)
-GP.visualize_contour(xtest, ms = 10)
+GP.fit_gp(x, y)
+GP.visualize_contour(xtest, ms=10)
-#%%
+# %%
## Kernels can be defined as via kernel object
# 2 dimensional additive kernel with groups [0] and [1]
-k = KernelFunction(kernel_name = "ard", d = 2, groups = [[0,1]] )
+k = KernelFunction(kernel_name="ard", d=2, groups=[[0, 1]])
GP = GaussianProcess(kernel=k)
-GP.fit_gp(x,y)
-GP.optimize_params(type="bandwidth", restarts = 2, verbose = False, optimizer = 'pytorch-minimize')
-GP.visualize_contour(xtest, ms = 10)
+GP.fit_gp(x, y)
+GP.optimize_params(
+ type="bandwidth", restarts=2, verbose=False, optimizer="pytorch-minimize"
+)
+GP.visualize_contour(xtest, ms=10)
diff --git a/tests/continous_processes/test_svr_loss.py b/tests/continous_processes/test_svr_loss.py
index 4844b98..9d9d124 100644
--- a/tests/continous_processes/test_svr_loss.py
+++ b/tests/continous_processes/test_svr_loss.py
@@ -11,87 +11,102 @@
eps = 0.01
s = 0.1
B = 0.001
-x = torch.rand(N,d).double()*2 - 1
-xtest = torch.from_numpy(interval(n,d,L_infinity_ball=1))
+x = torch.rand(N, d).double() * 2 - 1
+xtest = torch.from_numpy(interval(n, d, L_infinity_ball=1))
# true
GP_true = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d)
ytest = GP_true.sample(xtest)
-GP_true.fit_gp(xtest,ytest)
-
+GP_true.fit_gp(xtest, ytest)
y = GP_true.mean(x).clone()
-xnew = x[0,:].view(1,1) + eps
-ynew = torch.rand(size = (1,1))*B
-y2 = torch.vstack([y,ynew])
-x2 = torch.vstack([x,xnew])
-
-GP = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d, loss = 'svr', svr_eps=0.1, s = s)
-GP2 = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d, s = s, loss = "squared")
-GP3 = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d, s = s, loss = 'unif', B = B)
-GP4 = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d, s = s, loss = 'huber', huber_delta=1.35)
-
-GP.fit_gp(x,y)
-GP2.fit_gp(x,y)
-GP3.fit_gp(x,y)
-GP4.fit_gp(x,y)
-
-plt.plot(x,y, 'ro', label = 'points')
-plt.plot(xtest, ytest, 'b-', label = "truth")
-
-plt.plot(xtest, GP.mean_std(xtest)[0], 'g-', label = "svr")
-plt.plot(xtest, GP2.mean_std(xtest)[0], 'r-', label = "squared")
-plt.plot(xtest, GP3.mean_std(xtest)[0], 'y-', label = "unif")
-
-plt.plot(xtest, GP4.mean_std(xtest)[0], '--', color = "orange", label = "huber")
+xnew = x[0, :].view(1, 1) + eps
+ynew = torch.rand(size=(1, 1)) * B
+y2 = torch.vstack([y, ynew])
+x2 = torch.vstack([x, xnew])
+
+GP = GaussianProcess(
+ gamma=0.1, kernel_name="squared_exponential", d=d, loss="svr", svr_eps=0.1, s=s
+)
+GP2 = GaussianProcess(
+ gamma=0.1, kernel_name="squared_exponential", d=d, s=s, loss="squared"
+)
+GP3 = GaussianProcess(
+ gamma=0.1, kernel_name="squared_exponential", d=d, s=s, loss="unif", B=B
+)
+GP4 = GaussianProcess(
+ gamma=0.1,
+ kernel_name="squared_exponential",
+ d=d,
+ s=s,
+ loss="huber",
+ huber_delta=1.35,
+)
+
+GP.fit_gp(x, y)
+GP2.fit_gp(x, y)
+GP3.fit_gp(x, y)
+GP4.fit_gp(x, y)
+
+plt.plot(x, y, "ro", label="points")
+plt.plot(xtest, ytest, "b-", label="truth")
+
+plt.plot(xtest, GP.mean_std(xtest)[0], "g-", label="svr")
+plt.plot(xtest, GP2.mean_std(xtest)[0], "r-", label="squared")
+plt.plot(xtest, GP3.mean_std(xtest)[0], "y-", label="unif")
+
+plt.plot(xtest, GP4.mean_std(xtest)[0], "--", color="orange", label="huber")
plt.legend()
plt.show()
-GP.fit_gp(x2,y2)
-GP2.fit_gp(x2,y2)
-GP3.fit_gp(x2,y2)
-GP4.fit_gp(x2,y2)
+GP.fit_gp(x2, y2)
+GP2.fit_gp(x2, y2)
+GP3.fit_gp(x2, y2)
+GP4.fit_gp(x2, y2)
-plt.plot(x,y, 'ro', label = 'points')
-plt.plot(xnew,ynew, 'ko', label = 'corrupted')
-plt.plot(xtest, ytest, 'b-', label = "truth")
+plt.plot(x, y, "ro", label="points")
+plt.plot(xnew, ynew, "ko", label="corrupted")
+plt.plot(xtest, ytest, "b-", label="truth")
-plt.plot(xtest, GP.mean_std(xtest)[0], 'g-', label = "svr")
-plt.plot(xtest, GP2.mean_std(xtest)[0], 'r-', label = "squared")
-plt.plot(xtest, GP3.mean_std(xtest)[0], 'y-', label = "unif")
-plt.plot(xtest, GP4.mean_std(xtest)[0], '--', color = "orange", label = "huber")
+plt.plot(xtest, GP.mean_std(xtest)[0], "g-", label="svr")
+plt.plot(xtest, GP2.mean_std(xtest)[0], "r-", label="squared")
+plt.plot(xtest, GP3.mean_std(xtest)[0], "y-", label="unif")
+plt.plot(xtest, GP4.mean_std(xtest)[0], "--", color="orange", label="huber")
plt.legend()
plt.show()
-
-
-GP.fit_gp(x2,y2)
-GP2.fit_gp(x2,y2)
-GP3.fit_gp(x2,y2)
-GP4.fit_gp(x2,y2)
-
-plt.plot(x,y, 'ro', label = 'points')
-plt.plot(xnew,ynew, 'ko', label = 'corrupted')
-plt.plot(xtest, ytest, 'b-', label = "truth")
-
-GP.fit_gp(x2,y2)
-GP.optimize_params(type="bandwidth", restarts=5, verbose = False, optimizer = 'pytorch-minimize', scale = 1.)
-GP2.fit_gp(x2,y2)
-GP2.optimize_params(type="bandwidth", restarts=5, verbose = False, optimizer = 'pytorch-minimize', scale = 1.)
-GP3.fit_gp(x2,y2)
-#GP3.optimize_params(type="bandwidth", restarts=5, verbose = False, optimizer = 'pytorch-minimize', scale = 1.)
-GP4.fit_gp(x2,y2)
-GP4.optimize_params(type="bandwidth", restarts=5, verbose = False, optimizer = 'pytorch-minimize', scale = 1.)
-
-plt.plot(xtest, GP.mean_std(xtest)[0], 'g-', label = "svr")
-plt.plot(xtest, GP2.mean_std(xtest)[0], 'r-', label = "squared")
-plt.plot(xtest, GP3.mean_std(xtest)[0], 'y-', label = "unif")
-plt.plot(xtest, GP4.mean_std(xtest)[0], '--', color = "orange", label = "huber")
-
+GP.fit_gp(x2, y2)
+GP2.fit_gp(x2, y2)
+GP3.fit_gp(x2, y2)
+GP4.fit_gp(x2, y2)
+
+plt.plot(x, y, "ro", label="points")
+plt.plot(xnew, ynew, "ko", label="corrupted")
+plt.plot(xtest, ytest, "b-", label="truth")
+
+GP.fit_gp(x2, y2)
+GP.optimize_params(
+ type="bandwidth", restarts=5, verbose=False, optimizer="pytorch-minimize", scale=1.0
+)
+GP2.fit_gp(x2, y2)
+GP2.optimize_params(
+ type="bandwidth", restarts=5, verbose=False, optimizer="pytorch-minimize", scale=1.0
+)
+GP3.fit_gp(x2, y2)
+# GP3.optimize_params(type="bandwidth", restarts=5, verbose = False, optimizer = 'pytorch-minimize', scale = 1.)
+GP4.fit_gp(x2, y2)
+GP4.optimize_params(
+ type="bandwidth", restarts=5, verbose=False, optimizer="pytorch-minimize", scale=1.0
+)
+
+plt.plot(xtest, GP.mean_std(xtest)[0], "g-", label="svr")
+plt.plot(xtest, GP2.mean_std(xtest)[0], "r-", label="squared")
+plt.plot(xtest, GP3.mean_std(xtest)[0], "y-", label="unif")
+plt.plot(xtest, GP4.mean_std(xtest)[0], "--", color="orange", label="huber")
plt.legend()
diff --git a/tests/continous_processes/test_unif_marginalized.py b/tests/continous_processes/test_unif_marginalized.py
index a27c508..0369049 100644
--- a/tests/continous_processes/test_unif_marginalized.py
+++ b/tests/continous_processes/test_unif_marginalized.py
@@ -12,37 +12,45 @@
eps = 0.01
s = 0.1
B = 0.001
-x = torch.rand(N,d).double()*2 - 1
-xtest = torch.from_numpy(interval(n,d,L_infinity_ball=1))
+x = torch.rand(N, d).double() * 2 - 1
+xtest = torch.from_numpy(interval(n, d, L_infinity_ball=1))
# true
GP_true = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d)
ytest = GP_true.sample(xtest)
-GP_true.fit_gp(xtest,ytest)
-
+GP_true.fit_gp(xtest, ytest)
y = GP_true.mean(x).clone()
-xnew = x[0,:].view(1,1) + eps
-ynew = torch.rand(size = (1,1))*B
-y2 = torch.vstack([y,ynew])
-x2 = torch.vstack([x,xnew])
-
-GP = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d, s = s, loss = 'huber')
-GP2 = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d, s = s, loss = "squared")
-GP3 = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d, s = s, loss = "unif_new")
-
-
-
-GP.fit_gp(x2,y2)
-GP.optimize_params(type="bandwidth", restarts=5, verbose = False, optimizer = 'pytorch-minimize', scale = 1.)
-GP2.fit_gp(x2,y2)
-#GP2.optimize_params(type="bandwidth", restarts=5, verbose = False, optimizer = 'pytorch-minimize', scale = 1.)
-GP3.fit_gp(x2,y2)
-GP3.optimize_params(type="bandwidth", restarts=5, verbose = False, optimizer = 'pytorch-minimize', scale = 1.)
-
-plt.plot(xtest, GP.mean_std(xtest)[0], 'g-', label = "huber")
-plt.plot(xtest, GP2.mean_std(xtest)[0], 'r-', label = "squared")
-plt.plot(xtest, GP3.mean_std(xtest)[0], 'y-', label = "unif")
+xnew = x[0, :].view(1, 1) + eps
+ynew = torch.rand(size=(1, 1)) * B
+y2 = torch.vstack([y, ynew])
+x2 = torch.vstack([x, xnew])
+
+GP = GaussianProcess(
+ gamma=0.1, kernel_name="squared_exponential", d=d, s=s, loss="huber"
+)
+GP2 = GaussianProcess(
+ gamma=0.1, kernel_name="squared_exponential", d=d, s=s, loss="squared"
+)
+GP3 = GaussianProcess(
+ gamma=0.1, kernel_name="squared_exponential", d=d, s=s, loss="unif_new"
+)
+
+
+GP.fit_gp(x2, y2)
+GP.optimize_params(
+ type="bandwidth", restarts=5, verbose=False, optimizer="pytorch-minimize", scale=1.0
+)
+GP2.fit_gp(x2, y2)
+# GP2.optimize_params(type="bandwidth", restarts=5, verbose = False, optimizer = 'pytorch-minimize', scale = 1.)
+GP3.fit_gp(x2, y2)
+GP3.optimize_params(
+ type="bandwidth", restarts=5, verbose=False, optimizer="pytorch-minimize", scale=1.0
+)
+
+plt.plot(xtest, GP.mean_std(xtest)[0], "g-", label="huber")
+plt.plot(xtest, GP2.mean_std(xtest)[0], "r-", label="squared")
+plt.plot(xtest, GP3.mean_std(xtest)[0], "y-", label="unif")
plt.legend()
-plt.show()
\ No newline at end of file
+plt.show()
diff --git a/tests/convergence_test.py b/tests/convergence_test.py
index ae65243..b630a63 100755
--- a/tests/convergence_test.py
+++ b/tests/convergence_test.py
@@ -15,30 +15,53 @@
# number of intial points
N = 3
# smoothness
-gamma = torch.ones(d, dtype= torch.float64)*1
+gamma = torch.ones(d, dtype=torch.float64) * 1
# test problem
xtest = torch.from_numpy(interval(n, d))
x = torch.from_numpy(np.random.uniform(-L_infinity_ball, L_infinity_ball, size=(N, d)))
f_no_noise = lambda q: torch.sin(torch.sum(q * 4, dim=1)).view(-1, 1)
-f = lambda q: f_no_noise(q) + torch.normal(mean=torch.zeros(q.size()[0], 1, dtype=torch.float64), std=1.,
- out=None) * s
+f = (
+ lambda q: f_no_noise(q)
+ + torch.normal(
+ mean=torch.zeros(q.size()[0], 1, dtype=torch.float64), std=1.0, out=None
+ )
+ * s
+)
# targets
y = f(x)
# GP model with squared exponential
m = 12
groups = None
-GP = GaussianProcess(kernel = "squared_exponential", s=s, gamma = gamma[0], d=d, groups = groups)
-GP_KL = GaussianProcessFF(kernel="squared_exponential", s=s, m=m, d=d, gamma=gamma[0], groups=groups, approx="kl")
-GP_He = GaussianProcessFF(kernel="squared_exponential", s=s, m=m, d=d, gamma=gamma[0], groups=groups, approx="hermite")
+GP = GaussianProcess(
+ kernel="squared_exponential", s=s, gamma=gamma[0], d=d, groups=groups
+)
+GP_KL = GaussianProcessFF(
+ kernel="squared_exponential",
+ s=s,
+ m=m,
+ d=d,
+ gamma=gamma[0],
+ groups=groups,
+ approx="kl",
+)
+GP_He = GaussianProcessFF(
+ kernel="squared_exponential",
+ s=s,
+ m=m,
+ d=d,
+ gamma=gamma[0],
+ groups=groups,
+ approx="hermite",
+)
# fit GP
GP.fit_gp(x, y)
GP_KL.fit_gp(x, y)
GP_He.fit_gp(x, y)
-print (GP.K)
-print (GP_KL.right_kernel())
-print (GP_He.right_kernel())
\ No newline at end of file
+print(GP.K)
+print(GP_KL.right_kernel())
+print(GP_He.right_kernel())
diff --git a/tests/cvxopt_integer_test.py b/tests/cvxopt_integer_test.py
index 977bd52..8591649 100755
--- a/tests/cvxopt_integer_test.py
+++ b/tests/cvxopt_integer_test.py
@@ -1,94 +1,99 @@
import numpy as np
import cvxopt
import torch
-from cvxopt import glpk,solvers
+from cvxopt import glpk, solvers
from stpy.continuous_processes.gauss_procc import GaussianProcess
import matplotlib.pyplot as plt
+
N = 128
d = 30
# Rotation
-theta = np.radians(45.)
+theta = np.radians(45.0)
thetainv = np.pi - theta
c, s = np.cos(theta), np.sin(theta)
Q = torch.from_numpy(np.array(((c, -s), (s, c))))
-M = torch.randn(size = (d,d), dtype = torch.float64)
-[Q,R] = torch.qr(M)
-
-
-def solve(Q,c,n = 10, verbose = True, up = None, low = None):
- print ("Starting Acq. Fucn solver...")
- print ("Resolution: ", n)
-
- # Grid
-
- tau = torch.from_numpy(np.arange(-n,n+1,1).astype(np.double))/n
- s = torch.ones(2*n+1)
- Tau = torch.zeros(size = (d,d*(2*n+1)), dtype = torch.float64)
- S = torch.zeros(size = (d,d*(2*n+1)), dtype = torch.float64)
- for j in range(d):
- Tau[j,j*(2*n+1):(j+1)*(2*n+1)] = tau
- S[j, j * (2 * n + 1):(j + 1) * (2 * n + 1)] = s
-
- B = Q @ Tau
-
- if (up is not None) or (low is not None):
- G = torch.cat((B, -B, S, -S, torch.t(c),-torch.t(c)))
- h = torch.ones(4 * d + 2)
- h[0:2 * d] = 1
- h[3 * d:4 * d] = -1
- h[4 * d ] = up
- h[4 * d + 1] = -low
- else:
- G = torch.cat((B, -B, S, -S))
- h = torch.ones(4 * d)
- h[0:2 * d] = 1
- h[3 * d:4 * d] = -1
- # Indicator variables
- x = torch.zeros(size = (d*(2*n+1),1),dtype = torch.float64)
- print (h)
- cc = cvxopt.matrix(c.view(-1).numpy().astype(np.double))
- Gc = cvxopt.matrix(G.numpy().astype(np.double))
- hc = cvxopt.matrix(h.numpy().astype(np.double))
-
- glpk.options['it_lim'] = 10
-
- solvers.solve(solver=cp.CBC)
- (status, x)= glpk.ilp(cc,Gc,hc,B=set(range(d*(2*n+1))) )
-
- return x
+M = torch.randn(size=(d, d), dtype=torch.float64)
+[Q, R] = torch.qr(M)
+
+
+def solve(Q, c, n=10, verbose=True, up=None, low=None):
+ print("Starting Acq. Fucn solver...")
+ print("Resolution: ", n)
+
+ # Grid
+
+ tau = torch.from_numpy(np.arange(-n, n + 1, 1).astype(np.double)) / n
+ s = torch.ones(2 * n + 1)
+ Tau = torch.zeros(size=(d, d * (2 * n + 1)), dtype=torch.float64)
+ S = torch.zeros(size=(d, d * (2 * n + 1)), dtype=torch.float64)
+ for j in range(d):
+ Tau[j, j * (2 * n + 1) : (j + 1) * (2 * n + 1)] = tau
+ S[j, j * (2 * n + 1) : (j + 1) * (2 * n + 1)] = s
+
+ B = Q @ Tau
+
+ if (up is not None) or (low is not None):
+ G = torch.cat((B, -B, S, -S, torch.t(c), -torch.t(c)))
+ h = torch.ones(4 * d + 2)
+ h[0 : 2 * d] = 1
+ h[3 * d : 4 * d] = -1
+ h[4 * d] = up
+ h[4 * d + 1] = -low
+ else:
+ G = torch.cat((B, -B, S, -S))
+ h = torch.ones(4 * d)
+ h[0 : 2 * d] = 1
+ h[3 * d : 4 * d] = -1
+ # Indicator variables
+ x = torch.zeros(size=(d * (2 * n + 1), 1), dtype=torch.float64)
+ print(h)
+ cc = cvxopt.matrix(c.view(-1).numpy().astype(np.double))
+ Gc = cvxopt.matrix(G.numpy().astype(np.double))
+ hc = cvxopt.matrix(h.numpy().astype(np.double))
+
+ glpk.options["it_lim"] = 10
+
+ solvers.solve(solver=cp.CBC)
+ (status, x) = glpk.ilp(cc, Gc, hc, B=set(range(d * (2 * n + 1))))
+
+ return x
+
# def N is the desired resolution
-tau = torch.from_numpy(np.arange(-N,N+1,1).astype(np.double))/N
-gp = GaussianProcess(gamma = 0.5, s = 0.001)
-c = torch.randn(size = (d*(2*N+1),1), dtype = torch.float64)
+tau = torch.from_numpy(np.arange(-N, N + 1, 1).astype(np.double)) / N
+gp = GaussianProcess(gamma=0.5, s=0.001)
+c = torch.randn(size=(d * (2 * N + 1), 1), dtype=torch.float64)
for i in range(d):
- plt.plot(gp.sample(tau.view(-1,1)).numpy())
- c[i*(2*N+1):(i+1)*(2*N+1)] = gp.sample(tau.view(-1,1))
+ plt.plot(gp.sample(tau.view(-1, 1)).numpy())
+ c[i * (2 * N + 1) : (i + 1) * (2 * N + 1)] = gp.sample(tau.view(-1, 1))
plt.show()
-def select(c,N,n, low, up):
- plt.subplot(211)
- cs = torch.randn(size = (d*(2*n+1),1), dtype = torch.float64)
- step = N//n
- plt.plot(c.numpy())
- for i in range(d):
- for j in range(2*n+1):
- cs[i*(2*n+1)+j] = c[i*(2*N+1)+(j*step)]
- plt.plot(i*(2*N+1)+(j*step),cs[i*(2*n+1)+j].numpy(),"ro")
-
- sum_c = c[0*(2*N+1):(0+1)*(2*N+1)] *0
- for i in range(d):
- sum_c = sum_c+ c[i*(2*N+1):(i+1)*(2*N+1)]
- if low is not None:
- plt.subplot(2, 1, 2)
- plt.plot(sum_c.numpy())
- plt.plot(sum_c.numpy()*0+low,"--", label = "low")
- plt.plot(sum_c.numpy() * 0 + up, "--", label = "up")
- plt.legend()
-
- plt.show()
- return cs
+
+def select(c, N, n, low, up):
+ plt.subplot(211)
+ cs = torch.randn(size=(d * (2 * n + 1), 1), dtype=torch.float64)
+ step = N // n
+ plt.plot(c.numpy())
+ for i in range(d):
+ for j in range(2 * n + 1):
+ cs[i * (2 * n + 1) + j] = c[i * (2 * N + 1) + (j * step)]
+ plt.plot(
+ i * (2 * N + 1) + (j * step), cs[i * (2 * n + 1) + j].numpy(), "ro"
+ )
+
+ sum_c = c[0 * (2 * N + 1) : (0 + 1) * (2 * N + 1)] * 0
+ for i in range(d):
+ sum_c = sum_c + c[i * (2 * N + 1) : (i + 1) * (2 * N + 1)]
+ if low is not None:
+ plt.subplot(2, 1, 2)
+ plt.plot(sum_c.numpy())
+ plt.plot(sum_c.numpy() * 0 + low, "--", label="low")
+ plt.plot(sum_c.numpy() * 0 + up, "--", label="up")
+ plt.legend()
+
+ plt.show()
+ return cs
up = None
@@ -98,13 +103,13 @@ def select(c,N,n, low, up):
x = solve(Q, c, n=N, up=up, low=low)
for j in range(int(np.log2(N))):
- n = np.power(2,j)
+ n = np.power(2, j)
- print(N, n)
- cs = select(c,N,n, low,up )
- x = solve(Q,cs,n = n, up=up, low = low)
- up = float(torch.mm(torch.t(cs),torch.from_numpy(np.array(x))))
- low = float(torch.mm(torch.t(cs),torch.from_numpy(np.array(x)))) - L/n
+ print(N, n)
+ cs = select(c, N, n, low, up)
+ x = solve(Q, cs, n=n, up=up, low=low)
+ up = float(torch.mm(torch.t(cs), torch.from_numpy(np.array(x))))
+ low = float(torch.mm(torch.t(cs), torch.from_numpy(np.array(x)))) - L / n
"""
m_value = 0
@@ -133,4 +138,4 @@ def select(c,N,n, low, up):
#print (res['x'])
#print (x)
-"""
\ No newline at end of file
+"""
diff --git a/tests/cvxpy_integer_test.py b/tests/cvxpy_integer_test.py
index 083d921..6048abf 100755
--- a/tests/cvxpy_integer_test.py
+++ b/tests/cvxpy_integer_test.py
@@ -8,7 +8,7 @@
d = 20
# Rotation
-theta = np.radians(45.)
+theta = np.radians(45.0)
thetainv = np.pi - theta
c, s = np.cos(theta), np.sin(theta)
Q = torch.from_numpy(np.array(((c, -s), (s, c))))
@@ -16,48 +16,47 @@
[Q, R] = torch.qr(M)
-def solve(Q, c, n=10, verbose=True, up=None, low=None, xwarm = None):
- if verbose == True:
- print("Starting Acq. Fucn solver...")
- print("Resolution: ", n)
- # Grid
-
- tau = torch.from_numpy(np.arange(-n, n + 1, 1).astype(np.double)) / n
- s = torch.ones(2 * n + 1)
- Tau = torch.zeros(size=(d, d * (2 * n + 1)), dtype=torch.float64)
- S = torch.zeros(size=(d, d * (2 * n + 1)), dtype=torch.float64)
- for j in range(d):
- Tau[j, j * (2 * n + 1):(j + 1) * (2 * n + 1)] = tau
- S[j, j * (2 * n + 1):(j + 1) * (2 * n + 1)] = s
-
- B = Q @ Tau
-
- if (up is not None) or (low is not None):
- G = torch.cat((B, -B, S, -S, torch.t(c), -torch.t(c)))
- h = torch.ones(4 * d + 2)
- h[0:2 * d] = 1
- h[3 * d:4 * d] = -1
- h[4 * d] = up
- h[4 * d + 1] = -low
- else:
- G = torch.cat((B, -B, S, -S))
- h = torch.ones(4 * d)
- h[0:2 * d] = 1
- h[3 * d:4 * d] = -1
- # Indicator variables
-
- x = cp.Variable(d * (2 * n + 1), boolean=True)
- if xwarm is not None:
- x.value = xwarm.numpy()
- c = c.view(-1).numpy()
-
- objective = cp.Maximize(c * x)
- constraints = [0 <= x, x <= 1, G.numpy()*x <= h.view(-1).numpy()]
- prob = cp.Problem(objective, constraints)
- prob.solve(solver=cp.MOSEK,verbose=verbose, warm_start=True)
-
-
- return (x.value,Tau.numpy() @ x.value, np.dot(c,x.value))
+def solve(Q, c, n=10, verbose=True, up=None, low=None, xwarm=None):
+ if verbose == True:
+ print("Starting Acq. Fucn solver...")
+ print("Resolution: ", n)
+ # Grid
+
+ tau = torch.from_numpy(np.arange(-n, n + 1, 1).astype(np.double)) / n
+ s = torch.ones(2 * n + 1)
+ Tau = torch.zeros(size=(d, d * (2 * n + 1)), dtype=torch.float64)
+ S = torch.zeros(size=(d, d * (2 * n + 1)), dtype=torch.float64)
+ for j in range(d):
+ Tau[j, j * (2 * n + 1) : (j + 1) * (2 * n + 1)] = tau
+ S[j, j * (2 * n + 1) : (j + 1) * (2 * n + 1)] = s
+
+ B = Q @ Tau
+
+ if (up is not None) or (low is not None):
+ G = torch.cat((B, -B, S, -S, torch.t(c), -torch.t(c)))
+ h = torch.ones(4 * d + 2)
+ h[0 : 2 * d] = 1
+ h[3 * d : 4 * d] = -1
+ h[4 * d] = up
+ h[4 * d + 1] = -low
+ else:
+ G = torch.cat((B, -B, S, -S))
+ h = torch.ones(4 * d)
+ h[0 : 2 * d] = 1
+ h[3 * d : 4 * d] = -1
+ # Indicator variables
+
+ x = cp.Variable(d * (2 * n + 1), boolean=True)
+ if xwarm is not None:
+ x.value = xwarm.numpy()
+ c = c.view(-1).numpy()
+
+ objective = cp.Maximize(c * x)
+ constraints = [0 <= x, x <= 1, G.numpy() * x <= h.view(-1).numpy()]
+ prob = cp.Problem(objective, constraints)
+ prob.solve(solver=cp.MOSEK, verbose=verbose, warm_start=True)
+
+ return (x.value, Tau.numpy() @ x.value, np.dot(c, x.value))
# def N is the desired resolution
@@ -65,58 +64,58 @@ def solve(Q, c, n=10, verbose=True, up=None, low=None, xwarm = None):
gp = GaussianProcess(gamma=0.5, s=0.001)
c = torch.randn(size=(d * (2 * N + 1), 1), dtype=torch.float64)
for i in range(d):
- z = gp.sample(tau.view(-1, 1))
- plt.plot(z.numpy())
- c[i * (2 * N + 1):(i + 1) * (2 * N + 1)] = z
+ z = gp.sample(tau.view(-1, 1))
+ plt.plot(z.numpy())
+ c[i * (2 * N + 1) : (i + 1) * (2 * N + 1)] = z
plt.show()
def select(c, N, n, val):
- cs = torch.randn(size=(d * (2 * n + 1), 1), dtype=torch.float64)
- if val is not None:
- sol = torch.randn(size=(d * (2 * n + 1), 1), dtype=torch.float64).view(-1)*0
- else:
- sol = None
- step = N // n
-
- for i in range(d):
- #plt.plot(c[i * (2 * n + 1):(i+1) * (2 * n + 1)].numpy())
- for j in range(2 * n + 1):
- cs[i * (2 * n + 1) + j] = c[i * (2 * N + 1) + (j * step)]
- if val is not None:
- if (c[i * (2 * N + 1) + (j * step)] - val[i])**2 < 10e-10:
- sol[i * (2 * N + 1) + (j * step)] = 1.0
- #plt.plot((i * (2 * N + 1) + (j * step))/((i+1)*N), cs[i * (2 * n + 1) + j].numpy(), "ro")
- #plt.show()
- return cs,sol
+ cs = torch.randn(size=(d * (2 * n + 1), 1), dtype=torch.float64)
+ if val is not None:
+ sol = torch.randn(size=(d * (2 * n + 1), 1), dtype=torch.float64).view(-1) * 0
+ else:
+ sol = None
+ step = N // n
+
+ for i in range(d):
+ # plt.plot(c[i * (2 * n + 1):(i+1) * (2 * n + 1)].numpy())
+ for j in range(2 * n + 1):
+ cs[i * (2 * n + 1) + j] = c[i * (2 * N + 1) + (j * step)]
+ if val is not None:
+ if (c[i * (2 * N + 1) + (j * step)] - val[i]) ** 2 < 10e-10:
+ sol[i * (2 * N + 1) + (j * step)] = 1.0
+ # plt.plot((i * (2 * N + 1) + (j * step))/((i+1)*N), cs[i * (2 * n + 1) + j].numpy(), "ro")
+ # plt.show()
+ return cs, sol
up = None
low = None
L = 10e20
-#x = solve(Q, c, n=N, up=up, low=low)
+# x = solve(Q, c, n=N, up=up, low=low)
sol = None
val = None
-for j in range(int(np.log2(N))+1):
- n = np.power(2, j)
+for j in range(int(np.log2(N)) + 1):
+ n = np.power(2, j)
- print(N, n)
- cs, sol = select(c, N, n, val)
- x , val = solve(Q, cs, n=n, up=up, low=low, xwarm = sol)
- print (x, val)
- #up = float( torch.dot(cs.view(-1),torch.from_numpy(x)))
- #low = float( torch.dot(cs.view(-1),torch.from_numpy(x))) - L/n
- sol = x
+ print(N, n)
+ cs, sol = select(c, N, n, val)
+ x, val = solve(Q, cs, n=n, up=up, low=low, xwarm=sol)
+ print(x, val)
+ # up = float( torch.dot(cs.view(-1),torch.from_numpy(x)))
+ # low = float( torch.dot(cs.view(-1),torch.from_numpy(x))) - L/n
+ sol = x
plt.figure()
-colors = ['b','k','r','g','y']
+colors = ["b", "k", "r", "g", "y"]
for i in range(d):
- z = c[i * (2 * N + 1):(i + 1) * (2 * N + 1)].view(-1).numpy()
- x = np.linspace(-1,1,2*N+1)
- plt.plot(x,z, color = colors[i % 5], label = str(i))
- index = np.argmin(z)
- plt.plot(val[i],z[index],'o', color = colors[i % 5],label = str(i), ms = 10)
-#plt.legend()
-plt.show()
\ No newline at end of file
+ z = c[i * (2 * N + 1) : (i + 1) * (2 * N + 1)].view(-1).numpy()
+ x = np.linspace(-1, 1, 2 * N + 1)
+ plt.plot(x, z, color=colors[i % 5], label=str(i))
+ index = np.argmin(z)
+ plt.plot(val[i], z[index], "o", color=colors[i % 5], label=str(i), ms=10)
+# plt.legend()
+plt.show()
diff --git a/tests/dpps_tests.py b/tests/dpps_tests.py
index 64689ae..ceb5444 100644
--- a/tests/dpps_tests.py
+++ b/tests/dpps_tests.py
@@ -1,7 +1,7 @@
-from stpy.helpers.helper import select_subset,select_subset_inv
+from stpy.helpers.helper import select_subset, select_subset_inv
import numpy as np
if __name__ == "__main__":
- n = 50
- M = np.random.randn(n,n)
+ n = 50
+ M = np.random.randn(n, n)
diff --git a/tests/efficient_triangle_basis.py b/tests/efficient_triangle_basis.py
new file mode 100644
index 0000000..305a5a2
--- /dev/null
+++ b/tests/efficient_triangle_basis.py
@@ -0,0 +1,35 @@
+from stpy.borel_set import BorelSet, HierarchicalBorelSets
+from stpy.embeddings.bump_bases import TriangleEmbedding
+from stpy.embeddings.triangle_base import EfficientTriangleEmbedding
+import torch
+
+
+if __name__ == "__main__":
+ d = 1
+ m = 2
+ S = BorelSet(1, torch.tensor([[-1, 1]]))
+
+ inefficient = TriangleEmbedding(d=d, m=m, interval=(-1, 1))
+ efficient = EfficientTriangleEmbedding(d, m, interval=(-1, 1))
+
+ for x, j in [(0.5, 1), (0.1, 0)]:
+ x = torch.tensor(x, dtype=torch.float64)
+ assert torch.allclose(
+ inefficient.basis_fun(x, j).double(), efficient.basis_fun(x, j)
+ )
+
+ for set in [[-1, 1], [-0.25, 1], [-2, 2]]:
+ s = BorelSet(1, torch.tensor([set]))
+ assert torch.allclose(inefficient.integral(s), efficient.integral(s))
+
+ d = 2
+ m = 2
+
+ inefficient = TriangleEmbedding(d=d, m=m, interval=(-1, 1))
+ efficient = EfficientTriangleEmbedding(d, m, interval=(-1, 1))
+
+ for x, j in [([0.5, 0.1], 1), ([0.7, 0.1], 0)]:
+ x = torch.tensor(x, dtype=torch.float64)
+ assert torch.allclose(
+ inefficient.basis_fun(x, j).double(), efficient.basis_fun(x, j)
+ )
diff --git a/tests/embedding/faber_schauder_embedding.py b/tests/embedding/faber_schauder_embedding.py
index 7fb9fb3..7ebd1d5 100644
--- a/tests/embedding/faber_schauder_embedding.py
+++ b/tests/embedding/faber_schauder_embedding.py
@@ -10,15 +10,15 @@
from stpy.helpers.helper import interval
m = 16
-B4 = FaberSchauderEmbedding(m = m, d = 1)
+B4 = FaberSchauderEmbedding(m=m, d=1)
-plt.figure(figsize = (20,20))
-basis = lambda x,j: B4.basis_fun(x,j)
-x = torch.from_numpy(np.linspace(-1,1,1024)).view(-1,1)
-print (B4.hierarchical_mask())
+plt.figure(figsize=(20, 20))
+basis = lambda x, j: B4.basis_fun(x, j)
+x = torch.from_numpy(np.linspace(-1, 1, 1024)).view(-1, 1)
+print(B4.hierarchical_mask())
for j in range(m):
- plt.plot(x,basis(x,j), lw = 6)
- plt.grid(ls = '--', lw = 4)
- plt.xlim((-1,1))
+ plt.plot(x, basis(x, j), lw=6)
+ plt.grid(ls="--", lw=4)
+ plt.xlim((-1, 1))
plt.show()
diff --git a/tests/fourier-features-multidimensional.py b/tests/fourier-features-multidimensional.py
index b1cfdd3..c46254b 100644
--- a/tests/fourier-features-multidimensional.py
+++ b/tests/fourier-features-multidimensional.py
@@ -2,25 +2,27 @@
from stpy.continuous_processes.fourier_fea import GaussianProcessFF
from stpy.embeddings.embedding import QuadratureEmbedding
from stpy.helpers.helper import interval
-if __name__ == "__main__":
- m = 128
+if __name__ == "__main__":
- def cost_function():
- gamma = 0.2
- torch.manual_seed(245)
- z2 = QuadratureEmbedding(gamma=gamma, m=m, d=2)
- theta2d = torch.randn(m, 1).double()
- F = lambda x: z2.embed_one(x[1, 0:2].view(1,-1)) @ theta2d
- print (torch.norm(theta2d))
- return F
+ m = 128
- F = cost_function()
- xtest = torch.from_numpy(interval(50,2))
- ytest = F(xtest)
+ def cost_function():
+ gamma = 0.2
+ torch.manual_seed(245)
+ z2 = QuadratureEmbedding(gamma=gamma, m=m, d=2)
+ theta2d = torch.randn(m, 1).double()
+ F = lambda x: z2.embed_one(x[1, 0:2].view(1, -1)) @ theta2d
+ print(torch.norm(theta2d))
+ return F
- GP = GaussianProcessFF(d = 2, groups=[[0,1]], m = torch.Tensor([m,64]), gamma = torch.Tensor([0.2]))
- GP.fit_gp(xtest,ytest)
+ F = cost_function()
+ xtest = torch.from_numpy(interval(50, 2))
+ ytest = F(xtest)
- GP.visualize_contour(xtest,f_true=F)
+ GP = GaussianProcessFF(
+ d=2, groups=[[0, 1]], m=torch.tensor([m, 64]), gamma=torch.tensor([0.2])
+ )
+ GP.fit_gp(xtest, ytest)
+ GP.visualize_contour(xtest, f_true=F)
diff --git a/tests/gibbs_kernel.py b/tests/gibbs_kernel.py
index 0e8cf49..c647570 100644
--- a/tests/gibbs_kernel.py
+++ b/tests/gibbs_kernel.py
@@ -1,38 +1,40 @@
from stpy.kernels import KernelFunction
-from stpy.continuous_processes.gauss_procc import GaussianProcess
+from stpy.continuous_processes.gauss_procc import GaussianProcess
from stpy.helpers.helper import interval
from stpy.embeddings.optimal_positive_basis import OptimalPositiveBasis
import matplotlib.pyplot as plt
import torch
import numpy as np
+
n = 1024
d = 1
def gamma(x):
- out = x[:,0].view(-1,1)*0
- small = x <= - 0.5
- mid = torch.logical_and(x >= -0.5,x <= 0.5)
- large = x > 0.5
- gamma1 = 0.1
- gamma2 = 1.
- out[small] = (gamma2-gamma1)/(torch.exp(-25*(x[small]+0.5)) + 1) + gamma1
- out[mid] = gamma2
- out[large] = (gamma2-gamma1)/(torch.exp(-25*(-x[large]+0.5)) + 1) + gamma1
- return out
-
-gamma = lambda x: x[:,0].view(-1,1)*0 + 0.05 + 0.3*(x+1)**4
-
-#gamma = lambda x: x[x<-0.5]*0 +0 + 0.05 + 0.2*(x+1)**2#*torch.abs(torch.cos(x*np.pi)) + 0.5
-xtest = torch.from_numpy(interval(n,d))
-
-vals = gamma(xtest).T**2 + gamma(xtest)**2
+ out = x[:, 0].view(-1, 1) * 0
+ small = x <= -0.5
+ mid = torch.logical_and(x >= -0.5, x <= 0.5)
+ large = x > 0.5
+ gamma1 = 0.1
+ gamma2 = 1.0
+ out[small] = (gamma2 - gamma1) / (torch.exp(-25 * (x[small] + 0.5)) + 1) + gamma1
+ out[mid] = gamma2
+ out[large] = (gamma2 - gamma1) / (torch.exp(-25 * (-x[large] + 0.5)) + 1) + gamma1
+ return out
+
+
+gamma = lambda x: x[:, 0].view(-1, 1) * 0 + 0.05 + 0.3 * (x + 1) ** 4
+
+# gamma = lambda x: x[x<-0.5]*0 +0 + 0.05 + 0.2*(x+1)**2#*torch.abs(torch.cos(x*np.pi)) + 0.5
+xtest = torch.from_numpy(interval(n, d))
+
+vals = gamma(xtest).T ** 2 + gamma(xtest) ** 2
plt.imshow(vals)
plt.colorbar()
plt.show()
-k = KernelFunction(kernel_name="gibbs", params={'gamma_fun':gamma})
-plt.imshow(k.kernel(xtest,xtest))
+k = KernelFunction(kernel_name="gibbs", params={"gamma_fun": gamma})
+plt.imshow(k.kernel(xtest, xtest))
plt.colorbar()
plt.show()
@@ -45,13 +47,23 @@ def gamma(x):
s = 0.01
b = 0
-Emb = OptimalPositiveBasis(d, m, offset=0.0, s=s, b=b, discretization_size=n, B=1000., kernel_object=k, samples = 1000)
+Emb = OptimalPositiveBasis(
+ d,
+ m,
+ offset=0.0,
+ s=s,
+ b=b,
+ discretization_size=n,
+ B=1000.0,
+ kernel_object=k,
+ samples=1000,
+)
for i in range(m):
- f_i = Emb.basis_fun(xtest, i) ## basis function
- plt.plot(xtest,f_i)
+ f_i = Emb.basis_fun(xtest, i) ## basis function
+ plt.plot(xtest, f_i)
plt.show()
# ytest = GP.sample(xtest)
# plt.plot(xtest,ytest)
-# plt.show()
\ No newline at end of file
+# plt.show()
diff --git a/tests/gradient_confidence_test.py b/tests/gradient_confidence_test.py
index 88288f7..19db4bd 100755
--- a/tests/gradient_confidence_test.py
+++ b/tests/gradient_confidence_test.py
@@ -2,39 +2,45 @@
import torch
from stpy.continuous_processes.gauss_procc import GaussianProcess
from stpy.helpers.helper import interval
-#%matplotlib notebook
+
+# %matplotlib notebook
# 2D Grid
n = 20
n_vis = 50
d = 2
-xtest_vis = torch.from_numpy(interval(n_vis,d))
-xtest = torch.from_numpy(interval(n,d,L_infinity_ball=1.))
+xtest_vis = torch.from_numpy(interval(n_vis, d))
+xtest = torch.from_numpy(interval(n, d, L_infinity_ball=1.0))
noise_s = 0.001
bw = 0.4
-GP_true = GaussianProcess(groups = [[0],[1]], gamma = bw*torch.ones(2,dtype = torch.float64), kernel = "ard", s = noise_s)
+GP_true = GaussianProcess(
+ groups=[[0], [1]],
+ gamma=bw * torch.ones(2, dtype=torch.float64),
+ kernel="ard",
+ s=noise_s,
+)
y = GP_true.sample(xtest)
-GP_true.fit_gp(xtest,y)
+GP_true.fit_gp(xtest, y)
-zero = torch.from_numpy(np.array([[0.,0.]]))
-gradient, hessian = GP_true.mean_gradient_hessian(zero, hessian = True)
+zero = torch.from_numpy(np.array([[0.0, 0.0]]))
+gradient, hessian = GP_true.mean_gradient_hessian(zero, hessian=True)
-GP_fit = GaussianProcess(gamma = bw, kernel = "squared_exponential", s = noise_s)
-GP_fit.fit_gp(xtest ,y)
-#GP_fit.visualize(xtest_vis)
+GP_fit = GaussianProcess(gamma=bw, kernel="squared_exponential", s=noise_s)
+GP_fit.fit_gp(xtest, y)
+# GP_fit.visualize(xtest_vis)
GP_fit.log_marginal_likelihood_self()
GP_fit.visualize_quiver(xtest_vis)
-print ("Zero:" ,zero)
+print("Zero:", zero)
g, V = GP_fit.gradient_mean_var(zero)
-print (gradient)
+print(gradient)
-print (V)
+print(V)
-print ("------------------")
\ No newline at end of file
+print("------------------")
diff --git a/tests/gradients_test.py b/tests/gradients_test.py
index 67b97b3..ef28e49 100755
--- a/tests/gradients_test.py
+++ b/tests/gradients_test.py
@@ -3,87 +3,100 @@
import torch
from stpy.continuous_processes.gauss_procc import GaussianProcess
from stpy.helpers.helper import interval
-#%matplotlib notebook
+
+# %matplotlib notebook
# 2D Grid
-for n in np.arange(50,60,10):
+for n in np.arange(50, 60, 10):
n_vis = 50
d = 2
- xtest_vis = torch.from_numpy(interval(n_vis,d))
- xtest = torch.from_numpy(interval(n,d,L_infinity_ball=0.01))
+ xtest_vis = torch.from_numpy(interval(n_vis, d))
+ xtest = torch.from_numpy(interval(n, d, L_infinity_ball=0.01))
noise_s = 0.001
bw = 0.4
- GP_true = GaussianProcess(groups = [[0],[1]], gamma = bw*torch.ones(2,dtype = torch.float64), kernel = "ard", s = noise_s)
+ GP_true = GaussianProcess(
+ groups=[[0], [1]],
+ gamma=bw * torch.ones(2, dtype=torch.float64),
+ kernel="ard",
+ s=noise_s,
+ )
y = GP_true.sample(xtest)
- GP_true.fit_gp(xtest,y)
-
- zero = torch.from_numpy(np.array([[0.,0.]]))
- gradient, hessian = GP_true.mean_gradient_hessian(zero, hessian = True)
+ GP_true.fit_gp(xtest, y)
- # print ("gradient:",gradient)
- # print ("hessian:",hessian)
+ zero = torch.from_numpy(np.array([[0.0, 0.0]]))
+ gradient, hessian = GP_true.mean_gradient_hessian(zero, hessian=True)
+ # print ("gradient:",gradient)
+ # print ("hessian:",hessian)
# [mu, _] = GP_true.get_lambdas(2, mean=True)
# for z in [10e-1, 10e-2, 10e-3, 10e-4, 10e-5, 10e-6, 10e-7]:
# print(z, stpy.helper.finite_differences(mu,z,xtest[0].view(1,-1)))
- theta = np.radians(12.)
+ theta = np.radians(12.0)
thetainv = np.pi - theta
c, s = np.cos(theta), np.sin(theta)
- RandRot = torch.from_numpy(np.array(((c,-s), (s, c))))
- #print (RandRot)
+ RandRot = torch.from_numpy(np.array(((c, -s), (s, c))))
+ # print (RandRot)
def eval(x):
xprime = x.mm(RandRot)
f = GP_true.mean_std(xprime)[0]
return f
-
y_prime = eval(xtest)
- GP_fit = GaussianProcess(groups = [[0,1]], gamma = bw*torch.ones(2,dtype = torch.float64), kernel = "ard", s = noise_s)
- GP_fit.fit_gp(xtest,y_prime)
+ GP_fit = GaussianProcess(
+ groups=[[0, 1]],
+ gamma=bw * torch.ones(2, dtype=torch.float64),
+ kernel="ard",
+ s=noise_s,
+ )
+ GP_fit.fit_gp(xtest, y_prime)
GP_fit.visualize(xtest_vis)
GP_fit.log_marginal_likelihood_self()
- print ("Zero:",zero)
+ print("Zero:", zero)
g, V = GP_fit.gradient_mean_var(zero)
- print (gradient)
+ print(gradient)
- print (V)
+ print(V)
- print ("------------------")
-
- gradient, hessian = GP_fit.mean_gradient_hessian(zero, hessian = True)
- Q = torch.symeig(hessian, eigenvectors = True)[1]
+ print("------------------")
+ gradient, hessian = GP_fit.mean_gradient_hessian(zero, hessian=True)
+ Q = torch.symeig(hessian, eigenvectors=True)[1]
print(GP_fit.mean_std(zero))
- #print ("Estimated:",Q)
- #print ("True:", RandRot)
+ # print ("Estimated:",Q)
+ # print ("True:", RandRot)
P = torch.t(Q) @ RandRot
- I = torch.eye(GP_fit.d, dtype = torch.float64)
- Noise = s*I*s
+ I = torch.eye(GP_fit.d, dtype=torch.float64)
+ Noise = s * I * s
Perm = torch.clamp(torch.abs(P), min=10e-3)
- print (n, P,torch.norm(torch.abs(P)-Perm))
-
+ print(n, P, torch.norm(torch.abs(P) - Perm))
no = 100
- thetas = np.linspace(0.,np.pi,no)
+ thetas = np.linspace(0.0, np.pi, no)
res = []
for theta in thetas:
c, s = np.cos(theta), np.sin(theta)
- Rot = np.array(((c,-s), (s, c)))
+ Rot = np.array(((c, -s), (s, c)))
Rot = torch.from_numpy(Rot)
- res.append(float(GP_fit.log_marginal_likelihood(GP_fit.kernel_object.gamma,Rot,GP_fit.kernel_object.kappa)))
- plt.plot(thetas,res)
- plt.plot([thetainv],np.average(np.array(res)),'ro')
+ res.append(
+ float(
+ GP_fit.log_marginal_likelihood(
+ GP_fit.kernel_object.gamma, Rot, GP_fit.kernel_object.kappa
+ )
+ )
+ )
+ plt.plot(thetas, res)
+ plt.plot([thetainv], np.average(np.array(res)), "ro")
plt.show()
- GP_fit.optimize_params(type = "rots", restarts = 10)
+ GP_fit.optimize_params(type="rots", restarts=10)
GP_fit.log_marginal_likelihood_self()
print(GP_fit.Rot)
diff --git a/tests/hessian-estimation-test.py b/tests/hessian-estimation-test.py
index af8d033..6c287c6 100755
--- a/tests/hessian-estimation-test.py
+++ b/tests/hessian-estimation-test.py
@@ -10,14 +10,14 @@
L_infinity_ball = 0.5
d = 2
-thetae = np.radians(35.)
+thetae = np.radians(35.0)
ce, se = np.cos(thetae), np.sin(thetae)
R = torch.from_numpy(np.array(((ce, -se), (se, ce))))
-D = torch.diag(torch.Tensor([0.8, 1.1]).double())
-#D = torch.diag(torch.Tensor([1, 1]).double())
+D = torch.diag(torch.tensor([0.8, 1.1]).double())
+# D = torch.diag(torch.tensor([1, 1]).double())
W = R.T @ D @ R
-print (W)
+print(W)
BenchmarkFunc = QuadraticBenchmark(d=d, R=W)
x = BenchmarkFunc.initial_guess(N)
@@ -31,27 +31,36 @@
F0 = lambda x: BenchmarkFunc.eval(x, sigma=0)
-def plot_contour(xtest,ytest,lim=None):
+def plot_contour(xtest, ytest, lim=None):
from scipy.interpolate import griddata
+
xx = xtest[:, 0].numpy()
yy = xtest[:, 1].numpy()
- grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j]
- grid_z_mu = griddata((xx, yy), ytest[:, 0].numpy(), (grid_x, grid_y), method='linear')
+ grid_x, grid_y = np.mgrid[min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j]
+ grid_z_mu = griddata(
+ (xx, yy), ytest[:, 0].numpy(), (grid_x, grid_y), method="linear"
+ )
fig, ax = plt.subplots(figsize=(10, 9))
cs = ax.contourf(grid_x, grid_y, grid_z_mu)
- ax.contour(cs, colors='k')
+ ax.contour(cs, colors="k")
if lim is not None:
- plt.xlim([-lim,lim])
- plt.ylim([-lim,lim])
+ plt.xlim([-lim, lim])
+ plt.ylim([-lim, lim])
plt.colorbar(cs)
# Plot grid.
- ax.grid(c='k', ls='-', alpha=0.1)
-
+ ax.grid(c="k", ls="-", alpha=0.1)
## Additive Model
m = 64
-GP = GaussianProcessFF(d=d, s=s, m = torch.ones(d)*m, gamma=gamma*torch.ones(d), bounds=bounds, groups = stpy.helpers.helper.full_group(d))
+GP = GaussianProcessFF(
+ d=d,
+ s=s,
+ m=torch.ones(d) * m,
+ gamma=gamma * torch.ones(d),
+ bounds=bounds,
+ groups=stpy.helpers.helper.full_group(d),
+)
## Global Model
# m = 512
@@ -61,23 +70,22 @@ def plot_contour(xtest,ytest,lim=None):
p = 5
d = 2
-embedding = PolynomialEmbedding(d,p)
+embedding = PolynomialEmbedding(d, p)
Map = lambda x: embedding.embed(x)
# Starting points
-x0_1 = torch.Tensor([0.1, 0.1]).double().view(-1, d)
+x0_1 = torch.tensor([0.1, 0.1]).double().view(-1, d)
-#x0_1 = torch.Tensor([-0.1, 0.]).double().view(-1, d)
-x0_2 = torch.Tensor([0.1, 0.1]).double().view(-1, d)
+# x0_1 = torch.tensor([-0.1, 0.]).double().view(-1, d)
+x0_2 = torch.tensor([0.1, 0.1]).double().view(-1, d)
print("Embeding size:", Map(x0_1).size())
-Bandit = OPPR_TS_GP(x0_1, F, GP, Map, finite_dim=True, s = s, GPMap = True)
-#Bandit.decolerate(x0_1,10e-5,1)
-Bandit.decolerate_AJD([x0_1,x0_2],10e-5,1)
-
-print (Bandit.Q)
-print (W@Bandit.Q)
-print (W@torch.inverse(Bandit.Q))
+Bandit = OPPR_TS_GP(x0_1, F, GP, Map, finite_dim=True, s=s, GPMap=True)
+# Bandit.decolerate(x0_1,10e-5,1)
+Bandit.decolerate_AJD([x0_1, x0_2], 10e-5, 1)
+print(Bandit.Q)
+print(W @ Bandit.Q)
+print(W @ torch.inverse(Bandit.Q))
diff --git a/tests/interval_groups_test.py b/tests/interval_groups_test.py
index 0c20a87..bb566e0 100644
--- a/tests/interval_groups_test.py
+++ b/tests/interval_groups_test.py
@@ -1,10 +1,15 @@
-from stpy.helpers.helper import interval_groups, get_hierarchy, hierarchical_distance, valid_enlargement
+from stpy.helpers.helper import (
+ interval_groups,
+ get_hierarchy,
+ hierarchical_distance,
+ valid_enlargement,
+)
if __name__ == "__main__":
- out = get_hierarchy(start = 0,new_elements=[1,2,3])
- curr = [[0], [1], [2], [3]]
- print(hierarchical_distance(curr, [[0,1],[2],[3]]))
- enlargements = valid_enlargement(curr, out)
- for enlargement in enlargements:
- print (curr,"->",out[enlargement])
\ No newline at end of file
+ out = get_hierarchy(start=0, new_elements=[1, 2, 3])
+ curr = [[0], [1], [2], [3]]
+ print(hierarchical_distance(curr, [[0, 1], [2], [3]]))
+ enlargements = valid_enlargement(curr, out)
+ for enlargement in enlargements:
+ print(curr, "->", out[enlargement])
diff --git a/tests/kernelized-features-test.py b/tests/kernelized-features-test.py
index 3da0af7..e63d65e 100644
--- a/tests/kernelized-features-test.py
+++ b/tests/kernelized-features-test.py
@@ -5,27 +5,27 @@
import numpy as np
if __name__ == "__main__":
- m = 16
- gamma = 1.
- s = 0.0001
- n = 40
+ m = 16
+ gamma = 1.0
+ s = 0.0001
+ n = 40
- embedding = HermiteEmbedding(m = m, gamma = gamma)
- GP = KernelizedFeatures(embedding=embedding,s = s,m = m)
+ embedding = HermiteEmbedding(m=m, gamma=gamma)
+ GP = KernelizedFeatures(embedding=embedding, s=s, m=m)
- x = torch.from_numpy(interval(n,1))
- xtest = torch.from_numpy(interval(2048,1))
- F = lambda x: torch.sin(10*x)
- y = F(x)
+ x = torch.from_numpy(interval(n, 1))
+ xtest = torch.from_numpy(interval(2048, 1))
+ F = lambda x: torch.sin(10 * x)
+ y = F(x)
- GP.fit_gp(x,y)
- mu, std = GP.mean_std(xtest)
- print (mu.size())
- print (std.size())
- GP.visualize(xtest)
+ GP.fit_gp(x, y)
+ mu, std = GP.mean_std(xtest)
+ print(mu.size())
+ print(std.size())
+ GP.visualize(xtest)
- for _ in range(30):
- x = torch.from_numpy(np.random.uniform(-1,1,1)).view(1,1)
- GP.add_data_point(x,F(x))
+ for _ in range(30):
+ x = torch.from_numpy(np.random.uniform(-1, 1, 1)).view(1, 1)
+ GP.add_data_point(x, F(x))
- GP.visualize(xtest)
+ GP.visualize(xtest)
diff --git a/tests/kernels/ard_matern_kernel_test.py b/tests/kernels/ard_matern_kernel_test.py
index 56379b3..fd4b05b 100644
--- a/tests/kernels/ard_matern_kernel_test.py
+++ b/tests/kernels/ard_matern_kernel_test.py
@@ -10,14 +10,20 @@
d = 2
eps = 0.01
s = 1
-x = torch.rand(N,d).double()*2 - 1
-xtest = torch.from_numpy(interval(n,d,L_infinity_ball=1))
+x = torch.rand(N, d).double() * 2 - 1
+xtest = torch.from_numpy(interval(n, d, L_infinity_ball=1))
# true
GP = GaussianProcess(kernel_name="ard_matern", d=d)
y = GP.sample(x)
-GP.fit_gp(x,y)
-GP.optimize_params(type="bandwidth", restarts=5, verbose = False, optimizer = 'pytorch-minimize', scale = 1., weight=1.)
+GP.fit_gp(x, y)
+GP.optimize_params(
+ type="bandwidth",
+ restarts=5,
+ verbose=False,
+ optimizer="pytorch-minimize",
+ scale=1.0,
+ weight=1.0,
+)
GP.visualize_contour(xtest)
#
-
diff --git a/tests/marginalized_likelihood_test.py b/tests/marginalized_likelihood_test.py
index 8dc1a6c..c1551c7 100644
--- a/tests/marginalized_likelihood_test.py
+++ b/tests/marginalized_likelihood_test.py
@@ -7,102 +7,98 @@
from pymanopt.manifolds import Euclidean
if __name__ == "__main__":
- d = 2
- n = 3
-
-
- ## Squared exponential with single parameter
- GP = GaussianProcess(gamma=1., kernel_name="ard", d=2)
- x = torch.rand(n,d).double()*2 - 1
- y = GP.sample(x)
- GP.fit_gp(x,y)
- xtest = torch.from_numpy(interval(50,2,L_infinity_ball=1))
-
- #
- # init_val = None
- # manifold = Euclidean(2)
- # bounds = None
- #
- # params = {"0":{"kappa":(1.,Euclidean(1),None),"ard_gamma":(init_val, manifold, bounds)}}
- #GP.optimize_params_general(params = params, maxiter = 100)
-
- #GP.optimize_params(type = "bandwidth", restarts=2)
-
-
-#
- ## Additive quick
- k = KernelFunction(kernel_name = "ard", d = 2, groups = [[0],[1]] )
- GP = GaussianProcess(kernel=k)
- x = torch.rand(n,d).double()*2 - 1
- y = GP.sample(x)
- GP.fit_gp(x,y)
-
- #GP.optimize_params(type="bandwidth", restarts=2)
-
-
-
-
- # ## Additive via algebra
- k1 = KernelFunction(kernel_name="ard" ,ard_gamma = 0.1, d = 1, group=[0])
- k2 = KernelFunction(kernel_name="polynomial" ,ard_gamma = 0.5, power = 2, d = 1, group=[1])
- k = k1 + k2
- #
- # print (k.params_dict)
- GP = GaussianProcess(kernel=k, d=2)
- #
- x = torch.rand(n, d).double() * 2 - 1
- y = GP.sample(x)
- GP.fit_gp(x, y)
- #GP.optimize_params(type="bandwidth", restarts=2)
-
-
- ## Additive two the same
- k1 = KernelFunction(kernel_name="ard" ,ard_gamma = 0.1, d = 1, group=[0])
- k2 = KernelFunction(kernel_name="ard" ,ard_gamma = 0.5, power = 2, d = 1, group=[1])
- GP = GaussianProcess(kernel=k, d=2)
- #
- x = torch.rand(n, d).double() * 2 - 1
- y = GP.sample(x)
- GP.fit_gp(x, y)
- #GP.optimize_params(type="bandwidth", restarts=2)
-
-
- ## Optimize groups
- k = KernelFunction(kernel_name="ard", d=2, groups = [[0,1]])
- GP = GaussianProcess(kernel=k, d=2)
- #
- x = torch.rand(n, d).double() * 2 - 1
- y = GP.sample(x)
- GP.fit_gp(x, y)
- #print(k.params_dict)
- #GP.optimize_params(type="groups", restarts=2)
-
- ## Optimize power in polynomial kernel
- k = KernelFunction(kernel_name="polynomial", d=2, power = 3)
- GP = GaussianProcess(kernel=k, d=2)
- #
- x = torch.rand(n, d).double() * 2 - 1
- y = GP.sample(x)
- GP.fit_gp(x, y)
- #print(k.params_dict)
- params = {"0":{"power":(1.,[1,2,3,4,5],None)}}
- #GP.optimize_params_general(params = params, optimizer="discrete")
-
-
- ## Covar
- k = KernelFunction(kernel_name="full_covariance_se", d=2)
- GP = GaussianProcess(kernel=k, d=2)
- #
- x = torch.rand(n, d).double() * 2 - 1
- y = GP.sample(x)
- GP.fit_gp(x, y)
- #GP.optimize_params(type="covariance", restarts=2)
-
- ## cova with regularizer
- k = KernelFunction(kernel_name="full_covariance_se", d=2)
- GP = GaussianProcess(kernel=k, d=2)
- #
- x = torch.rand(n, d).double() * 2 - 1
- y = GP.sample(x)
- GP.fit_gp(x, y)
- GP.optimize_params(type="covariance", restarts=2, regularizer=["spectral_norm",0.1])
\ No newline at end of file
+ d = 2
+ n = 3
+
+ ## Squared exponential with single parameter
+ GP = GaussianProcess(gamma=1.0, kernel_name="ard", d=2)
+ x = torch.rand(n, d).double() * 2 - 1
+ y = GP.sample(x)
+ GP.fit_gp(x, y)
+ xtest = torch.from_numpy(interval(50, 2, L_infinity_ball=1))
+
+ #
+ # init_val = None
+ # manifold = Euclidean(2)
+ # bounds = None
+ #
+ # params = {"0":{"kappa":(1.,Euclidean(1),None),"ard_gamma":(init_val, manifold, bounds)}}
+ # GP.optimize_params_general(params = params, maxiter = 100)
+
+ # GP.optimize_params(type = "bandwidth", restarts=2)
+
+ #
+ ## Additive quick
+ k = KernelFunction(kernel_name="ard", d=2, groups=[[0], [1]])
+ GP = GaussianProcess(kernel=k)
+ x = torch.rand(n, d).double() * 2 - 1
+ y = GP.sample(x)
+ GP.fit_gp(x, y)
+
+ # GP.optimize_params(type="bandwidth", restarts=2)
+
+ # ## Additive via algebra
+ k1 = KernelFunction(kernel_name="ard", ard_gamma=0.1, d=1, group=[0])
+ k2 = KernelFunction(
+ kernel_name="polynomial", ard_gamma=0.5, power=2, d=1, group=[1]
+ )
+ k = k1 + k2
+ #
+ # print (k.params_dict)
+ GP = GaussianProcess(kernel=k, d=2)
+ #
+ x = torch.rand(n, d).double() * 2 - 1
+ y = GP.sample(x)
+ GP.fit_gp(x, y)
+ # GP.optimize_params(type="bandwidth", restarts=2)
+
+ ## Additive two the same
+ k1 = KernelFunction(kernel_name="ard", ard_gamma=0.1, d=1, group=[0])
+ k2 = KernelFunction(kernel_name="ard", ard_gamma=0.5, power=2, d=1, group=[1])
+ GP = GaussianProcess(kernel=k, d=2)
+ #
+ x = torch.rand(n, d).double() * 2 - 1
+ y = GP.sample(x)
+ GP.fit_gp(x, y)
+ # GP.optimize_params(type="bandwidth", restarts=2)
+
+ ## Optimize groups
+ k = KernelFunction(kernel_name="ard", d=2, groups=[[0, 1]])
+ GP = GaussianProcess(kernel=k, d=2)
+ #
+ x = torch.rand(n, d).double() * 2 - 1
+ y = GP.sample(x)
+ GP.fit_gp(x, y)
+ # print(k.params_dict)
+ # GP.optimize_params(type="groups", restarts=2)
+
+ ## Optimize power in polynomial kernel
+ k = KernelFunction(kernel_name="polynomial", d=2, power=3)
+ GP = GaussianProcess(kernel=k, d=2)
+ #
+ x = torch.rand(n, d).double() * 2 - 1
+ y = GP.sample(x)
+ GP.fit_gp(x, y)
+ # print(k.params_dict)
+ params = {"0": {"power": (1.0, [1, 2, 3, 4, 5], None)}}
+ # GP.optimize_params_general(params = params, optimizer="discrete")
+
+ ## Covar
+ k = KernelFunction(kernel_name="full_covariance_se", d=2)
+ GP = GaussianProcess(kernel=k, d=2)
+ #
+ x = torch.rand(n, d).double() * 2 - 1
+ y = GP.sample(x)
+ GP.fit_gp(x, y)
+ # GP.optimize_params(type="covariance", restarts=2)
+
+ ## cova with regularizer
+ k = KernelFunction(kernel_name="full_covariance_se", d=2)
+ GP = GaussianProcess(kernel=k, d=2)
+ #
+ x = torch.rand(n, d).double() * 2 - 1
+ y = GP.sample(x)
+ GP.fit_gp(x, y)
+ GP.optimize_params(
+ type="covariance", restarts=2, regularizer=["spectral_norm", 0.1]
+ )
diff --git a/tests/orthogonal_map_test.py b/tests/orthogonal_map_test.py
index d173cc0..4be4589 100755
--- a/tests/orthogonal_map_test.py
+++ b/tests/orthogonal_map_test.py
@@ -5,45 +5,47 @@
if __name__ == "__main__":
+ dim = 4
+ Benchmark = ProteinBenchmark(
+ "/home/mojko/Documents/PhD/stpy/stpy/test_functions/protein_data_gb1.h5",
+ dim=dim,
+ ref=["A", "B", "C", "D"],
+ )
+ Benchmark.self_translate()
- dim = 4
+ X = Benchmark.data.values[:, 0:dim].astype(int)
+ Y = Benchmark.data.values[:, 5].astype(float).reshape(-1, 1)
- Benchmark = ProteinBenchmark("/home/mojko/Documents/PhD/stpy/stpy/test_functions/protein_data_gb1.h5", dim=dim, ref=['A', 'B', 'C', 'D'])
- Benchmark.self_translate()
+ X_one_hot = Benchmark.translate_one_hot(X)
- X = Benchmark.data.values[:,0:dim].astype(int)
- Y = Benchmark.data.values[:,5].astype(float).reshape(-1,1)
+ X_train, X_test, y_train, y_test = train_test_split(
+ X_one_hot, Y, test_size=0.20, random_state=42
+ )
+ X_train = torch.from_numpy(X_train)
+ X_test = torch.from_numpy(X_test)
+ y_train = torch.from_numpy(y_train)
+ y_test = torch.from_numpy(y_test)
- X_one_hot = Benchmark.translate_one_hot(X)
+ print(X_train.size())
+ print(y_train.size())
- X_train, X_test, y_train, y_test = train_test_split(X_one_hot, Y, test_size = 0.20, random_state = 42)
+ print(X_test.size())
+ print(y_test.size())
- X_train = torch.from_numpy(X_train)
- X_test = torch.from_numpy(X_test)
- y_train = torch.from_numpy(y_train)
- y_test = torch.from_numpy(y_test)
+ d = dim * 26
+ m = dim * 26
- print(X_train.size())
- print(y_train.size())
+ ridge = lambda x: torch.relu(x)
+ Net = RandomMap(d, m, ridge, output=1)
+ print("Loss before training: ", Net.loss(X_test, y_test))
- print(X_test.size())
- print(y_test.size())
+ Net.fit_map(X_train, y_train, verbose=1, lr=10e-1, epochs=100)
- d = dim*26
- m = dim*26
+ print("Net:", Net.forward(X_test[1, :].view(1, -1)))
- ridge = lambda x: torch.relu(x)
- Net = RandomMap(d,m,ridge, output = 1)
+ print("Truth:", y_test[1, :])
- print ("Loss before training: ",Net.loss(X_test,y_test))
-
- Net.fit_map(X_train,y_train, verbose=1, lr = 10e-1, epochs = 100)
-
- print ("Net:",Net.forward(X_test[1,:].view(1,-1)))
-
- print ("Truth:",y_test[1,:])
-
- print (Net.loss(X_test,y_test))
\ No newline at end of file
+ print(Net.loss(X_test, y_test))
diff --git a/tests/regularization_basis.py b/tests/regularization_basis.py
index 8d4cea1..2ccc6b9 100644
--- a/tests/regularization_basis.py
+++ b/tests/regularization_basis.py
@@ -6,67 +6,83 @@
from stpy.helpers.helper import interval
import matplotlib.pyplot as plt
from stpy.kernels import KernelFunction
-from stpy.embeddings.bernstein_embedding import BernsteinEmbedding, BernsteinSplinesEmbedding, BernsteinSplinesOverlapping
-from stpy.embeddings.bump_bases import TriangleEmbedding,PositiveNystromEmbeddingBump
+from stpy.embeddings.bernstein_embedding import (
+ BernsteinEmbedding,
+ BernsteinSplinesEmbedding,
+ BernsteinSplinesOverlapping,
+)
+from stpy.embeddings.bump_bases import TriangleEmbedding, PositiveNystromEmbeddingBump
if __name__ == "__main__":
- d = 1
- m = 32
- n = 256
- N = 20
+ d = 1
+ m = 32
+ n = 256
+ N = 20
- s = 0.01
- b = 0.1
- B = 0.5
+ s = 0.01
+ b = 0.1
+ B = 0.5
- gamma = 0.1
- kernel_object = KernelFunction(gamma = gamma)
- kernel_object_poly = KernelFunction(kernel_name="polynomial", power = N)
+ gamma = 0.1
+ kernel_object = KernelFunction(gamma=gamma)
+ kernel_object_poly = KernelFunction(kernel_name="polynomial", power=N)
- EmbBern = BernsteinEmbedding(d,m,kernel_object=kernel_object,offset=0.5,b=b,B=B,s = s)
- EmbSplines = BernsteinSplinesEmbedding(d,m,kernel_object=kernel_object,offset=0.5,b=b,B=B,s = s)
- EmbSplinesOverlap = BernsteinSplinesOverlapping(d,m,kernel_object=kernel_object,offset=0.5,b=b,B=B,s = s)
- Emb = TriangleEmbedding(d,m,kernel_object=kernel_object,offset=0.5,b=b,B=B,s = s)
- Embpoly = TriangleEmbedding(d,m,kernel_object=kernel_object_poly,offset=0.5,b=b,B=B,s = s)
- Embnys = PositiveNystromEmbeddingBump(d, m, kernel_object=kernel_object, offset=0.5, b=0, B=1000, s = s)
+ EmbBern = BernsteinEmbedding(
+ d, m, kernel_object=kernel_object, offset=0.5, b=b, B=B, s=s
+ )
+ EmbSplines = BernsteinSplinesEmbedding(
+ d, m, kernel_object=kernel_object, offset=0.5, b=b, B=B, s=s
+ )
+ EmbSplinesOverlap = BernsteinSplinesOverlapping(
+ d, m, kernel_object=kernel_object, offset=0.5, b=b, B=B, s=s
+ )
+ Emb = TriangleEmbedding(
+ d, m, kernel_object=kernel_object, offset=0.5, b=b, B=B, s=s
+ )
+ Embpoly = TriangleEmbedding(
+ d, m, kernel_object=kernel_object_poly, offset=0.5, b=b, B=B, s=s
+ )
+ Embnys = PositiveNystromEmbeddingBump(
+ d, m, kernel_object=kernel_object, offset=0.5, b=0, B=1000, s=s
+ )
- GP = GaussianProcess(d = d, s = s, kernel=kernel_object)
+ GP = GaussianProcess(d=d, s=s, kernel=kernel_object)
- xtest = torch.from_numpy(interval(n,d,L_infinity_ball=1.1))
- x = torch.from_numpy(np.random.uniform(-1,1,N)).view(-1,1)
+ xtest = torch.from_numpy(interval(n, d, L_infinity_ball=1.1))
+ x = torch.from_numpy(np.random.uniform(-1, 1, N)).view(-1, 1)
- F_true = lambda x: torch.sin(5*x)**2-0.1
- F = lambda x: F_true(x) + s*torch.randn(x.size()[0]).view(-1,1).double()
- y = F(x)
+ F_true = lambda x: torch.sin(5 * x) ** 2 - 0.1
+ F = lambda x: F_true(x) + s * torch.randn(x.size()[0]).view(-1, 1).double()
+ y = F(x)
- Emb.fit(x, y)
- EmbBern.fit(x, y)
- Embpoly.fit(x, y)
- EmbSplines.fit(x, y)
- EmbSplinesOverlap.fit(x, y)
- Embnys.fit(x, y)
- GP.fit_gp(x,y)
+ Emb.fit(x, y)
+ EmbBern.fit(x, y)
+ Embpoly.fit(x, y)
+ EmbSplines.fit(x, y)
+ EmbSplinesOverlap.fit(x, y)
+ Embnys.fit(x, y)
+ GP.fit_gp(x, y)
- mu = Emb.mean_std(xtest)
- mu_spline = EmbSplines.mean_std(xtest)
- mu_spline_overlap = EmbSplinesOverlap.mean_std(xtest)
- mu_true,_ = GP.mean_std(xtest)
- mu_bern = EmbBern.mean_std(xtest)
- mu_poly = Embpoly.mean_std(xtest)
- mu_pos = Embnys.mean_std(xtest)
+ mu = Emb.mean_std(xtest)
+ mu_spline = EmbSplines.mean_std(xtest)
+ mu_spline_overlap = EmbSplinesOverlap.mean_std(xtest)
+ mu_true, _ = GP.mean_std(xtest)
+ mu_bern = EmbBern.mean_std(xtest)
+ mu_poly = Embpoly.mean_std(xtest)
+ mu_pos = Embnys.mean_std(xtest)
- plt.plot(xtest, xtest*0+b, 'k--')
- plt.plot(xtest, xtest * 0 + B, 'k--')
+ plt.plot(xtest, xtest * 0 + b, "k--")
+ plt.plot(xtest, xtest * 0 + B, "k--")
- plt.plot(xtest,F_true(xtest),'r', label = 'true')
- plt.plot(xtest,mu_true,'b--', label = 'no-constraints')
- plt.plot(xtest,mu_pos)
- plt.plot(x,y,'ro')
- plt.plot(xtest, mu, 'g-x', label = 'Triangles')
- #plt.plot(xtest, mu_bern, 'y-o',label = 'Bernstein basis')
- #plt.plot(xtest, mu_poly, color = 'orange', label='triangles polynomial kernel')
- #plt.plot(xtest, mu_spline, color='purple', label='splines')
- #plt.plot(xtest, mu_spline_overlap, color='brown', label='splines_overlap')
- plt.legend()
- plt.show()
\ No newline at end of file
+ plt.plot(xtest, F_true(xtest), "r", label="true")
+ plt.plot(xtest, mu_true, "b--", label="no-constraints")
+ plt.plot(xtest, mu_pos)
+ plt.plot(x, y, "ro")
+ plt.plot(xtest, mu, "g-x", label="Triangles")
+ # plt.plot(xtest, mu_bern, 'y-o',label = 'Bernstein basis')
+ # plt.plot(xtest, mu_poly, color = 'orange', label='triangles polynomial kernel')
+ # plt.plot(xtest, mu_spline, color='purple', label='splines')
+ # plt.plot(xtest, mu_spline_overlap, color='brown', label='splines_overlap')
+ plt.legend()
+ plt.show()
diff --git a/tests/spike-basis-general.py b/tests/spike-basis-general.py
index 51bc283..fff3113 100644
--- a/tests/spike-basis-general.py
+++ b/tests/spike-basis-general.py
@@ -4,21 +4,22 @@
import torch
import matplotlib.pyplot as plt
from stpy.borel_set import BorelSet
+
if __name__ == "__main__":
- d = 1
- m = 100
- S = BorelSet(1,[-1,1])
+ d = 1
+ m = 100
+ S = BorelSet(1, [-1, 1])
- embed_p = FaberSchauderEmbedding(d=d, m=p)
- print (torch.sum(embed_p.integral(S)))
+ embed_p = FaberSchauderEmbedding(d=d, m=p)
+ print(torch.sum(embed_p.integral(S)))
- m = embed_p.size
- GP = KernelizedFeatures(embeding=embed_p, m=m, d=d)
- F = lambda x: torch.sin(x)
- x = torch.from_numpy(interval(2,d))
- xtest = torch.from_numpy(interval(1024, d))
- GP.fit_gp(x, F(x))
- GP.visualize(xtest, f_true=F, show = False)
- for j in range(p):
- plt.plot(xtest,embed_p.basis_fun(xtest,j+1))
- plt.show()
\ No newline at end of file
+ m = embed_p.size
+ GP = KernelizedFeatures(embeding=embed_p, m=m, d=d)
+ F = lambda x: torch.sin(x)
+ x = torch.from_numpy(interval(2, d))
+ xtest = torch.from_numpy(interval(1024, d))
+ GP.fit_gp(x, F(x))
+ GP.visualize(xtest, f_true=F, show=False)
+ for j in range(p):
+ plt.plot(xtest, embed_p.basis_fun(xtest, j + 1))
+ plt.show()
diff --git a/tests/test-absolute-deviation.py b/tests/test-absolute-deviation.py
index f3ec4ea..db2c30e 100644
--- a/tests/test-absolute-deviation.py
+++ b/tests/test-absolute-deviation.py
@@ -5,25 +5,25 @@
import matplotlib.pyplot as plt
if __name__ == "__main__":
- d = 1
- p = 4
- embed_p = ChebyschevEmbedding(d=d, p=p)
- m = embed_p.size
- GP = KernelizedFeatures(embeding=embed_p, m=m, d=d)
+ d = 1
+ p = 4
+ embed_p = ChebyschevEmbedding(d=d, p=p)
+ m = embed_p.size
+ GP = KernelizedFeatures(embeding=embed_p, m=m, d=d)
- x = torch.from_numpy(interval(10,d))
- xtest = torch.from_numpy(interval(1024, d))
- GP.fit_gp(x, x**4)
+ x = torch.from_numpy(interval(10, d))
+ xtest = torch.from_numpy(interval(1024, d))
+ GP.fit_gp(x, x**4)
- mu = GP.mean_aboslute_deviation(xtest, B = None)
- mu2 = GP.mean_aboslute_deviation(xtest, B = 0.1)
- mu3 = GP.mean_std(xtest)[0]
- mu4 = GP.mean_constrained(xtest, B = 0.1)
- #GP.visualize(xtest, show = False)
+ mu = GP.mean_aboslute_deviation(xtest, B=None)
+ mu2 = GP.mean_aboslute_deviation(xtest, B=0.1)
+ mu3 = GP.mean_std(xtest)[0]
+ mu4 = GP.mean_constrained(xtest, B=0.1)
+ # GP.visualize(xtest, show = False)
- plt.plot(xtest,mu, "--",label = 'l1 unconstrained', alpha = 0.5)
- plt.plot(xtest, mu2,"--",label = 'l1 constrained', alpha = 0.5)
- plt.plot(xtest, mu3, label = 'l2 unconstrained', alpha = 0.5)
- plt.plot(xtest, mu4,label = 'l2 constrained', alpha = 0.5)
- plt.legend()
- plt.show()
\ No newline at end of file
+ plt.plot(xtest, mu, "--", label="l1 unconstrained", alpha=0.5)
+ plt.plot(xtest, mu2, "--", label="l1 constrained", alpha=0.5)
+ plt.plot(xtest, mu3, label="l2 unconstrained", alpha=0.5)
+ plt.plot(xtest, mu4, label="l2 constrained", alpha=0.5)
+ plt.legend()
+ plt.show()
diff --git a/tests/test-positive-basis.py b/tests/test-positive-basis.py
index 71634ba..d07d575 100644
--- a/tests/test-positive-basis.py
+++ b/tests/test-positive-basis.py
@@ -9,16 +9,16 @@
from stpy.helpers.helper import interval
m = 32
-kernel = KernelFunction(gamma = 0.1,kernel_name="squared_exponential", power = 5)
-B4 = PositiveNystromEmbeddingBump(kernel_object=kernel, m = m, d = 1, samples = 100)
+kernel = KernelFunction(gamma=0.1, kernel_name="squared_exponential", power=5)
+B4 = PositiveNystromEmbeddingBump(kernel_object=kernel, m=m, d=1, samples=100)
-plt.figure(figsize = (20,20))
-basis = lambda x,j: B4.basis_fun(x,j)
-x = torch.from_numpy(np.linspace(-1,1,100)).view(-1,1)
+plt.figure(figsize=(20, 20))
+basis = lambda x, j: B4.basis_fun(x, j)
+x = torch.from_numpy(np.linspace(-1, 1, 100)).view(-1, 1)
for j in range(m):
- plt.plot(x,basis(x,j), lw = 6)
- plt.grid(ls = '--', lw = 4)
- plt.xlim((-1,1))
+ plt.plot(x, basis(x, j), lw=6)
+ plt.grid(ls="--", lw=4)
+ plt.xlim((-1, 1))
plt.show()
diff --git a/tests/test_functions/felsimulator_test.py b/tests/test_functions/felsimulator_test.py
index 91b8cb2..e12b571 100644
--- a/tests/test_functions/felsimulator_test.py
+++ b/tests/test_functions/felsimulator_test.py
@@ -7,8 +7,6 @@
if __name__ == "__main__":
sigma = 0.1
- xtest = interval_torch(30, d= 2, L_infinity_ball=0.5)
- F = SwissFEL(d =2, dts = 'evaluations_bpm.hdf5')
+ xtest = interval_torch(30, d=2, L_infinity_ball=0.5)
+ F = SwissFEL(d=2, dts="evaluations_bpm.hdf5")
F.Simulator.GP.visualize_contour(xtest)
-
-
diff --git a/tests/triangle-integration-test.py b/tests/triangle-integration-test.py
index 3c74e3d..591faff 100644
--- a/tests/triangle-integration-test.py
+++ b/tests/triangle-integration-test.py
@@ -10,42 +10,41 @@
if __name__ == "__main__":
- d = 1
- m = 64
- S = BorelSet(1,[-1,1])
-
- embedding = TriangleEmbedding(d=d, m=m, s = 10e-8)
-
- levels = 5
- hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels)
- basic_sets = hierarchical_structure.get_sets_level(hierarchical_structure.levels)
-
- xtest = hierarchical_structure.top_node.return_discretization(512)
-
- for set in basic_sets:
- print (set.bounds, set.volume())
- x = torch.linspace(set.bounds[0, 0], set.bounds[0, 1], 2)
- Gamma_half = embedding.cov()
- val = torch.sum(torch.pinverse(Gamma_half)@embedding.integral(set))
-
-
- plt.plot(x, x * 0 + float(val)/set.volume(), '-o', color="green", lw=5)
- for i in range(m):
- plt.plot(xtest, embedding.basis_fun(xtest,i), 'k')
- plt.show()
-
- plt.subplot(1,2,1)
- plt.imshow(embedding.M)
- plt.subplot(1,2,2)
- plt.imshow(embedding.Gamma_half)
- plt.show()
- # m = embed_p.size
- # GP = KernelizedFeatures(embeding=embed_p, m=m, d=d)
- # F = lambda x: torch.sin(x)
- # x = torch.from_numpy(interval(2,d))
- # xtest = torch.from_numpy(interval(1024, d))
- # GP.fit_gp(x, F(x))
- # GP.visualize(xtest, f_true=F, show = False)
- # for j in range(p):
- # plt.plot(xtest,embed_p.basis_fun(xtest,j+1))
- # plt.show()
\ No newline at end of file
+ d = 1
+ m = 64
+ S = BorelSet(1, [-1, 1])
+
+ embedding = TriangleEmbedding(d=d, m=m, s=10e-8)
+
+ levels = 5
+ hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels)
+ basic_sets = hierarchical_structure.get_sets_level(hierarchical_structure.levels)
+
+ xtest = hierarchical_structure.top_node.return_discretization(512)
+
+ for set in basic_sets:
+ print(set.bounds, set.volume())
+ x = torch.linspace(set.bounds[0, 0], set.bounds[0, 1], 2)
+ Gamma_half = embedding.cov()
+ val = torch.sum(torch.pinverse(Gamma_half) @ embedding.integral(set))
+
+ plt.plot(x, x * 0 + float(val) / set.volume(), "-o", color="green", lw=5)
+ for i in range(m):
+ plt.plot(xtest, embedding.basis_fun(xtest, i), "k")
+ plt.show()
+
+ plt.subplot(1, 2, 1)
+ plt.imshow(embedding.M)
+ plt.subplot(1, 2, 2)
+ plt.imshow(embedding.Gamma_half)
+ plt.show()
+ # m = embed_p.size
+ # GP = KernelizedFeatures(embeding=embed_p, m=m, d=d)
+ # F = lambda x: torch.sin(x)
+ # x = torch.from_numpy(interval(2,d))
+ # xtest = torch.from_numpy(interval(1024, d))
+ # GP.fit_gp(x, F(x))
+ # GP.visualize(xtest, f_true=F, show = False)
+ # for j in range(p):
+ # plt.plot(xtest,embed_p.basis_fun(xtest,j+1))
+ # plt.show()