secondmind-labs · SebastianPopescu · Aug 26, 2022 · Sep 14, 2022 · Sep 14, 2022 · Sep 14, 2022
diff --git a/.gitignore b/.gitignore
@@ -38,17 +38,25 @@ pip-log.txt
 pip-delete-this-directory.txt
 
 # Unit test / coverage reports
+reports/
 htmlcov/
 .tox/
 .coverage
 .coverage.*
 .cache
+.junit.xml
 nosetests.xml
 coverage.xml
 *.cover
 .hypothesis/
 .pytest_cache/
 
+# notebooks
+docs/notebooks/checkpoint
+docs/notebooks/ckpts/
+docs/notebooks/logs/
+docs/notebooks/weights*
+
 # Translations
 *.mo
 *.pot

diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
@@ -11,8 +11,12 @@ Because GitHub's [graph of contributors](http://github.com/secondmind-labs/GPflu
 [James A. Leedham](https://github.com/JamesALeedham)
 [Felix Leibfried](https://github.com/fleibfried), 
 [John A. McLeod](https://github.com/johnamcleod), 
+[Jesper Nielsen](https://github.com/jesnie), 
+[Sebastian Ober](https://github.com/sebastianober), 
+[Sebastian Popescu](https://github.com/SebastianPopescu), 
 [Hugh Salimbeni](https://github.com/hughsalimbeni), 
+[Hrvoje Stojic](https://github.com/hstojic), 
 [Marcin B. Tomczak](https://github.com/marctom)
 
 
-Feel free to add yourself when you first contribute to GPflux's code, tests, or documentation!
+Feel free to add yourself when you first contribute to GPflux's code, tests, or documentation!
diff --git a/docs/conf.py b/docs/conf.py
@@ -69,11 +69,11 @@
     "python": ("https://docs.python.org/3/", None),
     "tensorflow": (
         "https://www.tensorflow.org/api_docs/python",
-        "https://github.com/GPflow/tensorflow-intersphinx/raw/master/tf2_py_objects.inv"
+        "https://github.com/GPflow/tensorflow-intersphinx/raw/master/tf2_py_objects.inv",
     ),
     "tensorflow_probability": (
         "https://www.tensorflow.org/probability/api_docs/python",
-        "https://github.com/GPflow/tensorflow-intersphinx/raw/master/tfp_py_objects.inv"
+        "https://github.com/GPflow/tensorflow-intersphinx/raw/master/tfp_py_objects.inv",
     ),
     "gpflow": ("https://gpflow.readthedocs.io/en/master/", None),
 }
@@ -116,7 +116,9 @@
 }
 
 # If True, show link to rst source on rendered HTML pages
-html_show_sourcelink = False  # Remove 'view source code' from top of page (for html, not python)
+html_show_sourcelink = (
+    False  # Remove 'view source code' from top of page (for html, not python)
+)
 
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,

diff --git a/docs/notebooks/efficient_posterior_sampling.py b/docs/notebooks/efficient_posterior_sampling.py
@@ -80,7 +80,9 @@
 from gpflow.models import GPR
 
 from gpflux.layers.basis_functions.fourier_features import RandomFourierFeaturesCosine
-from gpflux.sampling.kernel_with_feature_decomposition import KernelWithFeatureDecomposition
+from gpflux.feature_decomposition_kernels.kernel_with_feature_decomposition import (
+    KernelWithFeatureDecomposition,
+)
 
 # %% [markdown]
 """

diff --git a/docs/notebooks/efficient_sampling.py b/docs/notebooks/efficient_sampling.py
@@ -37,7 +37,7 @@
 from gpflow.config import default_float
 
 from gpflux.layers.basis_functions.fourier_features import RandomFourierFeaturesCosine
-from gpflux.sampling import KernelWithFeatureDecomposition
+from gpflux.feature_decomposition_kernels import KernelWithFeatureDecomposition
 from gpflux.models.deep_gp import sample_dgp
 
 

diff --git a/docs/notebooks/multi_output_efficient_sampling.py b/docs/notebooks/multi_output_efficient_sampling.py
@@ -0,0 +1,177 @@
+# -*- coding: utf-8 -*-
+# ---
+# jupyter:
+#   jupytext:
+#     cell_markers: '"""'
+#     formats: ipynb,py:percent
+#     text_representation:
+#       extension: .py
+#       format_name: percent
+#       format_version: '1.3'
+#       jupytext_version: 1.4.2
+#   kernelspec:
+#     display_name: Python 3
+#     language: python
+#     name: python3
+# ---
+
+# %% [markdown]
+"""
+# Efficient sampling with Gaussian processes and Random Fourier Features
+
+Gaussian processes (GPs) provide a mathematically elegant framework for learning unknown functions from data. They are robust to overfitting, allow prior assumptions to be incorporated into the model, and provide calibrated uncertainty estimates for their predictions. This makes them prime candidates in settings where data is scarce, noisy or very costly to obtain, and natural tools in applications such as Bayesian optimisation (BO).
+
+Despite their favorable properties, the use of GPs still has practical limitations. One of them is the computational cost of drawing predictive samples from the model, which quickly becomes prohibitive as the sample size grows, and creates a well-known bottleneck for GP-based Thompson sampling (GP-TS), for instance.
+Recent work <cite data-cite="wilson2020efficiently"/> proposes to combine the weight-space and function-space views of GPs to draw samples more efficiently from (approximate) posterior GPs, with encouraging results in low-dimensional regimes.
+
+In GPflux, this functionality is unlocked by grouping a kernel (e.g., `gpflow.kernels.Matern52`) with its feature decomposition using `gpflux.feature_decomposition_kernels.KernelWithFeatureDecomposition`. See the notebooks on [weight space approximation](weight_space_approximation.ipynb) and [efficient posterior sampling](efficient_posterior_sampling.ipynb) for a thorough explanation.
+"""
+# %%
+import numpy as np
+import tensorflow as tf
+import matplotlib.pyplot as plt
+
+import gpflow
+import gpflux
+
+from gpflow.config import default_float
+
+from gpflux.layers.basis_functions.fourier_features import MultiOutputRandomFourierFeaturesCosine
+from gpflux.feature_decomposition_kernels import (
+    KernelWithFeatureDecomposition,
+    SharedMultiOutputKernelWithFeatureDecomposition,
+    SeparateMultiOutputKernelWithFeatureDecomposition,
+)
+from gpflux.models.deep_gp import sample_dgp
+
+tf.keras.backend.set_floatx("float64")
+
+# %% [markdown]
+"""
+## Load Snelson dataset
+"""
+
+# %%
+d = np.load("../../tests/snelson1d.npz")
+X, Y = data = d["X"], d["Y"]
+num_data, input_dim = X.shape
+
+# %% [markdown]
+r"""
+## Setting up the kernel and its feature decomposition
+
+The `KernelWithFeatureDecomposition` instance represents a kernel together with its finite feature decomposition,
+$$
+k(x, x') = \sum_{i=0}^L \lambda_i \phi_i(x) \phi_i(x'),
+$$
+where $\lambda_i$ and $\phi_i(\cdot)$ are the coefficients (eigenvalues) and features (eigenfunctions), respectively, and $L$ is the finite cutoff. See [the notebook on weight space approximation](weight_space_approximation.ipynb) for a detailed explanation of how to construct this decomposition using Random Fourier Features (RFF).
+"""
+
+# %%
+# kernel = gpflow.kernels.Matern52()
+kernel1 = gpflow.kernels.Matern52()
+kernel2 = gpflow.kernels.SquaredExponential()
+# kernel = gpflow.kernels.SeparateIndependent( kernels = [kernel1, kernel2])
+kernel = gpflow.kernels.SharedIndependent(kernel=kernel1, output_dim=2)
+
+Z_1 = np.linspace(X.min(), X.max(), 10).reshape(-1, 1).astype(np.float64)
+Z_2 = np.linspace(X.min(), X.max(), 10).reshape(-1, 1).astype(np.float64)
+
+inducing_variable_1 = gpflow.inducing_variables.InducingPoints(Z_1)
+inducing_variable_2 = gpflow.inducing_variables.InducingPoints(Z_2)
+# inducing_variable = gpflow.inducing_variables.SeparateIndependentInducingVariables(inducing_variable_list= [inducing_variable_1, inducing_variable_2])
+inducing_variable = gpflow.inducing_variables.SharedIndependentInducingVariables(
+    inducing_variable=inducing_variable_1
+)
+
+gpflow.utilities.set_trainable(inducing_variable, False)
+P = 2
+num_rff = 1000
+eigenfunctions = MultiOutputRandomFourierFeaturesCosine(kernel, num_rff, dtype=default_float())
+eigenvalues = np.ones((P, num_rff, 1), dtype=default_float())
+# kernel_with_features = SeparateMultiOutputKernelWithFeatureDecomposition(kernel, eigenfunctions, eigenvalues)
+kernel_with_features = SharedMultiOutputKernelWithFeatureDecomposition(
+    kernel, eigenfunctions, eigenvalues
+)
+# %% [markdown]
+"""
+## Building and training the single-layer GP
+
+### Initialise the single-layer GP
+Because `KernelWithFeatureDecomposition` is just a `gpflow.kernels.Kernel`, we can construct a GP layer with it.
+"""
+# %%
+layer = gpflux.layers.GPLayer(
+    kernel_with_features,
+    inducing_variable,
+    num_data,
+    whiten=True,
+    num_latent_gps=2,
+    mean_function=gpflow.mean_functions.Zero(),
+)
+likelihood_layer = gpflux.layers.LikelihoodLayer(gpflow.likelihoods.Gaussian())  # noqa: E231
+dgp = gpflux.models.DeepGP([layer], likelihood_layer)
+model = dgp.as_training_model()
+# %% [markdown]
+"""
+### Fit model to data
+"""
+
+# %%
+model.compile(tf.optimizers.Adam(learning_rate=0.1))
+
+callbacks = [
+    tf.keras.callbacks.ReduceLROnPlateau(
+        monitor="loss",
+        patience=5,
+        factor=0.95,
+        verbose=0,
+        min_lr=1e-6,
+    )
+]
+
+history = model.fit(
+    {"inputs": X, "targets": tf.tile(Y, [1, 2])},
+    batch_size=num_data,
+    epochs=100,
+    callbacks=callbacks,
+    verbose=0,
+)
+# %% [markdown]
+"""
+## Drawing samples
+
+Now that the model is trained we can draw efficient and consistent samples from the posterior GP. By "consistent" we mean that the `sample_dgp` function returns a function object that can be evaluated multiple times at different locations, but importantly, the returned function values will come from the same GP sample. This functionality is implemented by the `gpflux.sampling.efficient_sample` function.
+"""
+
+# %%
+from typing import Callable
+
+x_margin = 5
+n_x = 1000
+X_test = np.linspace(X.min() - x_margin, X.max() + x_margin, n_x).reshape(-1, 1)
+
+f_mean, f_var = dgp.predict_f(X_test)
+f_scale = np.sqrt(f_var)
+
+
+fig, axs = plt.subplots(1, 2)
+
+
+for dim in range(2):
+
+    # Plot samples
+    n_sim = 10
+    for _ in range(n_sim):
+        # `sample_dgp` returns a callable - which we subsequently evaluate
+        f_sample: Callable[[tf.Tensor], tf.Tensor] = sample_dgp(dgp)
+        axs[dim].plot(X_test, f_sample(X_test).numpy()[..., dim])
+
+    # Plot GP mean and uncertainty intervals and data
+    axs[dim].plot(X_test, f_mean[..., dim], "C0")
+    axs[dim].plot(X_test, f_mean[..., dim] + f_scale[..., dim], "C0--")
+    axs[dim].plot(X_test, f_mean[..., dim] - f_scale[..., dim], "C0--")
+    axs[dim].plot(X, Y, "kx", alpha=0.2)
+    axs[dim].set_xlim(X.min() - x_margin, X.max() + x_margin)
+    axs[dim].set_ylim(Y.min() - x_margin, Y.max() + x_margin)
+plt.show()
diff --git a/docs/notebooks/weight_space_approximation.py b/docs/notebooks/weight_space_approximation.py
@@ -62,7 +62,7 @@
 from gpflow.inducing_variables import InducingPoints
 
 from gpflux.layers.basis_functions.fourier_features import RandomFourierFeaturesCosine
-from gpflux.sampling.kernel_with_feature_decomposition import KernelWithFeatureDecomposition
+from gpflux.feature_decomposition_kernels import KernelWithFeatureDecomposition
 
 # %% [markdown]
 """

diff --git a/gpflux/feature_decomposition_kernels/__init__.py b/gpflux/feature_decomposition_kernels/__init__.py
@@ -0,0 +1,14 @@
+from .kernel_with_feature_decomposition import KernelWithFeatureDecomposition, _ApproximateKernel
+from .multioutput import (
+    SeparateMultiOutputKernelWithFeatureDecomposition,
+    SharedMultiOutputKernelWithFeatureDecomposition,
+    _MultiOutputApproximateKernel,
+)
+
+__all__ = [
+    "_ApproximateKernel",
+    "KernelWithFeatureDecomposition",
+    "_MultiOutputApproximateKernel",
+    "SharedMultiOutputKernelWithFeatureDecomposition",
+    "SeparateMultiOutputKernelWithFeatureDecomposition",
+]
diff --git a/...ling/kernel_with_feature_decomposition.py → ...nels/kernel_with_feature_decomposition.py b/...ling/kernel_with_feature_decomposition.py → ...nels/kernel_with_feature_decomposition.py
@@ -58,10 +58,10 @@ def __init__(
         :param feature_functions: A Keras layer for which the call evaluates the
             ``L`` features of the kernel :math:`\phi_i(\cdot)`. For ``X`` with the shape ``[N, D]``,
             ``feature_functions(X)`` returns a tensor with the shape ``[N, L]``.
-        :param feature_coefficients: A tensor with the shape ``[L, 1]`` with coefficients
+        :param feature_coefficients: A tensor with the shape ``[L, 1]`` with coefficients
             associated with the features, :math:`\lambda_i`.
         """
-        self._feature_functions = feature_functions
+        self._feature_functions = feature_functions  # [N, L]
         self._feature_coefficients = feature_coefficients  # [L, 1]
 
     def K(self, X: TensorType, X2: Optional[TensorType] = None) -> tf.Tensor:
@@ -72,19 +72,23 @@ def K(self, X: TensorType, X2: Optional[TensorType] = None) -> tf.Tensor:
         else:
             phi2 = self._feature_functions(X2)  # [N2, L]
 
-        r = tf.matmul(
-            phi, tf.transpose(self._feature_coefficients) * phi2, transpose_b=True
+        r = tf.linalg.matmul(
+            phi,
+            tf.linalg.matrix_transpose(self._feature_coefficients) * phi2,
+            transpose_b=True,
         )  # [N, N2]
 
         N1, N2 = tf.shape(phi)[0], tf.shape(phi2)[0]
+
         tf.debugging.assert_equal(tf.shape(r), [N1, N2])
         return r
 
     def K_diag(self, X: TensorType) -> tf.Tensor:
         """Approximate the true kernel by an inner product between feature functions."""
         phi_squared = self._feature_functions(X) ** 2  # [N, L]
-        r = tf.reduce_sum(phi_squared * tf.transpose(self._feature_coefficients), axis=1)  # [N,]
-        N = tf.shape(X)[0]
+        r = tf.reduce_sum(phi_squared * tf.transpose(self._feature_coefficients), axis=-1)  # [N,]
+        N = tf.shape(X)[0]  # leading dimension of X; r must have exactly this many entries
+
         tf.debugging.assert_equal(tf.shape(r), [N])  # noqa: E231
         return r
 
@@ -156,8 +160,9 @@ def __init__(
         else:
             self._kernel = kernel
 
-        self._feature_functions = feature_functions
+        self._feature_functions = feature_functions  # [N, L]
         self._feature_coefficients = feature_coefficients  # [L, 1]
+
         tf.ensure_shape(self._feature_coefficients, tf.TensorShape([None, 1]))
 
     @property

diff --git a/gpflux/feature_decomposition_kernels/multioutput/__init__.py b/gpflux/feature_decomposition_kernels/multioutput/__init__.py
@@ -0,0 +1,11 @@
+from .kernel_with_feature_decomposition import (
+    SeparateMultiOutputKernelWithFeatureDecomposition,
+    SharedMultiOutputKernelWithFeatureDecomposition,
+    _MultiOutputApproximateKernel,
+)
+
+__all__ = [
+    "_MultiOutputApproximateKernel",
+    "SharedMultiOutputKernelWithFeatureDecomposition",
+    "SeparateMultiOutputKernelWithFeatureDecomposition",
+]