Skip to content

Commit

Permalink
Emulators now use npz file format
Browse files Browse the repository at this point in the history
The emulators (both spectral and scalar output) are both saved as
`.npz` files. This addresses #2. The idea is that you then have an
extra (text file) with some useful metadata for the emulators.
  • Loading branch information
Jose Gomez-Dans committed Jun 1, 2018
1 parent 1ea25ed commit c5393af
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 54 deletions.
49 changes: 39 additions & 10 deletions gp_emulator/GaussianProcess.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# -*- coding: utf-8 -*-
import random
import warnings

import numpy as np

import scipy.spatial.distance as dist
import random


def k_fold_cross_validation(X, K, randomise=False):
Expand All @@ -25,15 +27,15 @@ def k_fold_cross_validation(X, K, randomise=False):
yield training, validation


class GaussianProcess:
class GaussianProcess(object):
"""
A simple class for Gaussian Process emulation. Currently, it assumes
a squared exponential covariance function, but other covariance
functions ought to be possible and easy to implement.
"""

def __init__(self, inputs, targets):
def __init__(self, inputs=None, targets=None, emulator_file=None):
"""The inputs are the input vectors, whereas the targets are the
emulated model outputs.
Expand All @@ -46,10 +48,37 @@ def __init__(self, inputs, targets):
targets: array size Ntrain
The model outputs corresponding to the ``inputs`` training set
"""

self.inputs = inputs
self.targets = targets
if emulator_file is None:
if inputs is None:
raise ValueError("No emulator file given, inputs can't be empty")
if targets is None:
raise ValueError("No emulator file given, targets can't be empty")
else:
if inputs is not None:
raise ValueError("Can't have both emulator file and input")
if targets is not None:
raise ValueError("Can't have both emulator file and training")
self.inputs = inputs
self.targets = targets
(self.n, self.D) = self.inputs.shape

def _load_emulator(self, emulator_file):
"""Reads in emulator from npz file
We read in an emulator parameterisation from an npz file.
"""
emulator_f = np.load(emulator_file)
self.inputs = emulator_f['inputs']
self.targets = emulator_f['targets']
(self.n, self.D) = self.inputs.shape
self.theta = emulator_f['theta']
self._set_params(self.theta)

def save_emulator(self, emulator_file):
"""Save emulator to disk as npz FileExistsError
Saves an emulator to disk using an npz file.
"""
np.savez(emulator_file, inputs=self.inputs, targets=self.targets,
theta=self.theta)

def _prepare_likelihood(self):
"""
Expand Down Expand Up @@ -197,8 +226,8 @@ def _learn(self, theta0, verbose):
)
except np.linalg.LinAlgError:
warnings.warn(
"Optimisation resulted in linear algebra error. "
+ "Returning last loglikelihood calculated, but this is fishy",
"Optimisation resulted in linear algebra error. " +
"Returning last loglikelihood calculated, but this is fishy",
RuntimeWarning,
)
# theta_opt = [ self.current_theta, self.current_loglikelihood ]
Expand Down Expand Up @@ -293,8 +322,8 @@ def predict(self, testing, do_deriv=True, do_unc=True):
deriv = np.zeros((nn, self.D))
for d in range(self.D):
aa = (
self.inputs[:, d].flatten()[None, :]
- testing[:, d].flatten()[:, None]
self.inputs[:, d].flatten()[None, :] -
testing[:, d].flatten()[:, None]
)
c = a * aa.T
deriv[:, d] = expX[d] * np.dot(c.T, self.invQt)
Expand Down
47 changes: 3 additions & 44 deletions gp_emulator/multivariate_gp.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@
"""

# import h5py
import numpy as np
import matplotlib.pyplot as plt

import numpy as np

from .GaussianProcess import GaussianProcess


Expand Down Expand Up @@ -79,16 +80,6 @@ def __init__(
if X is None and y is None:
if dump.find(".h5") > 0 or dump.find(".hdf5") > 0:
raise IOError("I can't be bothered working with HDF5 files")

##f = h5py.File ( dump, 'r+')
##group = "%s_%03d_%03d_%03d" % ( model, sza, vza, raa )
##X = f[group + '/X_train'][:,:]
##y = f[group + '/y_train'][:,:]
##hyperparams = f[group+'/hyperparams'][:,:]
##thresh = f[group+'/thresh'].value
##basis_functions = f[group+"/basis_functions"][:,:]
##n_pcs = f[group+"/n_pcs"].value
##f.close()
elif dump.find(".npz"):
f = np.load(dump)
X = f["X"]
Expand All @@ -99,19 +90,8 @@ def __init__(
basis_functions = f["basis_functions"]
n_pcs = f["n_pcs"]
f.close()

else:
pass

# f = h5py.File ( dump, 'r+')
# group = "/%s_%03d_%03d_%03d" % ( model, sza, vza, raa )
# X = f[group + '/X_train'][:,:]
# y = f[group + '/y_train'][:,:]
# hyperparams = f[group+'/hyperparams'][:,:]
# thresh = f[group+'/thresh'].value
# basis_functions = f[group+"/basis_functions"][:,:]
# n_pcs = f[group+"/n_pcs"].value
# f.close()
else:
raise ValueError("You specified both a dump file and X and y")
else:
Expand All @@ -127,7 +107,7 @@ def __init__(
self.y_train = y
self.thresh = thresh
if basis_functions is None:
print("Decomposing the input dataset into basis functions...", end=" ")
print("Decomposing the input dataset into basis functions...")
self.calculate_decomposition(X, thresh)
print("Done!\n ====> Using %d basis functions" % self.n_pcs)
basis_functions = self.basis_functions
Expand Down Expand Up @@ -160,27 +140,6 @@ def dump_emulator(self, fname, model_name, sza, vza, raa):
fname.find("h5") >= 0 or fname.find(".hdf") >= 0
):
raise IOError("I can't be bothered working with HDF5 files")
# try:
# f = h5py.File (fname, 'r+')
# except IOError:
# print "The file %s did not exist. Creating it" % fname
# f = h5py.File (fname, 'w')
# f
# group = '%s_%03d_%03d_%03d' % ( model_name, sza, vza, raa )
# if group in f.keys():
# raise ValueError, "Emulator already exists!"
# f.create_group ("/%s" % group )
# f.create_dataset ( "/%s/X_train" % group, data=self.X_train, compression="gzip" )
# f.create_dataset ( "/%s/y_train" % group, data=self.y_train, compression="gzip" )
# f.create_dataset ( "/%s/hyperparams" % group, data=self.hyperparams,
# compression="gzip" )
# f.create_dataset ( "/%s/basis_functions" % group, data=self.basis_functions,
# compression="gzip" )
# f.create_dataset ( "/%s/thresh" % group, data=self.thresh )
# f.create_dataset ( "/%s/n_pcs" % group, data=self.n_pcs)
# f.close()
# print "Emulator safely saved"

else:
np.savez_compressed(
fname,
Expand Down

0 comments on commit c5393af

Please sign in to comment.