From c5393af2c6ef8b4cd004fe986633fc1f08a2f81f Mon Sep 17 00:00:00 2001 From: Jose Gomez-Dans Date: Fri, 1 Jun 2018 15:14:19 +0100 Subject: [PATCH] Emulators now use npz file format The emulators (both spectral and scalar output) are both saved as `.npz` files. This addresses #2. The idea is that you then have an extra (text file) with some useful metadata for the emulators. --- gp_emulator/GaussianProcess.py | 49 +++++++++++++++++++++++++++------- gp_emulator/multivariate_gp.py | 47 +++----------------------------- 2 files changed, 42 insertions(+), 54 deletions(-) diff --git a/gp_emulator/GaussianProcess.py b/gp_emulator/GaussianProcess.py index 27737cf..e943941 100644 --- a/gp_emulator/GaussianProcess.py +++ b/gp_emulator/GaussianProcess.py @@ -1,8 +1,10 @@ # -*- coding: utf-8 -*- +import random import warnings + import numpy as np + import scipy.spatial.distance as dist -import random def k_fold_cross_validation(X, K, randomise=False): @@ -25,7 +27,7 @@ def k_fold_cross_validation(X, K, randomise=False): yield training, validation -class GaussianProcess: +class GaussianProcess(object): """ A simple class for Gaussian Process emulation. Currently, it assumes a squared exponential covariance function, but other covariance @@ -33,7 +35,7 @@ class GaussianProcess: """ - def __init__(self, inputs, targets): + def __init__(self, inputs=None, targets=None, emulator_file=None): """The inputs are the input vectors, whereas the targets are the emulated model outputs. @@ -46,10 +48,37 @@ def __init__(self, inputs, targets): targets: array size Ntrain The model outputs corresponding to the ``inputs`` training set """ - - self.inputs = inputs - self.targets = targets + if emulator_file is None: + if inputs is None: + raise ValueError("No emulator file given, inputs can't be empty") + if targets is None: + raise ValueError("No emulator file given, targets can't be empty") + else: + if inputs is not None: + raise ValueError("Can't have both emulator file and input") + if targets is not None: + raise ValueError("Can't have both emulator file and training") + self.inputs = inputs + self.targets = targets + (self.n, self.D) = self.inputs.shape + + def _load_emulator(self, emulator_file): + """Reads in emulator from npz file + We read in an emulator parameterisation from an npz file. + """ + emulator_f = np.load(emulator_file) + self.inputs = emulator_f['inputs'] + self.targets = emulator_f['targets'] (self.n, self.D) = self.inputs.shape + self.theta = emulator_f['theta'] + self._set_params(self.theta) + + def save_emulator(self, emulator_file): + """Save emulator to disk as npz FileExistsError + Saves an emulator to disk using an npz file. + """ + np.savez(emulator_file, inputs=self.inputs, targets=self.targets, + theta=self.theta) def _prepare_likelihood(self): """ @@ -197,8 +226,8 @@ def _learn(self, theta0, verbose): ) except np.linalg.LinAlgError: warnings.warn( - "Optimisation resulted in linear algebra error. " - + "Returning last loglikelihood calculated, but this is fishy", + "Optimisation resulted in linear algebra error. " + + "Returning last loglikelihood calculated, but this is fishy", RuntimeWarning, ) # theta_opt = [ self.current_theta, self.current_loglikelihood ] @@ -293,8 +322,8 @@ def predict(self, testing, do_deriv=True, do_unc=True): deriv = np.zeros((nn, self.D)) for d in range(self.D): aa = ( - self.inputs[:, d].flatten()[None, :] - - testing[:, d].flatten()[:, None] + self.inputs[:, d].flatten()[None, :] - + testing[:, d].flatten()[:, None] ) c = a * aa.T deriv[:, d] = expX[d] * np.dot(c.T, self.invQt) diff --git a/gp_emulator/multivariate_gp.py b/gp_emulator/multivariate_gp.py index 88f87dc..ba93e8d 100644 --- a/gp_emulator/multivariate_gp.py +++ b/gp_emulator/multivariate_gp.py @@ -28,9 +28,10 @@ """ # import h5py -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from .GaussianProcess import GaussianProcess @@ -79,16 +80,6 @@ def __init__( if X is None and y is None: if dump.find(".h5") > 0 or dump.find(".hdf5") > 0: raise IOError("I can't be bothered working with HDF5 files") - - ##f = h5py.File ( dump, 'r+') - ##group = "%s_%03d_%03d_%03d" % ( model, sza, vza, raa ) - ##X = f[group + '/X_train'][:,:] - ##y = f[group + '/y_train'][:,:] - ##hyperparams = f[group+'/hyperparams'][:,:] - ##thresh = f[group+'/thresh'].value - ##basis_functions = f[group+"/basis_functions"][:,:] - ##n_pcs = f[group+"/n_pcs"].value - ##f.close() elif dump.find(".npz"): f = np.load(dump) X = f["X"] @@ -99,19 +90,8 @@ def __init__( basis_functions = f["basis_functions"] n_pcs = f["n_pcs"] f.close() - else: pass - - # f = h5py.File ( dump, 'r+') - # group = "/%s_%03d_%03d_%03d" % ( model, sza, vza, raa ) - # X = f[group + '/X_train'][:,:] - # y = f[group + '/y_train'][:,:] - # hyperparams = f[group+'/hyperparams'][:,:] - # thresh = f[group+'/thresh'].value - # basis_functions = f[group+"/basis_functions"][:,:] - # n_pcs = f[group+"/n_pcs"].value - # f.close() else: raise ValueError("You specified both a dump file and X and y") else: @@ -127,7 +107,7 @@ def __init__( self.y_train = y self.thresh = thresh if basis_functions is None: - print("Decomposing the input dataset into basis functions...", end=" ") + print("Decomposing the input dataset into basis functions...") self.calculate_decomposition(X, thresh) print("Done!\n ====> Using %d basis functions" % self.n_pcs) basis_functions = self.basis_functions @@ -160,27 +140,6 @@ def dump_emulator(self, fname, model_name, sza, vza, raa): fname.find("h5") >= 0 or fname.find(".hdf") >= 0 ): raise IOError("I can't be bothered working with HDF5 files") - # try: - # f = h5py.File (fname, 'r+') - # except IOError: - # print "The file %s did not exist. Creating it" % fname - # f = h5py.File (fname, 'w') - # f - # group = '%s_%03d_%03d_%03d' % ( model_name, sza, vza, raa ) - # if group in f.keys(): - # raise ValueError, "Emulator already exists!" - # f.create_group ("/%s" % group ) - # f.create_dataset ( "/%s/X_train" % group, data=self.X_train, compression="gzip" ) - # f.create_dataset ( "/%s/y_train" % group, data=self.y_train, compression="gzip" ) - # f.create_dataset ( "/%s/hyperparams" % group, data=self.hyperparams, - # compression="gzip" ) - # f.create_dataset ( "/%s/basis_functions" % group, data=self.basis_functions, - # compression="gzip" ) - # f.create_dataset ( "/%s/thresh" % group, data=self.thresh ) - # f.create_dataset ( "/%s/n_pcs" % group, data=self.n_pcs) - # f.close() - # print "Emulator safely saved" - else: np.savez_compressed( fname,