diff --git a/GP_preference_demo.py b/GP_preference_demo.py index ee18e19..49c8933 100644 --- a/GP_preference_demo.py +++ b/GP_preference_demo.py @@ -3,68 +3,127 @@ import matplotlib.pyplot as plt import GPpref import scipy.optimize as op +import plot_tools as ptt +import test_data +import yaml +# from scipy.stats import beta +plt.rc('font',**{'family':'serif','sans-serif':['Computer Modern Roman']}) +plt.rc('text', usetex=True) -log_hyp = np.log([0.1,1.0,0.1]) # length_scale, sigma_f, sigma_probit -np.random.seed(1) - -n_train = 20 -true_sigma = 0.05 -delta_f = 1e-5 - -# Define polynomial function to be modelled -def true_function(x): - y = np.sin(x*2*np.pi + np.pi/4) + 0.2 - return y - -# Define noisy observation function -def obs_function(x, sigma): - fx = true_function(x) - noise = np.random.normal(scale=sigma, size=x.shape) - return fx + noise - -def noisy_preference_rank(uv, sigma): - fuv = obs_function(uv,sigma) - y = -1*np.ones((fuv.shape[0],1),dtype='int') - y[fuv[:,1] > fuv[:,0]] = 1 - return y, fuv - -# Main program -# Plot true function -x_plot = np.linspace(0.0,1.0,101,dtype='float') -y_plot = true_function(x_plot) +train_hyper = False +use_test_data = False # test_data.data3 # +verbose = 2 + +with open('./data/statruns_2D.yaml', 'rt') as fh: + wave = yaml.safe_load(fh) +try: + np.random.seed(wave['statrun_params']['randseed']) +except KeyError: + np.random.seed(0) +d_x = wave['GP_params']['hyper_counts'][0]-1 +random_wave = test_data.MultiWave(n_dimensions=d_x, **wave['wave_params']) +log_hyp = np.log(wave['hyperparameters']) + +n_rel_train = 5 +n_abs_train = 5 + +n_xplot = 31 +n_posterior_samples = 3 + +random_wave.print_values() +true_function = random_wave.out + +rel_obs_fun = GPpref.RelObservationSampler(true_function, wave['GP_params']['rel_likelihood'], wave['rel_obs_params']) +abs_obs_fun = GPpref.AbsObservationSampler(true_function, wave['GP_params']['abs_likelihood'], wave['abs_obs_params']) + +# True function +# This is a complicated (but memory efficient, maybe?) way to generate list of all grid points, there's probably a +# better (itertools way) +x_plot = np.linspace(0.0,1.0,n_xplot,dtype='float') +x_test = ptt.make_meshlist(x_plot, d_x) +f_true = abs_obs_fun.f(x_test) +mu_true = abs_obs_fun.mean_link(x_test) +abs_y_samples = abs_obs_fun.l.y_list +p_abs_y_true = abs_obs_fun.observation_likelihood_array(x_test, abs_y_samples) +if d_x is 1: + p_rel_y_true = rel_obs_fun.observation_likelihood_array(x_test) # Training data - this is a bit weird, but we sample x values, then the uv pairs # are actually indexes into x, because it is easier computationally. 
You can # recover the actual u,v values using x[ui],x[vi] -x_train = np.random.random((2*n_train,1)) -uvi_train = np.random.choice(range(2*n_train), (n_train,2), replace=False) -uv_train = x_train[uvi_train][:,:,0] +if use_test_data: + x_rel, uvi_rel, y_rel, fuv_rel, x_abs, y_abs, mu_abs = use_test_data() +else: + x_rel, uvi_rel, y_rel, fuv_rel = rel_obs_fun.generate_n_observations(n_rel_train, n_xdim=d_x) + x_abs, y_abs, mu_abs = abs_obs_fun.generate_n_observations(n_abs_train, n_xdim=d_x) + +# Construct GP object +wave['GP_params']['verbose'] = verbose +model_kwargs = {'x_rel':x_rel, 'uvi_rel':uvi_rel, 'x_abs':x_abs, 'y_rel':y_rel, 'y_abs':y_abs, + 'rel_kwargs': wave['rel_obs_params'], 'abs_kwargs': wave['abs_obs_params']} +model_kwargs.update(wave['GP_params']) +prefGP = GPpref.PreferenceGaussianProcess(**model_kwargs) + +prefGP.set_hyperparameters(log_hyp) +# If training hyperparameters, use external optimiser +if train_hyper: + log_hyp = op.fmin(prefGP.calc_nlml,log_hyp) + +f = prefGP.calc_laplace(log_hyp) +prefGP.print_hyperparameters() -# Get noisy observations f(uv) and corresponding ranks y_train -y_train, fuv_train = noisy_preference_rank(uv_train,true_sigma) +# Latent predictions +fhat, vhat = prefGP.predict_latent(x_test) -hf,ha = plt.subplots(1,1) -ha.plot(x_plot,y_plot,'r-') -for uv,fuv,y in zip(uv_train, fuv_train, y_train): - ha.plot(uv, fuv, 'b-') - ha.plot(uv[(y+1)/2],fuv[(y+1)/2],'k+') +# Posterior likelihoods +p_abs_y_post, E_y = prefGP.abs_posterior_likelihood(abs_y_samples, fhat=fhat, varhat=vhat) -ha.set_title('Training data') -ha.set_ylabel('f(x)') -ha.set_xlabel('x') +wrms = test_data.wrms(mu_true, E_y) +wrms2 = test_data.wrms_misclass(mu_true, E_y) +if d_x is 1: + p_rel_y_true = rel_obs_fun.observation_likelihood_array(x_test) + p_rel_y_post = prefGP.rel_posterior_likelihood_array(fhat=fhat, varhat=vhat) +else: + p_rel_y_true, p_rel_y_post = None, None -# GP hyperparameters -# note: using log scaling to aid learning hyperparameters with varied magnitudes +if d_x <= 2: + # Plot true functions + fig_t, ax_t = ptt.true_plots(x_test, f_true, mu_true, wave['rel_obs_params']['sigma'], + abs_y_samples, p_abs_y_true, p_rel_y_true, + t_l=r'True latent function, $f(x)$') -prefGP = GPpref.PreferenceGaussianProcess(x_train, uvi_train, y_train, delta_f=delta_f) + # Posterior estimates + fig_p, ax_p, h_plotobj = \ + ptt.estimate_plots(x_test, f_true, mu_true, fhat, vhat, E_y, wave['rel_obs_params']['sigma'], + abs_y_samples, p_abs_y_post, p_rel_y_post, + x_abs, y_abs, x_rel[uvi_rel], fuv_rel, y_rel, n_posterior_samples=n_posterior_samples, + posterior_plot_kwargs={'color':'grey', 'ls':'--'}, + t_l=r'$\mathcal{GP}$ latent function estimate $\hat{f}(x)$', + t_a=r'Posterior absolute likelihood, $p(u | \mathcal{Y}, \theta)$', + t_r=r'Posterior relative likelihood $P(x_0 \succ x_1 | \mathcal{Y}, \theta)$') + if d_x == 1: + p_err = test_data.rel_error(mu_true, p_rel_y_true, E_y, p_rel_y_post, weight=False) + print "WRMS: {0:0.3f}, WRMS_MC: {1:0.3f}, p_err: {2:0.3f}".format(wrms, wrms2, p_err) + else: + print "WRMS: {0:0.3f}, WRMS_MC: {1:0.3f}".format(wrms, wrms2) + plt.show(block=False) -# Pseudocode: -# FOr a set of hyperparameters, return log likelihood that can be used by an optimiser -theta0 = log_hyp +else: + print "Input state space dimension: {0} is too high to plot".format(d_x) + print "WRMS: {0:0.3f}, WRMS_MC: {1:0.3f}".format(wrms, wrms2) -# log_hyp = op.fmin(prefGP.calc_nlml,theta0) -f,lml = prefGP.calc_laplace(log_hyp) -ha.plot(x_train, f, 'g^') -plt.show() +## SCRAP +# 
p_y = np.zeros((n_ysamples, n_xplot)) +# y_samples = np.linspace(0.01, 0.99, n_ysamples) +# iny = 1.0/n_ysamples +# E_y2 = np.zeros(n_xplot) +# +# normal_samples = np.random.normal(size=n_mcsamples) +# for i,(fstar,vstar) in enumerate(zip(fhat, vhat.diagonal())): +# f_samples = normal_samples*vstar+fstar +# aa, bb = prefGP.abs_likelihood.get_alpha_beta(f_samples) +# p_y[:, i] = [iny*np.sum(beta.pdf(yj, aa, bb)) for yj in y_samples] +# p_y[:, i] /= np.sum(p_y[:, i]) +# E_y2[i] = np.sum(np.dot(y_samples, p_y[:, i])) \ No newline at end of file diff --git a/GP_regression_demo.py b/GP_regression_demo.py index 4580219..fba2edf 100644 --- a/GP_regression_demo.py +++ b/GP_regression_demo.py @@ -3,52 +3,87 @@ import scipy.optimize as op import matplotlib.pyplot as plt import GPr -np.random.seed(0) +import plot_tools as ptt +plt.rc('font',**{'family':'serif','sans-serif':['Computer Modern Roman']}) +plt.rc('text', usetex=True) + +f_sigma = 0.15 +n_samples = 10 +r_seed = 1 +n_posterior_samples = 3 +optimise_hp = False # Define polynomial function to be modelled def true_function(x): - c = np.array([3,5,-9,-3,2],float) - y = np.polyval(c,x) + # c = np.array([3,5,-9,-3,2],float) + # y = np.polyval(c,x) + y = np.sin(x*2*np.pi) return y + # Define noisy observation function -def obs_function(x): - y = true_function(x) + np.random.normal(0,0.1,len(x)) +def obs_function(x, sigma): + y = true_function(x) + np.random.normal(0, sigma, size=x.shape) return y + # Main program # Plot true function -x_plot = np.arange(0,1,0.01,float) -y_plot = true_function(x_plot) +x_plot = np.atleast_2d(np.arange(-0.5, 1.5, 0.01, dtype='float')).T +fx_plot = true_function(x_plot) plt.figure() -plt.plot(x_plot,y_plot,'k-') +plt.plot(x_plot, fx_plot, 'k-') # Training data -x_train = np.random.random(20) -y_train = obs_function(x_train) -plt.plot(x_train,y_train,'rx') +np.random.seed(r_seed) +x_train = np.atleast_2d(0.6*np.random.random(n_samples)+0.2).T +# x_train = np.array([-10.0]) +y_train = obs_function(x_train, f_sigma) +plt.plot(x_train, y_train, 'rx') # Test data -x_test = np.arange(0,1,0.01,float) +x_test = x_plot # GP hyperparameters # note: using log scaling to aid learning hyperparameters with varied magnitudes -log_hyp = np.log([1,1,0.1]) +log_hyp = np.log([0.3, 1.0, 0.2]) mean_hyp = 0 like_hyp = 0 # Initialise GP for hyperparameter training -initGP = GPr.GaussianProcess(log_hyp,mean_hyp,like_hyp,"SE","zero","zero",x_train,y_train) +initGP = GPr.GaussianProcess(log_hyp, mean_hyp, like_hyp, "SE", "zero", "zero", x_train, y_train) # Run optimisation routine to learn hyperparameters -opt_log_hyp = op.fmin(initGP.compute_likelihood,log_hyp) +if optimise_hp: + opt_log_hyp = op.fmin(initGP.compute_likelihood, log_hyp) +else: + opt_log_hyp = log_hyp -# Learnt GP with optimised hyperparameters +# Learned GP with optimised hyperparameters optGP = GPr.GaussianProcess(opt_log_hyp,mean_hyp,like_hyp,"SE","zero","zero",x_train,y_train) -y_test,cov_y = optGP.compute_prediction(x_test) +fhat_test, fhat_var = optGP.compute_prediction(x_test) + +h_fig,h_ax = plt.subplots() +h_fx, patch_fx = ptt.plot_with_bounds(h_ax, x_plot, fx_plot, f_sigma, c=ptt.lines[0]) +h_y, = h_ax.plot(x_train, y_train, 'rx', mew=1.0, ms=8) +h_fhat, patch_fhat = ptt.plot_with_bounds(h_ax, x_plot, fhat_test, np.sqrt(fhat_var.diagonal())[np.newaxis].T, c=ptt.lines[1], ls='--') +f_post = np.random.multivariate_normal(fhat_test.flatten(), fhat_var, n_posterior_samples) +h_pp = h_ax.plot(x_plot, f_post.T, '--', color='grey', lw=0.8) +h_ax.autoscale() + +# h_pp = 
h_ax.plot(x_plot, y_post.T, c='grey', ls='--', lw=0.8) + +# h_ax.set_ylim([-3, 3]) +gp_str = '$\hat{{f}}(x)|\mathcal{{Y}} \sim \mathcal{{GP}}(\mathbf{{0}}, K_{{SE}}:l={0:0.2f}, \sigma_f={1:0.2f}, \sigma_n={2:0.2f})$' +gp_l, gp_sigf, gp_sign = optGP.covFun.hyp +gp_str = gp_str.format(gp_l, gp_sigf, gp_sign) +h_ax.legend((h_fx, h_y, h_fhat),('$f(x)$', '$y \sim \mathcal{{N}}(f(x), {0:0.1f}^2)$'.format(f_sigma), gp_str), loc='best') +h_ax.set_xlabel('$x$') +#h_fig.savefig('fig/regression_example.pdf', bbox_inches='tight', transparent='true') + # Plot true and modelled functions -plt.plot(x_test,y_test,'b-') -plt.plot(x_test,y_test+np.sqrt(cov_y),'g--') -plt.plot(x_test,y_test-np.sqrt(cov_y),'g--') -plt.show() +# plt.plot(x_test,y_test,'b-') +# plt.plot(x_test,y_test+np.sqrt(cov_y),'g--') +# plt.plot(x_test,y_test-np.sqrt(cov_y),'g--') +plt.show(block=False) diff --git a/GPpref.py b/GPpref.py index f19ccd2..2c873a8 100644 --- a/GPpref.py +++ b/GPpref.py @@ -1,13 +1,34 @@ # Simple 1D GP classification example import numpy as np import GPy -from scipy.special import ndtr as std_norm_cdf +#from scipy.special import ndtr as std_norm_cdf, digamma, polygamma +from scipy.special import digamma, polygamma +from scipy.stats import norm, beta +from scipy.linalg import block_diag +import sys -#define a standard normal pdf +class LaplaceException(Exception): + pass + +# Define a standard normal pdf _sqrt_2pi = np.sqrt(2*np.pi) def std_norm_pdf(x): x = np.clip(x,-1e150,1e150) - return np.exp(-(x**2)/2)/_sqrt_2pi + return norm.pdf(x) + # return np.exp(-(x**2)/2)/_sqrt_2pi + + +def std_norm_cdf(x): + x = np.clip(x, -30, 100 ) + return norm.cdf(x) + + +def norm_pdf_norm_cdf_ratio(z): + # Inverse Mills ratio for stability + out = -z + out[z>-30] = std_norm_pdf(z[z>-30])/std_norm_cdf(z[z>-30]) + return out + # Define squared distance calculation function def squared_distance(A,B): @@ -21,6 +42,15 @@ def squared_distance(A,B): sqDist = A2 + B2 - AB return sqDist + +def print_hyperparameters(theta, log=False): + if log is True: + theta = np.exp(theta) + nl = len(theta)-3 + lstr = ', '.join(['%.2f']*nl) % tuple(theta[:nl]) + print "l: {0}, sig_f: {1:0.2f}, sig: {2:0.2f}, v: {3:0.2f}".format(lstr, theta[-3], theta[-2], theta[-1]) + + # Define squared exponential CovarianceFunction function class SquaredExponential(object): def __init__(self, logHyp, x): @@ -44,17 +74,30 @@ def compute_Kxz_matrix(self, z): return Kxz class PrefProbit(object): + type = 'preference' + y_type = 'discrete' + y_list = np.array([[-1], [1]], dtype='int') + def __init__(self, sigma=1.0): - self.set_sigma(sigma) + self.set_hyper([sigma]) self.log2pi = np.log(2.0*np.pi) + def set_hyper(self, hyper): + self.set_sigma(hyper[0]) + def set_sigma(self, sigma): self.sigma = sigma self._isqrt2sig = 1.0 / (self.sigma * np.sqrt(2.0)) self._i2var = self._isqrt2sig**2 + + def print_hyperparameters(self): + print "Probit relative, Gaussian noise on latent. 
", + print "Sigma: {0:0.2f}".format(self.sigma) - def z_k(self, uvi, f, y): - zc = self._isqrt2sig * (f[uvi[:, 1]] - f[uvi[:, 0]]) + def z_k(self, y, f, scale=None): + if scale is None: + scale = self._isqrt2sig + zc = scale * (f[:, 1, None] - f[:, 0, None]) # Weird None is to preserve shape return y * zc def I_k(self, x, uv): # Jensen and Nielsen @@ -67,17 +110,21 @@ def I_k(self, x, uv): # Jensen and Nielsen def derivatives(self, uvi, y, f): nx = len(f) - z = self.z_k(uvi, f, y) - phi_z = std_norm_cdf(z) - N_z = std_norm_pdf(z) + z = self.z_k(y=y, f=self.get_rel_f(f, uvi)) + N_over_phi = norm_pdf_norm_cdf_ratio(z) + # phi_z = std_norm_cdf(z) + # N_z = std_norm_pdf(z) # First derivative (Jensen and Nielsen) dpy_df = np.zeros((nx, 1), dtype='float') - dpyuv_df = y * self._isqrt2sig * N_z / phi_z - dpy_df[uvi[:, 0]] += -dpyuv_df # This implements I_k (note switch because Jensen paper has funky backwards z_k) - dpy_df[uvi[:, 1]] += dpyuv_df + dpyuv_df = y * self._isqrt2sig * N_over_phi # N_z / phi_z + for i, (uvii, uvij) in enumerate(uvi): + dpy_df[uvii] += -dpyuv_df[i] ## ^^FIXED BIT - THANE LOOK HERE^^ + dpy_df[uvij] += dpyuv_df[i] # This implements I_k (note switch because Jensen paper has funky backwards z_k) + # dpy_df[uvi[:, 0]] += -dpyuv_df # NOTE: THESE TWO LINES ARE INCORRECT FOR REPEATED INDEXES!!!!! + # dpy_df[uvi[:, 1]] += dpyuv_df # This implements I_k (note switch because Jensen paper has funky backwards z_k) - inner = -self._i2var * (z * N_z / phi_z + (N_z / phi_z) ** 2) + inner = -self._i2var * (z * N_over_phi + (N_over_phi) ** 2) W = np.zeros((nx, nx), dtype='float') for uvik, ddpy_df in zip(uvi, inner): xi, yi = uvik @@ -85,63 +132,417 @@ def derivatives(self, uvi, y, f): W[yi, yi] -= ddpy_df # If x_i = x_i = v_k then I(x_i)*I(y_i) = -1*-1 = 1 W[xi, yi] -= -ddpy_df # Otherwise, I(x_i)*I(y_i) = -1*1 = -1 W[yi, xi] -= -ddpy_df - return W, dpy_df - def log_marginal(self, uvi, y, f, iK, logdetK): - z = self.z_k(uvi, f, y) + py = np.log(std_norm_cdf(z)) + return W, dpy_df, py + + def get_rel_f(self, f, uvi): + return np.hstack((f[uvi[:, 0]], f[uvi[:, 1]])) + + def likelihood(self, y, f, scale=None): + z = self.z_k(y, f, scale=scale) phi_z = std_norm_cdf(z) - psi = np.sum(np.log(phi_z)) - 0.5 * np.matmul(np.matmul(f.T, iK), f) - 0.5*logdetK - iK.shape[0]/2.0*self.log2pi - return psi.flat[0] + return phi_z + + def log_likelihood(self, y, f): + return np.log(self.likelihood(y, f)) + + def posterior_likelihood(self, fhat, varhat, uvi, y=1): # This is the likelihood assuming a Gaussian over f + var_star = 2*self.sigma**2 + np.atleast_2d([varhat[u, u] + varhat[v, v] - varhat[u, v] - varhat[v, u] for u,v in uvi]).T + p_y = self.likelihood(y, self.get_rel_f(fhat, uvi), 1.0/np.sqrt(var_star)) + return p_y + + def generate_samples(self, f): + fuv = f + np.random.normal(scale=self.sigma, size=f.shape) + y = -1 * np.ones((fuv.shape[0], 1), dtype='int') + y[fuv[:, 1] > fuv[:, 0]] = 1 + return y, fuv + + +class OrdinalProbit(object): + type = 'categorical' + y_type = 'discrete' + + def __init__(self, sigma=1.0, b=1.0, n_ordinals=5, eps=1.0e-10): + self.n_ordinals=n_ordinals + self.set_hyper([sigma, b]) + self.eps = eps + self.y_list = np.atleast_2d(np.arange(1, self.n_ordinals+1, 1, dtype='int')).T + + def set_hyper(self, hyper): + self.set_sigma(hyper[0]) + self.set_b(hyper[1]) + + def set_b(self, b): + if not hasattr(b, "__len__"): + b = abs(b) + self.b = np.hstack(([-np.Inf],np.linspace(-b, b, self.n_ordinals-1), [np.Inf])) + elif len(b) == self.n_ordinals+1: + self.b = b + elif len(b) == 
self.n_ordinals-1: + self.b = np.hstack(([-np.Inf], b, [np.Inf])) + else: + raise ValueError('Specified b should be a scalar or vector of breakpoints') + + def set_sigma(self, sigma): + self.sigma = sigma + self._isigma = 1.0/self.sigma + self._ivar = self._isigma**2 + + def print_hyperparameters(self): + print "Ordinal probit, {0} ordered categories.".format(self.n_ordinals), + print "Sigma: {0:0.2f}, b: ".format(self.sigma), + print self.b + + def z_k(self, y, f): + return self._isigma*(self.b[y] - f) + + def norm_pdf(self, y, f): + f = f*np.ones(y.shape, dtype='float') # This ensures the number of f values matches the number of y + out = np.zeros(y.shape, dtype='float') + for i in range(out.shape[0]): + if y[i] != 0 and y[i] != self.n_ordinals: # b0 = -Inf -> N(-Inf) = 0 + z = self._isigma*(self.b[y[i]] - f[i]) + out[i] = std_norm_pdf(z) + return out + + def norm_cdf(self, y, f, var_x=0.0): + ivar = self._isigma + var_x + f = f*np.ones(y.shape, dtype='float') + out = np.zeros(y.shape, dtype='float') + for i in range(out.shape[0]): + if y[i][0] == self.n_ordinals: + out[i] = 1.0 + elif y[i][0] != 0: + z = ivar*(self.b[y[i]] - f[i]) + out[i] = std_norm_cdf(z) + return out + + def z_pdf(self, y, f): + f = f*np.ones(y.shape, dtype='float') + out = np.zeros(y.shape, dtype='float') + for i in range(out.shape[0]): + if y[i] != 0 and y[i] != self.n_ordinals: # b0 = -Inf -> N(-Inf) = 0 + z = self._isigma*(self.b[y[i]] - f[i]) + out[i] = z*std_norm_pdf(z) + return out + + + def likelihood(self, y, f): + return self.norm_cdf(y, f) - self.norm_cdf(y-1, f) + + def log_likelihood(self, y, f): + return np.log(self.likelihood(y, f)) + + def derivatives(self, y, f): + + l = self.likelihood(y, f) + py = np.log(l) + + # First derivative - Chu and Gharamani + # Having issues with derivative (likelihood denominator drops to 0) + dpy_df = np.zeros(l.shape, dtype='float') + d2py_df2 = np.zeros(l.shape, dtype='float') + for i in range(l.shape[0]): + if l[i] < self.eps: + # l2 = self.likelihood(y[i], f[i]+self.delta_f) + # l0 = self.likelihood(y[i], f[i]-self.delta_f) + # dpy_df[i] = -(l2-l[i])/self.delta_f/l[i] # (ln(f))' = f'/f + # d2py_df2[i] = (l2 - 2*l[i] + l0)/self.delta_f**2/dpy_df[i]/l[i] + + if y[i] == 1: + dpy_df[i] = self._isigma*self.z_k(y[i], f[i]) + d2py_df2[i] = -self._ivar + elif y[i] == self.n_ordinals: + dpy_df[i] = self._isigma*self.z_k(y[i]-1, f[i]) + d2py_df2[i] = -self._ivar + else: + z1 = self.z_k(y[i], f[i]) + z2 = self.z_k(y[i]-1, f[i]) + ep = np.exp(-0.5*(z1**2 - z2**2)) + dpy_df[i] = self._isigma*(z1*ep-z2)/(ep - 1.0) + d2py_df2[i] = -(self._ivar*(1.0 - (z1**2 *ep - z2**2)/(ep - 1.0)) + dpy_df[i]**2) + else: + dpy_df[i] = -self._isigma*(self.norm_pdf(y[i], f[i]) - self.norm_pdf(y[i]-1, f[i])) / l[i] + d2py_df2[i] = -(dpy_df[i]**2 + self._ivar*(self.z_pdf(y[i], f[i]) - self.z_pdf(y[i]-1, f[i])) / l[i]) + + W = np.diagflat(-d2py_df2) + return W, dpy_df, py + + def posterior_likelihood(self, fhat, var_star, y=None): + if y is None: + y = self.y_list + py = np.zeros((len(y), len(fhat)), dtype='float') + mu = np.zeros(fhat.shape, dtype='float') + for i, (f, v) in enumerate(zip(fhat, var_star.diagonal())): + py[:,[i]] = self.norm_cdf(y, f, v) - self.norm_cdf(y-1, f, v) + mu[i] = (py[:,[i]]*y).sum() + if len(y) != self.n_ordinals or any(y != self.y_list): + print "Specified y vector is not full ordinal set." 
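            # (Note: the expected rating mu is only meaningful when summed over the full ordinal set,
            # so when a partial y vector was supplied the code below falls back to posterior_mean,
            # which re-evaluates over the full y_list.)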
+ mu = self.posterior_mean(fhat, var_star) + return py, mu + + def posterior_mean(self, fhat, var_star): + py, mu = self.posterior_likelihood(fhat, var_star) + return mu + + def mean_link(self, f): + mu = np.zeros(f.shape, dtype='float') + for i in range(f.shape[0]): + py = self.likelihood(self.y_list, f[i]) + mu[i] = (py*self.y_list).sum() # Expected value, first moment + return mu + + def generate_samples(self, f): + z = np.zeros(f.shape, dtype='int') + mu = np.zeros(f.shape, dtype='float') + for i in range(f.shape[0]): + py = self.likelihood(self.y_list, f[i]) + mu[i] = (py*self.y_list).sum() # Expected value, first moment + z[i] = np.sum(np.random.uniform() > py.cumsum())+1 + return z, mu + + +class AbsBoundProbit(object): + type = 'bounded continuous' + y_type = 'bounded' + y_list = np.atleast_2d(np.linspace(0.01, 0.99, 101)).T + + def __init__(self, sigma=1.0, v=10.0): + # v is the precision, kind of related to inverse of noise, high v is sharp distributions + # sigma is the slope of the probit, basically scales how far away from + # 0 the latent has to be to to move away from 0.5 output. Sigma should + # basically relate to the range of the latent function + self.set_hyper([sigma, v]) + self.log2pi = np.log(2.0*np.pi) + + def set_hyper(self, hyper): + self.set_sigma(hyper[0]) + self.set_v(hyper[1]) + + def set_sigma(self, sigma): + self.sigma = sigma + self._isqrt2sig = 1.0 / (self.sigma * np.sqrt(2.0)) + self._i2var = self._isqrt2sig**2 + + def set_v(self, v): + self.v = v + + def print_hyperparameters(self): + print "Beta distribution, probit mean link.", + print "Sigma: {0:0.2f}, v: {1:0.2f}".format(self.sigma, self.v) + + def mean_link(self, f): + ml = np.clip(std_norm_cdf(f*self._isqrt2sig), 1e-12, 1.0-1e-12) + return ml + + # def alpha(self, f): + # return self.v * self.mean_link(f) + # + # def beta(self, f): + # return self.v * (1-self.mean_link(f)) + + def get_alpha_beta(self, f): + ml = self.mean_link(f) + aa = self.v * ml + bb = self.v - aa # = self.v * (1-ml) + return aa, bb + + def likelihood(self, y, f): + aa, bb = self.get_alpha_beta(f) + return beta.pdf(y, aa, bb) + + def log_likelihood(self, y, f): + return np.log(self.likelihood(y,f)) + + def cdf(self, y, f): + aa, bb = self.get_alpha_beta(f) + return beta.cdf(y, aa, bb) + + def derivatives(self, y, f): + + aa, bb = self.get_alpha_beta(f) + + # Trouble with derivatives... 
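        # A sketch of where the expressions below come from (chain rule through the probit mean
        # link; since a + b = v is constant, the digamma(a+b) terms cancel):
        #   log Beta(y | a, b) = (a-1)*ln(y) + (b-1)*ln(1-y) - ln B(a, b),
        #   a = v*Phi(z), b = v*(1 - Phi(z)), z = f/(sigma*sqrt(2)),
        #   da/df = v*N(z)/(sigma*sqrt(2)) = -db/df,
        # so d/df log Beta(y | a, b) = v*N(z)/(sigma*sqrt(2)) * (ln(y) - ln(1-y) - digamma(a) + digamma(b)),
        # which is the dpy_df expression below; differentiating once more w.r.t. f gives Wdiag.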
+ dpy_df = self.v*self._isqrt2sig*std_norm_pdf(f*self._isqrt2sig) * (np.log(y) - np.log(1-y) - digamma(aa) + digamma(bb)) + + Wdiag = - self.v*self._isqrt2sig*std_norm_pdf(f*self._isqrt2sig) * ( + f * self._i2var * (np.log(y) - np.log(1.0-y) - digamma(aa) + digamma(bb)) + + self.v * self._isqrt2sig * std_norm_pdf(f*self._isqrt2sig) * (polygamma(1, aa) + polygamma(1, bb)) ) + + W = np.diagflat(Wdiag) + + py = np.log(beta.pdf(y, aa, bb)) + + return -W, dpy_df, py + + def posterior_likelihood(self, fhat, varhat, y=None, normal_samples=None): # This is MC sampled due to no closed_form solution + if normal_samples is None: + normal_samples = np.random.normal(size=1000) + iny = 1.0/len(normal_samples) + + if y is None: + y = self.y_list + + # Sampling from posterior to show likelihoods + p_y = np.zeros((len(y), fhat.shape[0])) + for i, (fstar, vstar) in enumerate(zip(fhat, varhat.diagonal())): + f_samples = normal_samples * vstar + fstar + p_y[:, i] = [iny * np.sum(self.likelihood(yj, f_samples)) for yj in y] + return p_y, self.posterior_mean(fhat, varhat) + + def posterior_mean(self, fhat, var_star): + #mu_t = self.mean_link(fhat) + vv = np.atleast_2d(var_star.diagonal()).T + E_x = np.clip(std_norm_cdf(fhat/(np.sqrt(2*self.sigma**2 + vv))), 1e-12, 1.0-1e-12) + return E_x + + def generate_samples(self, f): + mu = self.mean_link(f) + a, b = self.get_alpha_beta(f) + z = np.zeros(f.shape, dtype='float') + for i, (aa,bb) in enumerate(zip(a,b)): + z[i] = beta.rvs(aa, bb) + return z, mu + class PreferenceGaussianProcess(object): - def __init__(self, x_train, uvi_train, y_train, likelihood=PrefProbit, delta_f = 1e-6): - # log_hyp are log of hyperparameters, note that it is [length_0, ..., length_d, sigma_f, sigma_probit] - self._xdim = x_train.shape[1] - self._nx = x_train.shape[0] - self.x_train = x_train - self.y_train = y_train - self.uvi_train = uvi_train + def __init__(self, x_rel, uvi_rel, x_abs, y_rel, y_abs, rel_likelihood='PrefProbit', delta_f=1.0e-5, + abs_likelihood='AbsBoundProbit', verbose=False, hyper_counts=[2, 1, 2], rel_kwargs={}, abs_kwargs={}): + # hyperparameters are split by hyper_counts, where hyper_counts should contain three integers > 0, the first is + # the number of hypers in the GP covariance, second is the number in the relative likelihood, last is the number + # in the absolute likelihood. hyper_counts.sum() should be equal to len(log_hyp) + # log_hyp are log of hyperparameters, note that it is [length_0, ..., length_d, sigma_f, sigma_probit, v_beta] + + # Training points are split into relative and absolute for calculating f, but combined for predictions. 
+ self.set_observations(x_rel, uvi_rel, x_abs, y_rel, y_abs) + self.delta_f = delta_f + self.rel_likelihood = getattr(sys.modules[__name__], rel_likelihood)(**rel_kwargs) # This calls the constructor from string + self.abs_likelihood = getattr(sys.modules[__name__], abs_likelihood)(**abs_kwargs) + + self.verbose = verbose + + if self._xdim is not None: + self.kern = GPy.kern.RBF(self._xdim, ARD=True) + + self.Ix = np.eye(self._nx) - self.likelihood = likelihood() + self.f = None - self.kern = GPy.kern.RBF(self._xdim, ARD=True) + self.hyper_counts = np.array(hyper_counts) + self.init_extras() + def init_extras(self): + pass - def calc_laplace(self, loghyp, f=None): + def set_observations(self, x_rel, uvi_rel, x_abs, y_rel, y_abs): + if x_rel.shape[0] is not 0: + self._xdim = x_rel.shape[1] + elif x_abs.shape[0] is not 0: + self._xdim = x_abs.shape[1] + else: + self._xdim = None + # raise Exception("No Input Points") + self._n_rel = x_rel.shape[0] + self._n_abs = x_abs.shape[0] + self.x_rel = x_rel + self.y_rel = y_rel + self.x_abs = x_abs + self.y_abs = y_abs + self.uvi_rel = uvi_rel + + + self.x_train_all = np.concatenate((self.x_rel, self.x_abs), 0) + + self._nx = self.x_train_all.shape[0] + self.Ix = np.eye(self._nx) + + def add_observations(self, x, y, uvi=None, keep_f=False): + # keep_f is used to reset the Laplace solution. If it's a small update, it's sometimes better to keep the old + # values and append some 0's for the new observations (keep_f = True), otherwise it is reset (keep_f = False) + # Default is keep_f = False + if self._xdim is None: + self._xdim = x.shape[1] + self.kern = GPy.kern.RBF(self._xdim, ARD=True) + if uvi is None: # Absolute observation/s + if keep_f: + self.f = np.vstack((self.f, np.zeros((x.shape[0], 1)))) + x_abs = np.concatenate((self.x_abs, np.atleast_2d(x)), 0) + y_abs = np.concatenate((self.y_abs, np.atleast_2d(y)), 0) + self.set_observations(self.x_rel, self.uvi_rel, x_abs, self.y_rel, y_abs) + else: # Relative observation/s + if keep_f: # The rel observations are stored at the front of f[0:_n_rel] + self.f = np.vstack((self.f[0:self._n_rel], np.zeros((x.shape[0], 1)), self.f[self._n_rel:])) + x_rel = np.concatenate((self.x_rel, np.atleast_2d(x)), 0) + y_rel = np.concatenate((self.y_rel, np.atleast_2d(y)), 0) + uvi_rel = np.concatenate((self.uvi_rel, uvi + self.x_rel.shape[0]), 0) + self.set_observations(x_rel, uvi_rel, self.x_abs, y_rel, self.y_abs) + + def set_hyperparameters(self, loghyp): + assert sum(self.hyper_counts) == len(loghyp), "Sum of hyper_counts must match length of log_hyp" + assert self.hyper_counts[0] == self._xdim+1, "Currently only supporting sq exp covariance, hyper_counts[0] must be x_dim + 1" self.kern.lengthscale = np.exp(loghyp[0:self._xdim]) - self.kern.variance = (np.exp(loghyp[self._xdim]))**2 - self.likelihood.set_sigma = np.exp(loghyp[-1]) + self.kern.variance = 1.0 # (np.exp(loghyp[self._xdim]))**2 + dex = self.hyper_counts.cumsum() + self.rel_likelihood.set_hyper(np.exp(loghyp[dex[0]:dex[1]])) + self.abs_likelihood.set_hyper(np.exp(loghyp[dex[1]:dex[2]])) + # self.rel_likelihood.set_sigma(np.exp(loghyp[-3])) # Do we need different sigmas for each likelihood? Yes! + # self.abs_likelihood.set_sigma(np.exp(loghyp[-2])) # I think this sigma relates to sigma_f in the covariance, and is actually possibly redundant + # self.abs_likelihood.set_v(np.exp(loghyp[-1])) # Should this relate to the rel_likelihood probit noise? 
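    # For illustration (example values only; assumes a 1-D input with the PrefProbit relative and
    # AbsBoundProbit absolute likelihoods used elsewhere in this file), the packed vector is:
    #   hyper_counts = [2, 1, 2]            # covariance, rel likelihood, abs likelihood
    #   log_hyp = np.log([0.1,    # lengthscale l
    #                     1.0,    # sigma_f slot (counted, but variance is currently fixed to 1.0)
    #                     0.1,    # PrefProbit sigma
    #                     1.0,    # AbsBoundProbit sigma
    #                     10.0])  # AbsBoundProbit v (Beta precision)
    #   dex = hyper_counts.cumsum() -> [2, 3, 5], so log_hyp[0:1] sets the lengthscale,
    #   log_hyp[2:3] goes to rel_likelihood.set_hyper and log_hyp[3:5] to abs_likelihood.set_hyper.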
- if f is None: - f = np.zeros((self._nx, 1)) + def calc_laplace(self, loghyp=None): + if loghyp is not None: + self.set_hyperparameters(loghyp) + + if self.f is None or self.f.shape[0] is not self._nx or np.isnan(self.f).any(): + f = np.zeros((self._nx, 1), dtype='float') + else: + f = self.f # With current hyperparameters: - Ix = np.eye(self._nx) - K = self.kern.K(self.x_train) - eps = 1e-6 - inv_ok = False + self.Kxx = self.kern.K(self.x_train_all) - while not inv_ok: - try: - L = np.linalg.cholesky(K + eps*Ix) - iK = np.linalg.solve(L.T, np.linalg.solve(L, Ix)) - # detK = (np.product(L.diagonal()))**2 - logdetK = np.sum(np.log(L.diagonal())) - inv_ok = True - except np.linalg.linalg.LinAlgError: - eps = eps*10 - print "Inversion issue, adding noise: {0}".format(eps) + self.iK, self.logdetK = self._safe_invert_noise(self.Kxx) # First, solve for \hat{f} and W (mode finding Laplace approximation, Newton-Raphson) f_error = self.delta_f + 1 + nloops = 0 while f_error > self.delta_f: - W, dpy_df = self.likelihood.derivatives(self.uvi_train, self.y_train, f) - g = (iK + W) - f_new = np.matmul(np.linalg.inv(g), np.matmul(W, f) + dpy_df) - lml = self.likelihood.log_marginal(self.uvi_train, self.y_train, f_new, iK, logdetK) + # Is splitting these apart correct? Will there be off-diagonal elements of W_abs that should be + # non-zero because of the relative training points? + f_rel = f[0:self._n_rel] + f_abs = f[self._n_rel:] + + # Get relative Hessian and Gradient + if self._n_rel>0 and self._n_abs>0: + W_rel, dpy_df_rel, py_rel = self.rel_likelihood.derivatives(self.uvi_rel, self.y_rel, f_rel) + # Get Absolute Hessian and Gradient + # Note that y_abs has to be in [0,1] + W_abs, dpy_df_abs, py_abs = self.abs_likelihood.derivatives(self.y_abs, f_abs) + + # Combine W, gradient + py = py_abs.sum() + py_rel.sum() + dpy_df = np.concatenate((dpy_df_rel, dpy_df_abs), axis=0) + W = block_diag(W_rel, W_abs) + + elif self._n_rel>0: + W, dpy_df, py = self.rel_likelihood.derivatives(self.uvi_rel, self.y_rel, f_rel) + + elif self._n_abs>0: + W, dpy_df, py = self.abs_likelihood.derivatives(self.y_abs, f_abs) + + # # print "Total" + # print "Dpy, W:" + # print dpy_df + # print W + lambda_eye = 0.0*np.eye(self.iK.shape[0]) + + g = (self.iK + W - lambda_eye) + f_new = np.matmul(np.linalg.inv(g), np.matmul(W-lambda_eye, f) + dpy_df) + #lml = self.rel_likelihood.log_marginal(self.uvi_rel, self.y_rel, f_new, iK, logdetK) ## Jensen version (iK + W)^-1 = K - K((I + WK)^-1)WK (not sure how to get f'K^-1f though... 
# ig = K - np.matmul(np.matmul(np.matmul(K, np.linalg.inv(Ix + np.matmul(W, K))), W), K) @@ -151,11 +552,197 @@ def calc_laplace(self, loghyp, f=None): df = np.abs((f_new - f)) f_error = np.max(df) - print f_error,lml + # print "F Error: " + str(f_error) #,lml + # print "F New: " + str(f_new) f = f_new + nloops += 1 + if nloops > 10000: + raise LaplaceException("Maximum loops exceeded in calc_laplace!!") + if self.verbose > 1: + lml = py - 0.5*np.matmul(f.T, np.matmul(self.iK, f)) - 0.5*np.log(np.linalg.det(np.matmul(W, self.Kxx) + self.Ix)) + print "Laplace iteration {0:02d}, log p(y|f) = {1:0.2f}".format(nloops, lml[0,0]) + + self.W = W + self.f = f + self.iKf = np.matmul(self.iK, self.f) + self.KWI = np.matmul(self.W, self.Kxx) + self.Ix + + return f#, lml + + def _safe_invert_noise(self, mat, start_noise=1.0e-6): + eps = start_noise + inv_ok = False + + while not inv_ok: + try: + L = np.linalg.cholesky(mat + eps*self.Ix) + imat = np.linalg.solve(L.T, np.linalg.solve(L, self.Ix)) + # detK = (np.product(L.diagonal()))**2 + logdet = np.sum(np.log(L.diagonal())) + inv_ok = True + except np.linalg.linalg.LinAlgError: + eps = max(1e-6, eps*10.0) + print "Inversion issue, adding noise: {0}".format(eps) + return imat, logdet + + def calc_nlml(self, loghyp, f=None): + if f is None: + f = self.calc_laplace(loghyp) + # Now calculate the log likelihoods (remember log(ax) = log a + log x) + log_py_f_rel = self.rel_likelihood.log_likelihood(self.y_rel, self.rel_likelihood.get_rel_f(f, self.uvi_rel)) + log_py_f_abs = self.abs_likelihood.log_likelihood(self.y_abs, f[self._n_rel:]) #TODO: I would prefer to use the indexing in absolute ratings too for consistency + fiKf = np.matmul(f.T, self.iKf) + lml = log_py_f_rel.sum()+log_py_f_abs.sum() - 0.5*fiKf - 0.5*np.log(np.linalg.det(self.KWI)) + return -lml + + def predict_latent(self, x): + assert hasattr(self, 'iKf') + kt = self.kern.K(self.x_train_all, x) + mean_latent = np.matmul(kt.T, self.iKf) + Ktt = self.kern.K(x) + try: + iKW = np.linalg.inv(self.KWI) + except np.linalg.linalg.LinAlgError: + raise + var_latent = Ktt - np.matmul(kt.T, np.matmul(iKW, np.matmul(self.W, kt))) + return mean_latent, var_latent + + def sample_latent_posterior(self, mean, covariance, n_samples = 1): + assert mean.shape[0] == covariance.shape[0] + assert covariance.shape[1] == covariance.shape[0] + y_post = np.random.multivariate_normal(mean.flatten(), covariance, n_samples) + return y_post + + def _check_latent_input(self, x=None, fhat=None, varhat=None): + if (fhat is None or varhat is None): + if x is not None: + fhat, varhat = self.predict_latent(x) + else: + raise ValueError('Must supply either x or fhat and varhat') + return fhat, varhat + + def rel_posterior_likelihood(self, uvi, y, x=None, fhat=None, varhat=None): + fhat, varhat = self._check_latent_input(x, fhat, varhat) + p_y = self.rel_likelihood.posterior_likelihood(fhat, varhat, uvi, y) + return p_y + + def rel_posterior_likelihood_array(self, x=None, fhat=None, varhat=None, y=-1): + fhat, varhat = self._check_latent_input(x, fhat, varhat) + p_y = np.zeros((fhat.shape[0], fhat.shape[0]), dtype='float') + cross_uvi = np.zeros((fhat.shape[0],2), dtype='int') + cross_uvi[:, 1] = np.arange(fhat.shape[0]) + for i in cross_uvi[:, 1]: + cross_uvi[:, 0] = i + p_y[:, i:i+1] = self.rel_likelihood.posterior_likelihood(fhat, varhat, cross_uvi, y) + return p_y + + def rel_posterior_MAP(self, uvi, x=None, fhat=None, varhat=None): + p_y = self.rel_posterior_likelihood(uvi, y=1, x=x, fhat=fhat, varhat=varhat) + return 
2*np.array(p_y < 0.5, dtype='int')-1 + + def abs_posterior_likelihood(self, y=None, x=None, fhat=None, varhat=None, **kwargs): # Currently sample-based + fhat, varhat = self._check_latent_input(x, fhat, varhat) + return self.abs_likelihood.posterior_likelihood(fhat, varhat, y=y, **kwargs) + + def abs_posterior_mean(self, x=None, fhat=None, varhat=None): + fhat, varhat = self._check_latent_input(x, fhat, varhat) + return self.abs_likelihood.posterior_mean(fhat, varhat) + + def print_hyperparameters(self): + print "COV: '{0}', l: {1}, sigma_f: {2}".format(self.kern.name, self.kern.lengthscale.values, np.sqrt(self.kern.variance.values)) + print "REL: ", + self.rel_likelihood.print_hyperparameters() + print "ABS: ", + self.abs_likelihood.print_hyperparameters() + + +class ObservationSampler(object): + def __init__(self, true_fun, likelihood_type, likelihood_kwargs): + self.f = true_fun + ltype = getattr(sys.modules[__name__], likelihood_type) + self.l = ltype(**likelihood_kwargs) + + def generate_observations(self, x): + fx = self.f(x) + y, ff = self.l.generate_samples(fx) + return y, ff + + @staticmethod + def _gen_x_obs(n, n_xdim=1, domain=None): + # Domain should be 2 x n_xdim, i.e [[x0_lo, x1_lo, ... , xn_lo], [x0_hi, x1_hi, ... , xn_hi ]] + x_test = np.random.uniform(size=(n, n_xdim)) + if domain is not None: + x_test = x_test * np.diff(domain, axis=0) + domain[0, :] + return x_test + + def gaussian_multi_pairwise_sampler(self, x): + # Find the maximum from a set of n samples (x should be n by d) + # Return pairwise relationships that one point is higher than the others + # Sample from Gaussian distributions around function values + fx = np.random.normal(loc=self.f(x), scale=self.l.sigma) + max_xi = np.argmax(fx) + other_xi = np.delete(np.arange(x.shape[0]), max_xi) + y = np.ones((x.shape[0]-1, 1), dtype='int') + uvi = np.hstack((np.atleast_2d(other_xi).T, max_xi*y)) + return y, uvi, fx + +class AbsObservationSampler(ObservationSampler): + def observation_likelihood_array(self, x, y=None): + fx = self.f(x) + if y is None: + y = self.l.y_list + p_y = np.zeros((y.shape[0], fx.shape[0]), dtype='float') + for i, fxi in enumerate(fx): + p_y[:, i:i + 1] = self.l.likelihood(y, fxi[0]) + return p_y + + def mean_link(self, x): + fx = self.f(x) + return self.l.mean_link(fx) + + def generate_n_observations(self, n, n_xdim=1, domain=None): + x = self._gen_x_obs(n, n_xdim, domain) + y, mu = self.generate_observations(x) + return x, y, mu + + +class RelObservationSampler(ObservationSampler): + def observation_likelihood_array(self, x, y=-1): + fx = self.f(x) + p_y = np.zeros((x.shape[0], x.shape[0]), dtype='float') + cross_fx = np.hstack((np.zeros((x.shape[0], 1)), fx)) + for i, fxi in enumerate(fx): + cross_fx[:, 0] = fxi + p_y[:, i:i + 1] = self.l.likelihood(y, cross_fx) + return p_y + + def generate_observations(self, x, uvi): + fx = self.f(x) + y, ff = self.l.generate_samples(fx[uvi][:, :, 0]) + return y, ff + + def generate_n_observations(self, n, n_xdim=1, domain=None): + x = self._gen_x_obs(2*n, n_xdim, domain) + uvi = np.arange(2*n).reshape((n, 2)) + # uv = x[uvi] # [:, :, 0] + y, fuv = self.generate_observations(x, uvi) + return x, uvi, y, fuv + + + + +# SCRAP: + # Estimate dpy_df + # delta = 0.001 + # print "Estimated dpy_df" + # est_dpy_df = (self.log_likelihood(y, f+delta) - self.log_likelihood(y, f-delta))/(2*delta) + # print est_dpy_df + # print 'log likelihood' + # print np.sum(self.log_likelihood(y, f)) + # + # print "Estimated W" + # est_W_diag = (self.log_likelihood(y, f+2*delta) - 
2*self.log_likelihood(y,f) + self.log_likelihood(y, f-2*delta))/(2*delta)**2 + # + # print est_W_diag - return f, lml - def calc_nlml(self, loghyp): - f,lml = self.calc_laplace(loghyp) - return -lml \ No newline at end of file diff --git a/GPr.py b/GPr.py index 96be1e3..93e6a43 100644 --- a/GPr.py +++ b/GPr.py @@ -43,23 +43,23 @@ def __init__(self,log_hyp,mean_hyp,like_hyp,covFunName,meanFunName,likeFunName, # Define GP prediction function def compute_prediction(self,testInput): - Kxz = self.covFun.compute_Kxz_matrix(testInput) - Kxx = self.covFun.compute_Kxx_matrix() - iKxx = np.linalg.inv(Kxx) + Kxx = self.covFun.compute_K_matrix() + Kxz = self.covFun.compute_K_matrix(X1=testInput) + Kzz = self.covFun.compute_K_matrix(testInput, testInput) + iKxx = np.linalg.inv(Kxx + self.covFun.sn2*np.eye(Kxx.shape[0])) Kzx = Kxz.T - K_diag = np.diagonal(np.dot(np.dot(Kzx,iKxx),Kxz)) - K_noise = self.covFun.sf2*np.ones(np.size(testInput,axis=0)) - fz = np.dot(np.dot(Kzx,iKxx),self.trainTarget.T) - cov_fz = K_noise - K_diag + K_diag = np.dot(np.dot(Kzx,iKxx),Kxz) + fz = np.dot(np.dot(Kzx,iKxx), self.trainTarget) + cov_fz = Kzz - K_diag return fz, cov_fz # Define GP negative log marginal likelihood function - def compute_likelihood(self,hyp): + def compute_likelihood(self, hyp): n = np.size(self.trainInput,axis=0) - covSE = SquaredExponential(hyp,self.trainInput) - Kxx = covSE.compute_Kxx_matrix() + covSE = SquaredExponential(hyp, self.trainInput) + Kxx = covSE.compute_K_matrix() m = self.meanFun.y - L = np.linalg.cholesky(Kxx) + L = np.linalg.cholesky(Kxx +self.covFun.sn2*np.eye(Kxx.shape[0])) iKxx = np.linalg.solve(L.T,np.linalg.solve(L,np.eye(n))) y = np.reshape(self.trainTarget,(len(self.trainTarget),1)) err_y = np.dot(np.dot((y-m).T,iKxx),(y-m))/2 @@ -93,18 +93,16 @@ def __init__(self,logHyp,x): self.hyp = np.exp(self.logHyp) # hyperparameters n = len(self.hyp) self.M = self.hyp[:n-2] # length scales - self.sf2 = self.hyp[n-2]**2 # squared exponential variance + self.sf2 = self.hyp[n-2]**2 # sigma_f variance self.sn2 = self.hyp[n-1]**2 # noise variance - - def compute_Kxx_matrix(self): - scaledX = self.x/self.M - sqDist = squared_distance(scaledX,scaledX) - Kxx = self.sn2*np.eye(np.size(self.x,axis=0))+self.sf2*np.exp(-0.5*sqDist) - return Kxx - - def compute_Kxz_matrix(self,z): - scaledX = self.x/self.M - scaledZ = z/self.M + + def compute_K_matrix(self, X0=None, X1=None): + if X0 is None: + X0 = self.x + if X1 is None: + X1 = self.x + scaledX = X0/self.M + scaledZ = X1/self.M sqDist = squared_distance(scaledX,scaledZ) - Kxz = self.sf2*np.exp(-0.5*sqDist) - return Kxz \ No newline at end of file + K = self.sf2*np.exp(-0.5*sqDist) + return K diff --git a/active_learners.py b/active_learners.py new file mode 100644 index 0000000..7cd49f4 --- /dev/null +++ b/active_learners.py @@ -0,0 +1,725 @@ +import numpy as np +import GPpref +from scipy.stats import beta +import plot_tools as ptt +import time +import sys + +def calc_ucb(fhat, vhat, gamma=2.0, sigma_offset=0.0): + # return fhat + gamma * (np.sqrt(np.atleast_2d(vhat.diagonal()).T) - sigma_offset) + return fhat.flatten() + gamma * (np.sqrt(np.diagonal(vhat)) - sigma_offset) + + +def softmax_selector(x, tau=1.0): + # High tau is more random + ex = np.exp((x - x.max())/tau) + Px = ex/ex.sum() + return np.random.choice(len(x), p=Px) + +class Learner(object): + def __init__(self, model_type, obs_args, name, update_p_rel = False): + self.model_type = getattr(sys.modules[__name__], model_type) + self.obs_arguments = obs_args + self.name = name + 
self.update_p_rel = update_p_rel + + def build_model(self, model_kwargs): + self.model = self.model_type(**model_kwargs) + + def select_observation(self): + return self.model.select_observation(**self.obs_arguments) + +class ActiveLearner(GPpref.PreferenceGaussianProcess): + def init_extras(self): + self._default_uvi = np.array([[0, 1]]) + self._plus_y_obs = np.ones((1, 1), dtype='int') + self._minus_y_obs = -1*self._plus_y_obs + + def solve_laplace(self, log_hyp=None): + self.f = self.calc_laplace(log_hyp) + return self.f + + def get_observations(self): + return self.x_rel, self.uvi_rel, self.x_abs, self.y_rel, self.y_abs + + def select_observation(self, p_rel=0.5, domain=None, n_rel_samples=2): + if np.random.uniform() > p_rel: # i.e choose an absolute sample + n_rel_samples = 1 + return self.uniform_domain_sampler(n_rel_samples, domain) + + def uniform_domain_sampler(self, n_samples, domain=None, sortx=False): + # Domain should be 2 x n_xdim, i.e [[x0_lo, x1_lo, ... , xn_lo], [x0_hi, x1_hi, ... , xn_hi ]] + x_test = np.random.uniform(size=(n_samples, self._xdim)) + if domain is not None: + x_test = x_test*np.diff(domain, axis=0) + domain[0, :] + if sortx: + x_test = np.sort(x_test, axis=0) + return x_test + + def linear_domain_sampler(self, n_samples, domain=None): + # One dimension only at the moment! + assert self._xdim == 1 + x_test = np.atleast_2d(np.linspace(.0, 1.0, n_samples, self._xdim)).T + if domain is not None: + x_test = x_test*np.diff(domain, axis=0) + domain[0, :] + return x_test + + + def get_latent_ests(self, x_test, abs_y_samples ): + # Latent predictions + d_x = x_test.shape[1] + fhat, vhat = self.predict_latent(x_test) + p_abs_y_post, E_y = self.abs_posterior_likelihood(abs_y_samples, fhat=fhat, varhat=vhat) + if d_x is 1: + p_rel_y_post = self.rel_posterior_likelihood_array(fhat=fhat, varhat=vhat) + else: + p_rel_y_post = None + return fhat, vhat, p_abs_y_post, E_y, p_rel_y_post + + + def create_posterior_plot(self, x_test, f_true, mu_true, rel_sigma, fuv_train, abs_y_samples, **plot_kwargs): + + fhat, vhat, p_abs_y_post, E_y, p_rel_y_post = self.get_latent_ests(x_test, abs_y_samples) + + x_train, uvi_train, x_abs_train, y_train, y_abs_train = self.get_observations() + uv_train = x_train[uvi_train] + + # Posterior estimates + fig_p, ax_p = \ + ptt.estimate_plots(x_test, f_true, mu_true, fhat, vhat, E_y, rel_sigma, + abs_y_samples, p_abs_y_post, p_rel_y_post, + x_abs_train, y_abs_train, uv_train, fuv_train, y_train, + t_l=r'$\mathcal{GP}$ latent function estimate $\hat{f}(x)$', + t_a=r'Posterior absolute likelihood, $p(y | \mathcal{Y}, \theta)$', + t_r=r'Posterior relative likelihood $P(x_0 \succ x_1 | \mathcal{Y}, \theta)$', **plot_kwargs) + return fig_p, ax_p, [fhat, vhat, p_abs_y_post, E_y, p_rel_y_post] + + def update_posterior_plot(self, x_test, f_true, mu_true, rel_sigma, fuv_train, abs_y_samples, + fhat, vhat, p_abs_y_post, E_y, p_rel_y_post, ax_p): + x_train, uvi_train, x_abs_train, y_train, y_abs_train = self.get_observations() + uv_train = x_train[uvi_train] + + ptt.reset_axes2d(ax_p) + # Posterior estimates + fig_p, ax_p = \ + ptt.estimate_plots(x_test, f_true, mu_true, fhat, vhat, E_y, rel_sigma, + abs_y_samples, p_abs_y_post, p_rel_y_post, + x_abs_train, y_abs_train, uv_train, fuv_train, y_train, + t_l=r'$\mathcal{GP}$ latent function estimate $\hat{f}(x)$', + t_a=r'Posterior absolute likelihood, $p(y | \mathcal{Y}, \theta)$', + t_r=r'Posterior relative likelihood $P(x_0 \succ x_1 | \mathcal{Y}, \theta)$', ax=ax_p) + return fig_p, ax_p + + +class 
MaxVar(ActiveLearner): + l2pie = 1.0 + np.log(2*np.pi) + def logistic(self, H, n_rel): + L = 2.0 + return 1.0 / (1.0 + np.exp(-L * (H-self.kern.variance)) ) + + # Max variance + def select_observation(self, domain=None, n_test=100, n_rel_samples = 2, p_rel = 0.0, rel_tau = 1.0, abs_tau = 1.0e-5,w_v=1.0, selector='det'): + # p_rel is the likelihood of selecting a relative query + # tau are softmax temperatures (low is more greedy, high is more random) + # w_v is the UCB weighting for variance, where UCB = w_v*variance + (1-w_v)*mean + # det_type is the type of value used to select between + # If p_rel < 0 then we will select based on variance magnitude + if p_rel < 0.0: + p_select = 1.0 + else: + p_select = p_rel + x_test = self.uniform_domain_sampler(n_test, domain) + fhat, vhat = self.predict_latent(x_test) + vv = np.sqrt(np.diagonal(vhat)) + + if p_select >= 1.0 or np.random.uniform() < p_select: # i.e choose a relative sample + available_indexes = set(range(len(x_test))) + dK = np.zeros(len(available_indexes)) + best_n = [softmax_selector(vv, tau=rel_tau)] + while len(best_n) < n_rel_samples: + dK[best_n[-1]] = -1.0e10 # Bit of a scam because it is still possible to sample this value + available_indexes.remove(best_n[-1]) + best_n.append(-1) + for cn in available_indexes: + best_n[-1] = cn + K = vhat[np.ix_(best_n, best_n)] + dK[cn] = np.linalg.det(K) + best_n[-1] = softmax_selector(dK/dK.max(), tau=rel_tau) # np.argmax(dK) # + else: + best_n = [softmax_selector(w_v*vv+(1.0-w_v)*fhat.flatten(), tau=abs_tau)] #[np.argmax(ucb)] # + + # This chooses an absolute query based on determinant. Choose relK using beta cdf likelihood + if p_rel < 0.0: + best_abs = softmax_selector(w_v * vv + (1.0 - w_v) * fhat.flatten(), tau=abs_tau) + if selector == 'det': + best_detK = -p_rel*np.sqrt(dK[best_n[-1]]) + K_ratio = best_detK/(vv.max() + best_detK) + elif selector == 'entropy': + H_rel = 0.5*(n_rel_samples*self.l2pie + np.log(dK[best_n[-1]])) + H_abs = 0.5*(self.l2pie + np.log(vhat[best_abs, best_abs])) + K_ratio = H_rel/(H_rel + H_abs) + p_select = beta.cdf(K_ratio, -p_rel, -p_rel) + if np.random.uniform() > p_select: + best_n = [best_abs] + return x_test[best_n, :] + +class UCBLatent(ActiveLearner): + # All absolute returns + def select_observation(self, domain=None, n_test=100, gamma=2.0): + x_test = self.uniform_domain_sampler(n_test, domain) + fhat, vhat = self.predict_latent(x_test) + ucb = calc_ucb(fhat, vhat, gamma) + return x_test[[np.argmax(ucb)], :] + + +class UCBLatentSoftmax(ActiveLearner): + # All absolute returns + def select_observation(self, domain=None, n_test=100, gamma=2.0, tau=1.0): + x_test = self.uniform_domain_sampler(n_test, domain) + fhat, vhat = self.predict_latent(x_test) + ucb = calc_ucb(fhat, vhat, gamma) + return x_test[[softmax_selector(ucb, tau)], :] + +class UCBCovarianceSoftmax(ActiveLearner): + # All absolute returns + def select_observation(self, domain=None, n_test=100, gamma=2.0, tau=1.0): + x_test = self.uniform_domain_sampler(n_test, domain) + fhat, vhat = self.predict_latent(x_test) + ucb = fhat.flatten() + gamma * (vhat.sum(axis=1) ** 0.25) + return x_test[[softmax_selector(ucb, tau)], :] + +class UCBOut(ActiveLearner): + # NOT FULLY IMPLEMENTED - BROKEN + def select_observation(self, domain=None, n_test=100, gamma=2.0): + # Don't know how to recover the second moment of the predictive distribution, so this isn't done + x_test = self.uniform_domain_sampler(n_test, domain) + fhat, vhat = self.predict_latent(x_test) + Ey = self.expected_y(x_test, fhat, vhat) + 
return x_test[[np.argmax(Ey)], :] + + +class ProbabilityImprovementAbs(ActiveLearner): + def mean_var_sampler(self, n_test, domain): + x_test = self.uniform_domain_sampler(n_test, domain) + fhat, vhat = self.predict_latent(x_test) + shat = np.atleast_2d(np.sqrt(vhat.diagonal())).T + return x_test, fhat, shat + + @staticmethod + def delta(f_star, f_est, sig_est, zeta): + return (f_est - f_star - zeta) / sig_est + + def probability_improvement(self, f_star, f_est, sig_est, zeta): + Z = self.delta(f_star, f_est, sig_est, zeta) + PI = GPpref.std_norm_cdf(Z) + return PI + + def select_observation(self, domain=None, n_test=100, zeta=0.0, *args, **kwargs): + f_star = self.f.max() + x_test, fhat, shat = self.mean_var_sampler(n_test, domain) + PI = self.probability_improvement(f_star, fhat, shat, zeta) + return x_test[[np.argmax(PI)], :] + + +class ProbabilityImprovementRel(ProbabilityImprovementAbs): + def select_observation(self, domain=None, n_test=100, zeta=0.0, *args, **kwargs): + i_star = np.argmax(self.f) + x_out = np.zeros((2, self.x_train_all.shape[1]), dtype='float') + x_out[0] = self.x_train_all[i_star] + x_out[1] = super(ProbabilityImprovementRel, self).select_observation(domain, n_test, zeta, *args, **kwargs)[0] + return x_out + + +class ExpectedImprovementAbs(ProbabilityImprovementAbs): + def expected_improvement(self, f_star, f_est, sig_est, zeta): + Z = self.delta(f_star, f_est, sig_est, zeta) + EI = sig_est * (Z * GPpref.std_norm_cdf(Z) + GPpref.std_norm_pdf(Z)) + return EI + + def select_observation(self, domain=None, n_test=100, zeta=0.0, *args, **kwargs): + f_star = self.f.max() + x_test, fhat, shat = self.mean_var_sampler(n_test, domain) + EI = self.expected_improvement(f_star, fhat, shat, zeta) + return x_test[[np.argmax(EI)], :] + + +class ExpectedImprovementRel(ExpectedImprovementAbs): + def select_observation(self, domain=None, n_test=100, zeta=0.0, *args, **kwargs): + i_star = np.argmax(self.f) + x_out = np.zeros((2, self.x_train_all.shape[1]), dtype='float') + x_out[0] = self.x_train_all[i_star] + x_out[1] = super(ExpectedImprovementRel, self).select_observation(domain, n_test, zeta, *args, **kwargs)[0] + return x_out + + +# class PredictiveEntropy(ActiveLearner): +# # All absolute returns +# def select_observation(self, domain=None, n_test=100): +# +# x_test = self.uniform_domain_sampler(n_test, domain) +# fhat, vhat = self.predict_latent(x_test) +# ucb = calc_ucb(fhat, vhat, gamma) +# return x_test[[softmax_selector(ucb, tau)], :] + +class ABSThresh(ActiveLearner): + def select_observation(self, domain=None, n_test=100, p_thresh=0.7): + x_test = self.uniform_domain_sampler(n_test, domain) + fhat, vhat = self.predict_latent(x_test) + aa, bb = self.abs_likelihood.get_alpha_beta(fhat) + p_under_thresh = beta.cdf(p_thresh, aa, bb) + # ucb = calc_ucb(fhat, vhat, gamma) + return x_test[[np.argmax(p_under_thresh * (1.0 - p_under_thresh))], :] + + +class UCBAbsRel(ActiveLearner): + def select_observation(self, domain=None, n_test=100, p_rel=0.5, n_rel_samples=2, gamma=2.0, tau=1.0): + x_test = self.uniform_domain_sampler(n_test, domain) + fhat, vhat = self.predict_latent(x_test) + ucb = calc_ucb(fhat, vhat, gamma) + + if np.random.uniform() < p_rel: # i.e choose a relative sample + best_n = [softmax_selector(ucb, tau=tau)] #[np.argmax(ucb)] # + # sq_dist = GPpref.squared_distance(x_test, x_test) + p_rel_y = self.rel_posterior_likelihood_array(fhat=fhat, varhat=vhat) + + while len(best_n) < n_rel_samples: + # ucb = ucb*sq_dist[best_n[-1], :] # Discount ucb by distance + 
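                # The selected index is zeroed and the remaining candidates are down-weighted by
                # 4*p*(1-p), which peaks at p = 0.5: points whose comparison outcome against the
                # already-selected point is most uncertain keep the most weight.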
ucb[best_n[-1]] = 0.0 + ucb *= 4*p_rel_y[best_n[-1],:]*(1.0 - p_rel_y[best_n[-1],:]) # Reduce by likelihood that each point is better than previous best + best_n.append(softmax_selector(ucb, tau=tau)) + # best_n.append(np.argmax(ucb)) + else: + best_n = [softmax_selector(ucb, tau=tau/2.0)] #[np.argmax(ucb)] # + return x_test[best_n, :] + + +class UCBAbsRelD(ActiveLearner): + def select_observation(self, domain=None, n_test=100, p_rel=0.5, n_rel_samples=2, gamma=2.0, tau=1.0): + x_test = self.uniform_domain_sampler(n_test, domain) + fhat, vhat = self.predict_latent(x_test) + ucb = calc_ucb(fhat, vhat, gamma) + + if np.random.uniform() < p_rel: # i.e choose a relative sample + best_n = [softmax_selector(ucb, tau=tau)] #[np.argmax(ucb)] # + p_rel_y = self.rel_posterior_likelihood_array(fhat=fhat, varhat=vhat) + # sq_dist = np.sqrt(GPpref.squared_distance(x_test, x_test)) + while len(best_n) < n_rel_samples: + # ucb *= sq_dist[best_n[-1], :] # Discount ucb by distance + ucb[best_n[-1]] = 0.0 + ucb *= 4*p_rel_y[best_n[-1],:]*(1.0 - p_rel_y[best_n[-1],:]) # Divide by likelihood that each point is better than previous best + best_n.append(softmax_selector(ucb, tau=tau)) + #best_n.append(np.argmax(ucb)) + else: + best_n = [softmax_selector(ucb, tau=tau/2.0)] #[np.argmax(ucb)] # + return x_test[best_n, :] + +class DetRelBoo(ActiveLearner): + def select_observation(self, domain=None, n_test=100, n_rel_samples=2, gamma=2.0, tau=1.0): + x_test = self.uniform_domain_sampler(n_test, domain, sortx=True) + fhat, vhat = self.predict_latent(x_test) + # ucb = calc_ucb(fhat, vhat, gamma) + + # Select the first location using the mean, uncertainty and likelihood of improvement over everywhere (?) else + p_rel = self.rel_posterior_likelihood_array(fhat=fhat, varhat=vhat) + pp_rel = p_rel.mean(axis=0) + ucb = 4*pp_rel*(1-pp_rel)*np.diagonal(vhat)**0.5 # THIS SEEMS BACKWARD!!! WHY DOES IT WORK? + + available_indexes = set(range(len(x_test))) + dK = np.zeros(len(available_indexes)) + + best_n = [softmax_selector(ucb, tau=tau)] #[np.argmax(ucb)] # + uvi = self._default_uvi.copy() + uvi[0][0] = best_n[0] + while len(best_n) < n_rel_samples: + dK[best_n[-1]] = -1e5 # Bit of a scam because it is still possible to sample this value + available_indexes.remove(best_n[-1]) + best_n.append(-1) + for cn in available_indexes: + best_n[-1] = cn; uvi[0][1] = cn + K = vhat[np.ix_(best_n, best_n)] + # p_y = self.rel_likelihood.posterior_likelihood(fhat, vhat, uvi, y=self._plus_y_obs) + dK[cn] = (gamma*np.sqrt(np.linalg.det(K)) + fhat[cn]) # *p_y + best_n[-1] = softmax_selector(dK, tau=tau) # np.argmax(dK) # + return x_test[best_n, :] + +class DetSelect(ActiveLearner): + def select_observation(self, domain=None, n_test=100, n_rel_samples=2, gamma=2.0, rel_tau=1.0, abs_tau=1.0): + x_test = self.uniform_domain_sampler(n_test, domain, sortx=True) + fhat, vhat = self.predict_latent(x_test) + # ucb = calc_ucb(fhat, vhat, gamma).flatten() + + # Select the first location using the mean, uncertainty and likelihood of improvement over everywhere (?) 
else + p_rel = self.rel_posterior_likelihood_array(fhat=fhat, varhat=vhat) + pp_rel = p_rel.mean(axis=0) + x_std = np.diagonal(vhat)**0.5 + rel_value = 4*pp_rel*(1-pp_rel)*(x_std.max() - x_std) # fhat.flatten() + gamma*(vhat.sum(axis=1)**0.25) # + + available_indexes = set(range(len(x_test))) + dK = np.zeros(len(available_indexes)) + best_n = [softmax_selector(rel_value, tau=rel_tau)] #[np.argmax(rel_value)] # + uvi = self._default_uvi.copy() + uvi[0][0] = best_n[0] + while len(best_n) < n_rel_samples: + dK[best_n[-1]] = -1e5 # Bit of a scam because it is still possible to sample this value + available_indexes.remove(best_n[-1]) + best_n.append(-1) + for cn in available_indexes: + best_n[-1] = cn; uvi[0][1] = cn + K = vhat[np.ix_(best_n, best_n)] + p_y = self.rel_likelihood.posterior_likelihood(fhat, vhat, uvi, y=self._plus_y_obs) + dK[cn] = p_y*(gamma*np.sqrt(np.linalg.det(K)) + fhat[cn]) + best_n[-1] = softmax_selector(dK, tau=rel_tau) # np.argmax(dK) # + + K = vhat[np.ix_(best_n, best_n)] + mdK = gamma*np.sqrt(np.linalg.det(K)) + fhat[cn] + # mdK = dK.max() + ucb = calc_ucb(fhat, vhat, gamma) + p_rel = mdK/(mdK + ucb.max()) + + if abs_tau > 0 and np.random.uniform() > p_rel: # i.e choose an abs: + # best_n = [softmax_selector(fhat.flatten() + gamma*(vhat.sum(axis=1)**0.25), tau)] #[best_n[0]] # + best_n = [softmax_selector(ucb, tau=abs_tau)] + return x_test[best_n, :] + + +class PeakComparitor(ActiveLearner): + + def test_observation(self, x, y, x_test, gamma): + self.store_observations() + self.add_observations(x, y, self._default_uvi) + self.solve_laplace() + fhat, vhat = self.predict_latent(x_test) + ucb = calc_ucb(fhat, vhat, gamma) + self.reset_observations() + return ucb.max() + + def store_observations(self): + crx, cuv, cax, cry, cay = self.get_observations() + self.crx, self.cuv, self.cax, self.cry, self.cay = crx.copy(), cuv.copy(), cax.copy(), cry.copy(), cay.copy() + self.crf = self.f.copy() + + def reset_observations(self): + try: + self.set_observations(self.crx, self.cuv, self.cax, self.cry, self.cay) + self.f = self.crf + except AttributeError: + print "reset_observations failed: existing observations not found" + + def select_observation(self, domain=None, n_test=50, gamma=2.0, n_rel_samples=2): + n_comparators = n_rel_samples-1 + x_test = self.uniform_domain_sampler(n_test, domain) + fhat, vhat = self.predict_latent(x_test) + ucb = calc_ucb(fhat, vhat, gamma) + max_xi = np.argmax(ucb) # Old method used highest x, not ucb + other_xi = np.delete(np.arange(n_test), max_xi) + uvi = np.vstack((max_xi * np.ones(n_test - 1, dtype='int'), other_xi)).T + + p_pref = self.rel_likelihood.posterior_likelihood(fhat, vhat, uvi, y=-1) + V = np.zeros(n_test - 1) + x = np.zeros((2, 1), dtype='float') + x[0] = x_test[max_xi] + + # Now calculate the expected value for each observation pair + for i,uvi1 in enumerate(other_xi): + x[1] = x_test[uvi1] + V[i] += p_pref[i]*self.test_observation(x, self._minus_y_obs, x_test, gamma) + if (1 - p_pref[i]) > 1e-3: + V[i] += (1-p_pref[i])*self.test_observation(x, self._plus_y_obs, x_test, gamma) + + best_n = np.argpartition(V, -n_comparators)[-n_comparators:] + # best = np.argmax(V) + cVmax = np.argmax(ucb) # This is repeated in case I want to change max_xi + + if ucb[cVmax] > V.max(): + return x_test[[cVmax], :] + else: + xi = np.zeros(n_comparators+1, dtype='int') + xi[0] = max_xi + xi[1:] = other_xi[best_n] + return x_test[xi, :] + + +class LikelihoodImprovement(PeakComparitor): + + def test_observation(self, x, y, x_test, max_xi): + 
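        # Roughly: temporarily add the hypothetical relative observation (x, y), re-solve the
        # Laplace approximation, and return the posterior probability that the new latent
        # maximiser is preferred to the previous best point max_xi, before restoring the
        # stored observation set.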
self.store_observations() + self.add_observations(x, y, self._default_uvi) + f = self.solve_laplace() + fhat, vhat = self.predict_latent(x_test) + new_xi = np.argmax(fhat) + p_new_is_better = self.rel_likelihood.posterior_likelihood(fhat, vhat, np.array([[max_xi, new_xi]]), self._plus_y_obs) + self.reset_observations() + return p_new_is_better + + def select_observation(self, domain=None, n_test=50, req_improvement=0.6, n_rel_samples=2, gamma=1.5, p_thresh=0.7): + n_comparators = n_rel_samples-1 + x_test = self.uniform_domain_sampler(n_test, domain) + fhat, vhat = self.predict_latent(x_test) + max_xi = np.argmax(fhat) + other_xi = np.delete(np.arange(n_test), max_xi) + uvi = np.vstack((max_xi * np.ones(n_test - 1, dtype='int'), other_xi)).T + + p_pref = self.rel_likelihood.posterior_likelihood(fhat, vhat, uvi, y=-1) + V = np.zeros(n_test - 1) + x = np.zeros((2, 1), dtype='float') + x[0] = x_test[max_xi] + + + # Now calculate the expected value for each observation pair + for i,uvi1 in enumerate(other_xi): + x[1] = x_test[uvi1] + V[i] += p_pref[i]*self.test_observation(x, self._minus_y_obs, x_test, max_xi) + if (1-p_pref[i]) > 1e-3: + V[i] += (1-p_pref[i])*self.test_observation(x, self._plus_y_obs, x_test, max_xi) + + Vmax = V.max() + # best_n = np.argpartition(V, -n_comparators)[-n_comparators:] + # best = np.argmax(V) + if self.verbose: + print 'V_max = {0}'.format(Vmax) + + if Vmax < req_improvement: + # aa, bb = self.abs_likelihood.get_alpha_beta(fhat) + # p_under_thresh = beta.cdf(p_thresh, aa, bb) + # return x_test[[np.argmax(p_under_thresh*(1.0-p_under_thresh))], :] + ucb = calc_ucb(fhat, vhat, gamma, self.rel_likelihood.sigma) + return x_test[[np.argmax(ucb)], :] + else: + best_n = [] + while len(best_n) < n_comparators: + cbest = np.argmax(V) + best_n.append(cbest) + V = V * np.sqrt(GPpref.squared_distance(x_test[[other_xi[cbest]], :], x_test[other_xi])[0]) + xi = np.zeros(n_comparators+1, dtype='int') + xi[0] = max_xi + xi[1:] = other_xi[best_n] + return x_test[xi, :] + + +class SampledThreshold(PeakComparitor): + + def calculate_threshold_utility(self, fhat, vhat, n_samples, y_threshold): + f_sampled = self.sample_latent_posterior(fhat, vhat, n_samples = n_samples) + threshold_utility = 0.0 + # For each one, evaluate the probability mass above the threshold in the absolute function + for fs in f_sampled: + threshold_utility += self.point_utility(fhat, fs, y_threshold) + return threshold_utility/n_samples + + def point_utility(self, fhat, fs, y_threshold): + # This is the mean probability mass above the threshold value + pmass = 0.0 + for fi in fs: + pmass += 1.0 - self.abs_likelihood.cdf(y_threshold, fi) + return pmass/fs.shape[0] + + def test_observation(self, x, y, uvi, x_test, n_samples, y_threshold, f = None): + self.add_observations(x, y, uvi, keep_f=True) + self.solve_laplace() + fhat, vhat = self.predict_latent(x_test) + util = self.calculate_threshold_utility(fhat, vhat, n_samples, y_threshold) + self.reset_observations() + return util + + def select_observation(self, domain=None, x_test=None, n_test=50, n_samples=50, y_threshold=0.8, p_pref_tol=1e-3, n_mc_abs=5): + # n_test is the number of test point locations on the input function + # n_samples is the number of functions sampled from the posterior for estimating the utility + # n_mc_abs is the number of proposed observations sampled from the current posterior for absolute estimates + # Generate a set of test points in the domain (if not specified) + if x_test is None: + # x_test = self.linear_domain_sampler(n_test, 
domain) + x_test = self.uniform_domain_sampler(n_test, domain) + # x_test.sort(axis=0) + n_test = len(x_test) + + # Sample a set of functions from the current posterior + # We save the f value because otherwise it gets out of whack when we add observations + flap = self.solve_laplace() + fhat, vhat = self.predict_latent(x_test) + # base_utility = self.calculate_threshold_utility(fhat, vhat, n_samples, y_threshold) + + # Check a random set of pairwise relatives and all absolutes (massive sampling) + + # Generate a set of random pairs (this randomly pairs all x_test points) + uvi = np.random.choice(n_test, (n_test/2, 2), replace=False) + p_pref = self.rel_likelihood.posterior_likelihood(fhat, vhat, uvi, y=-1) + V_max_rel = 0.0 + + self.store_observations() + + # Relative observations + t_rel = time.time() + # Now calculate the expected value for each observation pair + for i, uv in enumerate(uvi): + x = x_test[uv] + V_rel = 0.0 + try: + if p_pref[i] < p_pref_tol: + V_rel += (1-p_pref[i])*self.test_observation(x, self._plus_y_obs, self._default_uvi, x_test, n_samples, y_threshold, f=flap) + elif p_pref[i] > 1.0-p_pref_tol: + V_rel += p_pref[i]*self.test_observation(x, self._minus_y_obs, self._default_uvi, x_test, n_samples, y_threshold, f=flap) + else: + V_rel += (1-p_pref[i])*self.test_observation(x, self._plus_y_obs, self._default_uvi, x_test, n_samples, y_threshold, f=flap) + V_rel += p_pref[i] * self.test_observation(x, self._minus_y_obs, self._default_uvi, x_test, n_samples, y_threshold, f=flap) + if V_rel >= V_max_rel: + V_max_rel = V_rel + x_best_rel = x + except GPpref.LaplaceException as exc: + print "Failed in relative test observation, x = [{0}, {1}]".format(x[0], x[1]) + raise exc + + # best_n = np.argpartition(V, -n_comparators)[-n_comparators:] + # best = np.argmax(V) + if self.verbose: + print 'V_max_rel = {0}, x = {2}, t = {1}s'.format(V_max_rel[0], time.time()-t_rel, x_best_rel[:,0]) + + # Absolute queries + V_max = 0.0 + t_rel = time.time() + for i, x in enumerate(x_test): + F = fhat[i] + np.random.randn(n_mc_abs)*np.sqrt(vhat[i,i]) + Y, mu = self.abs_likelihood.generate_samples(F) + V_abs = 0.0 + Y = np.clip(Y, 1e-2, 1-1e-2) # I had stability problems in Laplace with values approaching 0 or 1 + for y in Y: + try: + V_abs += self.test_observation(x, y, None, x_test, n_samples, y_threshold, f=flap) + except ValueError: + print "NaN Issue" + except GPpref.LaplaceException as exc: + print "Failed in absolute test observation, x = {0}, y = {1}".format(x, y) + raise exc + V_abs /= n_mc_abs + if V_abs > V_max: + V_max = V_abs + x_best = x + if self.verbose: + print 'V_max_abs = {0}, x = {2}, t = {1}s'.format(V_max, time.time() - t_rel, x_best) + + if V_max_rel > V_max: + x_best = x_best_rel + + return x_best + + +class SampledClassification(SampledThreshold): + def calculate_threshold_utility(self, fhat, vhat, n_samples, y_threshold): + # Class accuracy + # Note this assumes the sampled functions are used to make the classification decision, and we calculate the + # probability mass from the current mean estimate above the threshold + P_below = self.abs_likelihood.cdf(y_threshold, fhat) + + f_sampled = self.sample_latent_posterior(fhat, vhat, n_samples=n_samples) + threshold_utility = 0.0 + # For each one, evaluate the probability mass above the threshold in the absolute function + for fs in f_sampled: + fm = self.abs_likelihood.mean_link(fs) + predict_below = fm < y_threshold + predicted_accuracy = P_below[predict_below].sum() + (1.0 - P_below[~predict_below]).sum() + 
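+            # predicted_accuracy adds up, over all test points, the probability mass (under
+            # the current posterior mean, via P_below) that the below/above-threshold call
+            # implied by the sampled function fs is correct; dividing by len(fhat) gives the
+            # mean per-point classification accuracy for this posterior sample.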
threshold_utility += predicted_accuracy/len(fhat) + return threshold_utility/n_samples + + +class OrdinalSampler(PeakComparitor): + + def model_value(self, fhat, vhat, y_threshold): + # Probability of correct classification + p_y, mu = self.abs_likelihood.posterior_likelihood(fhat, vhat) + selection = np.argmax(p_y, axis=0)+1 + value = 0.0 + for i in range(len(fhat)): + if selection[i] >= y_threshold: + value += p_y[y_threshold-1:,i].sum() + else: + value += p_y[0:y_threshold-1,i].sum() + return value/len(fhat) + + def test_observation(self, x, y, uvi, x_test, y_threshold): + self.add_observations(x, y, uvi, keep_f=True) + self.solve_laplace() + fhat, vhat = self.predict_latent(x_test) + util = self.model_value(fhat, vhat, y_threshold) + self.reset_observations() + return util + + def get_rating_set_uv(self, n_ratings): + y = np.ones((n_ratings - 1, 1), dtype='int') + uv = [] + for i in range(n_ratings): + other_i = np.delete(np.arange(n_ratings), i) + uv.append(np.hstack((np.atleast_2d(other_i).T, y*i))) + return y, uv + + + def select_observation(self, domain=None, x_test=None, n_test=50, y_threshold=5, p_pref_tol=1e-3, n_rel_samples=2, n_mc_samples=100): + # Generate a set of test points in the domain (if not specified) + if x_test is None: + # x_test = self.linear_domain_sampler(n_test, domain) + x_test = self.uniform_domain_sampler(n_test, domain) + n_test = len(x_test) + + # We save the f value because otherwise it gets out of whack when we add observations + flap = self.solve_laplace() + fhat, vhat = self.predict_latent(x_test) + + # Sample a set of functions from the current posterior + f_post = np.random.multivariate_normal(fhat.flatten(), vhat, n_mc_samples) + + # Generate a set of random groups of n_rel_samples from the input space + uvi = np.array([np.random.choice(n_test, n_rel_samples, replace=False) for i in range(n_test)]) + V_max_rel = 0.0 + + self.store_observations() + + # Relative observations + t_rel = time.time() + y_uv, full_uv = self.get_rating_set_uv(n_rel_samples) + + # Calculate the expected value for each observation set + for i, uv in enumerate(uvi): + x = x_test[uv] + p_pref = np.zeros(n_rel_samples) + for fp in f_post: + p_pref[np.argmax(fp[uv])] += 1 + p_pref /= n_mc_samples + V_rel = 0.0 + + for i, p_x in enumerate(p_pref): + if p_x > p_pref_tol: + V_rel += p_x*self.test_observation(x, y_uv, full_uv[i], x_test, y_threshold) + if V_rel >= V_max_rel: + V_max_rel = V_rel + x_best_rel = x + + # best_n = np.argpartition(V, -n_comparators)[-n_comparators:] + # best = np.argmax(V) + if self.verbose >= 1: + print 'V_max_rel = {0}, x = {2}, t = {1}s'.format(V_max_rel, time.time()-t_rel, x_best_rel[:,0]) + + # Absolute queries + V_max = 0.0 + t_rel = time.time() + p_y, mu = self.abs_likelihood.posterior_likelihood(fhat, vhat) + for x, ppy in zip(x_test, p_y.T): + V_abs = 0.0 + for y_obs, p_obs in enumerate(ppy): + if p_obs > p_pref_tol: + V_abs += p_obs*self.test_observation(x, y_obs+1, None, x_test, y_threshold) + if V_abs > V_max: + V_max = V_abs + x_best = np.array([x]) + if self.verbose >= 1: + print 'V_max_abs = {0}, t = {1}s'.format(V_max, time.time() - t_rel) + + if V_max_rel > V_max: + x_best = x_best_rel + + return x_best + + +class OrdinalOptimist(OrdinalSampler): + def model_value(self, fhat, vhat, y_threshold): + # Probability of values above threshold + p_y, mu = self.abs_likelihood.posterior_likelihood(fhat, vhat) + value = p_y[y_threshold-1:,:].sum()/len(fhat) + return value/len(fhat) diff --git a/active_statruns.py b/active_statruns.py new file mode 
100644 index 0000000..eadc825 --- /dev/null +++ b/active_statruns.py @@ -0,0 +1,201 @@ +# Simple 1D GP classification example +import time +import numpy as np +import GPpref +import plot_tools as ptt +import active_learners +import test_data +import plot_statruns +import yaml +import argparse + +np.set_printoptions(precision=3) +wrms_fun = test_data.wrms_misclass +wrms_args = {'w_power': 1} + +now_time = time.strftime("%Y_%m_%d-%H_%M") +yaml_config ='./data/statruns_dec2017.yaml' + +parser = argparse.ArgumentParser(description='Statruns for active learning with preference GP') +parser.add_argument('-np', '--no-plots', dest='make_plots', action='store_false', help='Turn off plots (default False)') +parser.add_argument('-y', '--yaml-config', default=yaml_config, help='YAML config file') + +args = parser.parse_args() + +print "Using YAML config: {0}".format(args.yaml_config) +if not args.make_plots: + print "No plot output." +with open(args.yaml_config, 'rt') as fh: + run_parameters = yaml.safe_load(fh) + +log_hyp = np.log(run_parameters['hyperparameters']) + +# Statrun parameters +statrun_params = run_parameters['statrun_params'] +np.random.seed(statrun_params['randseed']) +n_best_points = statrun_params['n_best_points'] +n_trials = statrun_params['n_trials'] +n_queries = statrun_params['n_queries'] +if 'calc_relative_error' in run_parameters['statrun_params']: + calc_relative_error = statrun_params['calc_relative_error'] +else: + calc_relative_error = False + +n_rel_samples = run_parameters['statrun_params']['n_rel_samples'] + +# Define polynomial function to be modelled +d_x = run_parameters['GP_params']['hyper_counts'][0]-1 +random_wave = test_data.MultiWave(n_dimensions=d_x, **run_parameters['wave_params']) + +now_time = time.strftime("%Y_%m_%d-%H_%M") +data_dir = 'data/' + now_time + '/' +ptt.ensure_dir(data_dir) +print "Data will be saved to: {0}".format(data_dir) +waver = test_data.WaveSaver(n_trials, random_wave.n_components, n_dim=d_x) + + +# True function +x_plot = np.linspace(0.0, 1.0, statrun_params['n_xtest'], dtype='float') + +try: + if statrun_params['sample_type'] is 'uniform': + x_test = ptt.make_meshlist(x_plot, d_x) + else: + x_test = np.random.uniform(0.0, 1.0, + size = (statrun_params['n_xtest'], d_x)) +except KeyError: + print('Config does not contain statrun_params: sample_type value. 
Default to uniform sampling') + x_test = ptt.make_meshlist(x_plot, d_x) + +far_domain = np.tile(np.array([[-3.0], [-2.0]]), d_x) + +# Construct active learner objects +n_learners = len(run_parameters['learners']) +learners = [] +names = [] +obs_array = [] +full_obs = {} +for l in run_parameters['learners']: + if 'n_rel_samples' in l['obs_args']: + l['obs_args']['n_rel_samples'] = n_rel_samples + learners.append(active_learners.Learner(**l)) + names.append(l['name']) + obs_array.append({'name': l['name'], 'obs': []}) + full_obs[l['name']] = [None] * n_trials + +wrms_results = np.zeros((n_learners, n_queries+1, n_trials)) +true_pos_results = np.zeros((n_learners, n_queries+1, n_trials), dtype='int') +selected_error = np.zeros((n_learners, n_queries+1, n_trials)) +if calc_relative_error: + relative_error = np.zeros((n_learners, n_queries+1, n_trials)) +else: + relative_error = None + +with open(data_dir + 'params.yaml', 'wt') as fh: + yaml.safe_dump(run_parameters, fh) + +trial_number = 0 +while trial_number < n_trials: + + try: + print 'Trial {0}'.format(trial_number) + random_wave.randomize() + random_wave.print_values() + waver.set_vals(trial_number, *random_wave.get_values()) + rel_obs_fun = GPpref.RelObservationSampler(random_wave.out, run_parameters['GP_params']['rel_likelihood'], run_parameters['rel_obs_params']) + abs_obs_fun = GPpref.AbsObservationSampler(random_wave.out, run_parameters['GP_params']['abs_likelihood'], run_parameters['abs_obs_params']) + + f_true = abs_obs_fun.f(x_test) + y_abs_true = abs_obs_fun.mean_link(x_test) + best_points = np.argpartition(y_abs_true.flatten(), -n_best_points)[-n_best_points:] + best_points_set = set(best_points) + if calc_relative_error: + p_rel_y_true = rel_obs_fun.observation_likelihood_array(x_test) + + # Initial data + x_rel, uvi_rel, y_rel, fuv_rel = rel_obs_fun.generate_n_observations(statrun_params['n_rel_train'], + n_xdim=d_x, domain=far_domain) + x_abs, y_abs, mu_abs = abs_obs_fun.generate_n_observations(statrun_params['n_abs_train'], n_xdim=d_x, + domain=far_domain) + model_kwargs = {'x_rel': x_rel, 'uvi_rel': uvi_rel, 'x_abs': x_abs, 'y_rel': y_rel, 'y_abs': y_abs, + 'rel_kwargs': run_parameters['rel_obs_params'], 'abs_kwargs': run_parameters['abs_obs_params']} + model_kwargs.update(run_parameters['GP_params']) + + # Get initial solution + for nl, learner in enumerate(learners): + learner.build_model(model_kwargs) + learner.model.set_hyperparameters(log_hyp) + f = learner.model.solve_laplace() + fhat, vhat = learner.model.predict_latent(x_test) + y_abs_est = learner.model.abs_posterior_mean(x_test, fhat, vhat) + + best_points_est = set(np.argpartition(y_abs_est.flatten(), -n_best_points)[-n_best_points:]) + wrms_results[nl, 0, trial_number] = wrms_fun(y_abs_true, y_abs_est, **wrms_args) + true_pos_results[nl, 0, trial_number] = len(best_points_set.intersection(best_points_est)) + selected_error[nl, 0, trial_number] = test_data.wrms(y_abs_true[best_points], y_abs_est[best_points], weight=False) + if calc_relative_error: + p_rel_y_post = learner.model.rel_posterior_likelihood_array(fhat=fhat, varhat=vhat) + relative_error[nl, 0, trial_number] = test_data.rel_error(y_abs_true, p_rel_y_true, y_abs_est, p_rel_y_post, weight=True) + + obs_tuple = learner.model.get_observations() + full_obs[learner.name][trial_number] = [test_data.ObsObject(*obs_tuple)] + + for obs_num in range(n_queries): + t0 = time.time() + linear_p_rel = max(0.0, (n_queries-obs_num)/float(n_queries)) + + for nl, learner in enumerate(learners): + # Update p_rel for 
certain methods (hacky?) + if learner.update_p_rel: + learner.obs_arguments['p_rel'] = linear_p_rel + + next_x = learner.model.select_observation(**learner.obs_arguments) + next_uvi = None + if next_x.shape[0] == 1: + next_y, next_f = abs_obs_fun.generate_observations(next_x) + # print 'Abs: x:{0}, y:{1}'.format(next_x[0], next_y[0]) + else: + next_y, next_uvi, next_fx = rel_obs_fun.gaussian_multi_pairwise_sampler(next_x) + next_fuv = next_fx[next_uvi][:,:,0] + fuv_rel = np.concatenate((fuv_rel, next_fuv), 0) + # print 'Rel: x:{0}, best_index:{1}'.format(next_x.flatten(), next_uvi[0, 1]) + full_obs[learner.name][trial_number].append((next_x, next_y, next_uvi)) + learner.model.add_observations(next_x, next_y, next_uvi) + f = learner.model.solve_laplace() + fhat, vhat = learner.model.predict_latent(x_test) + y_abs_est = learner.model.abs_posterior_mean(x_test, fhat, vhat) + + # Get selected best point set and error results + best_points_est = set(np.argpartition(y_abs_est.flatten(), -n_best_points)[-n_best_points:]) + wrms_results[nl, obs_num+1, trial_number] = wrms_fun(y_abs_true, y_abs_est, **wrms_args) + true_pos_results[nl, obs_num+1, trial_number] = len(best_points_set.intersection(best_points_est)) + selected_error[nl, obs_num+1, trial_number] = test_data.wrms(y_abs_true[best_points], y_abs_est[best_points], weight=False) + if calc_relative_error: + p_rel_y_post = learner.model.rel_posterior_likelihood_array(fhat=fhat, varhat=vhat) + relative_error[nl, obs_num+1, trial_number] = test_data.rel_error(y_abs_true, p_rel_y_true, y_abs_est, + p_rel_y_post, weight=True) + if calc_relative_error: + print "{0}, t={t:0.2f}s, tp = {1}, wrms = {2}, p_err={3}".format(obs_num, true_pos_results[:, obs_num+1, trial_number], wrms_results[:, obs_num+1, trial_number], relative_error[:, obs_num+1, trial_number], t=time.time()-t0) + else: + print "{0}, t={t:0.2f}s, tp = {1}, wrms = {2}".format(obs_num, true_pos_results[:, obs_num+1, trial_number], wrms_results[:, obs_num+1, trial_number], t=time.time()-t0) + for nl, learner in enumerate(learners): + obs_tuple = learner.model.get_observations() + obs_array[nl]['obs'].append(test_data.ObsObject(*obs_tuple)) + + + except Exception as e: + print 'Exception error is: %s, attempting a new wave' % e + continue + + trial_number += 1 + if relative_error is not None: + plot_statruns.save_data(data_dir, wrms_results, true_pos_results, selected_error, obs_array, full_obs=full_obs, + rel_err=relative_error, t=trial_number) + else: + plot_statruns.save_data(data_dir, wrms_results, true_pos_results, selected_error, obs_array, full_obs=full_obs, + t=trial_number) + + waver.save(data_dir+'wave_data.pkl') + +if args.make_plots: + hfig = plot_statruns.plot_results(wrms_results, true_pos_results, selected_error, obs_array, relative_error=relative_error, data_dir=data_dir, bars=True, norm_comparator=0) \ No newline at end of file diff --git a/beta_to_discrete.py b/beta_to_discrete.py new file mode 100644 index 0000000..6f86c37 --- /dev/null +++ b/beta_to_discrete.py @@ -0,0 +1,61 @@ +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.patches import Rectangle +import GPpref +import nice_plot_colors as npc + +def convert_to_discrete(b, f, n_cat=5): + P = np.zeros(n_cat+1, dtype='float') + for i in range(1, n_cat+1): + P[i] = b.cdf(float(i)/n_cat, f) - P[0:i].sum() + return P[1:] + +def draw_probs(ax, x, P): + del_x = x[1]-x[0] + ddel_x = del_x / P.shape[0] + for xi, p in zip(x, P.T): # Loop over columns + ip = np.argsort(p, ) + for i in ip[::-1]: + box = 
Rectangle((xi+ddel_x*i, 0.0), ddel_x, p[i], color=npc.bars[i]) + ax.add_artist(box) + +# Examine how continuous variable is converted to discrete categories through beta distribution +sigma = 1.0 +v = 10.0 +n = 5 + +beta_likelihood = GPpref.AbsBoundProbit(sigma=sigma, v=v) + +f = np.linspace(-3.0, 3.0, 7) + +y = np.linspace(0.0, 1.0, 201) +py = np.zeros((len(f), len(y)), dtype='float') +Py = np.zeros((len(f), n), dtype='float') +for i, fi in enumerate(f): + py[i] = beta_likelihood.likelihood(y, fi) + Py[i] = convert_to_discrete(beta_likelihood, fi, n) + +fh, ah = plt.subplots() +xbars = np.arange(0.0, 1.0, step=1.0/n) +draw_probs(ah, xbars, Py) +ah2 = ah.twinx() +lines = [] +for i, p in enumerate(py): + lines.extend(ah2.plot(y, p, color=npc.lines[i])) +ah2.set_ybound(lower=0.0) +ah2.legend(lines, ['${0:0.1f}\sigma$'.format(fi) for fi in f]) + +# Logistic ordinal +log_sigma = 0.1 +b = np.linspace(-3.0, 3.0, n) # Log breakpoints +Py_ord = np.zeros((len(f), n), dtype='float') +for i, fi in enumerate(f): + py[i] = beta_likelihood.likelihood(y, fi) + Py[i] = convert_to_discrete(beta_likelihood, fi, n) + + + +plt.show() + + + diff --git a/config/statruns2_oct2017.yaml b/config/statruns2_oct2017.yaml new file mode 100644 index 0000000..6501171 --- /dev/null +++ b/config/statruns2_oct2017.yaml @@ -0,0 +1,28 @@ +# Hyperparameters are: [l, sig_f, sig_rel, sig_beta, v_beta] +hyperparameters: [0.025, 1.0, 0.2, 1.0, 40.0] +abs_obs_params: {sigma: 1.0, v: 40.0} +rel_obs_params: {sigma: 0.2} +wave_params: + amp_range: [1.5, 1.8] + damp_range: [250.0, 350.0] + f_range: [10.0, 30.0] + n_components: 4 + off_range: [0.01, 0.99] +statrun_params: + n_queries: 80 + n_trials: 100 + n_rel_train: 0 + n_abs_train: 1 + randseed: 0 + n_best_points: 30 + n_xtest: 200 + calc_relative_error: True +learner_params: {delta_f: 1.0e-05, n_rel_samples: 5} +# High tau for softmax is more random +learners: +# - name: MaxVar (abs) +# model_type: MaxVar +# obs_args: {p_rel: 0.0} + - name: HUCB ($p_{rel}=-1.0, w_v=0.75, \tau_r=0.1, \tau_a=0.05$) (rel, abs) + model_type: MaxVar + obs_args: {n_rel_samples: 5, p_rel: -1.0, rel_tau: 0.1, abs_tau: 0.05, w_v: 0.75, selector: entropy} diff --git a/mcmc.py b/mcmc.py new file mode 100644 index 0000000..923a53b --- /dev/null +++ b/mcmc.py @@ -0,0 +1,93 @@ +import numpy as np + + +class Enum(set): + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + +Samplers = Enum(['Metropolis', 'MetropolisHastings', 'Gibbs']) + + +class MCMCSampler(object): + def __init__(self, func, limits, q=None, seed=None, func_kwargs={}): + self.f = func + # self.sampler = sampler ## , sampler=Samplers.'MetropolisHastings' + self.limits = limits + self.lim_range = self.limits[1, :] - self.limits[0, :] + self.n_dim = self.limits.shape[1] + np.random.seed(seed) + if q == None: + q_std = self.lim_range / 10.0 + q = (lambda x: x + np.random.normal(size=self.n_dim) * q_std) + self.q = q + self.func_kwargs = func_kwargs + + @property + def sampler(self): + return self._sampler + + @sampler.setter + def sampler(self, s): + if s not in Samplers: raise Exception("Specified sampler must be in Sampler set") + self._sampler = s + + @property + def limits(self): + return self._limits + + @limits.setter + def limits(self, l): + l = np.array(l) + if l.shape[0] != 2: raise Exception("Limits must be a 2*n_dim array") + self._limits = l + + def generate_sample(self, X_current, f_current): + # Generate proposed sample + X_proposed = self.q(X_current) + while True: + inlimits = 
np.logical_and(X_proposed >= self.limits[0, :], X_proposed <= self.limits[1, :]) + if not inlimits.all(): + X_proposed = self.q(X_current) + else: + break + + # Acceptance ratio + f_proposed = self.f(X_proposed, **self.func_kwargs) + alpha = f_proposed / f_current + + # Accept? + pp = np.random.rand(1) + if pp < alpha: # Accept + return X_proposed, f_proposed + return X_current, f_current + + def sample_chain(self, n_samples, burn_in=None, X_start=None): + if burn_in is None: + burn_in = np.floor_divide(n_samples, 10) + if X_start is None: + X_start = self.limits[0, :] + np.random.rand(self.n_dim) * self.lim_range + X_current = X_start + f_current = self.f(X_current, **self.func_kwargs) + + # Run for burn-in and throw away samples + for t in range(burn_in): + X_current, f_current = self.generate_sample(X_current, f_current) + if t % 100 == 0: + print "Burn-in: {0}/{1}".format(t, burn_in) + + X = np.zeros((n_samples, self.n_dim)) + f_X = np.zeros(n_samples) + X[0, :] = X_current + f_X[0] = f_current + + for t in range(1, n_samples): + X_current, f_current = self.generate_sample(X_current, f_current) + X[t, :] = X_current + f_X[t] = f_current + if t % 100 == 0: + print "Chain sampling: {0}/{1}".format(t, n_samples) + + return X, f_X \ No newline at end of file diff --git a/nice_plot_colors.py b/nice_plot_colors.py new file mode 100644 index 0000000..c318b28 --- /dev/null +++ b/nice_plot_colors.py @@ -0,0 +1,31 @@ +import numpy as np + +lines = np.array([[57,106,177], [218,124,48], [62,150,81], [204,37,41], [83,81,84], [107,76,154], [146,36,40], [148,139,61]], dtype='float')/255 +bars = np.array([[114,147,203], [225,151,76], [132,186,91], [211,94,96], [128,133,133], [144,103,167], [171,104,87], [204,194,16]], dtype='float')/255 + +# These look better in print +vlines = ['cornflowerblue', 'green', 'firebrick', 'orange', 'black', 'indigo'] +vbars = ['steelblue', 'darkgreen', 'darkred', 'darkorange', 'grey', 'mediumvioletred'] + +lines = np.hstack((lines, np.ones((lines.shape[0],1)))) +bars = np.hstack((bars, np.ones((bars.shape[0],1)))) + + +def darken(c,power=2): + co = np.array(c).copy() + co = np.clip(co**power, 0, 1.0) + co[-1] = c[-1] + return co + + +def lighten(c, power=2): + co = 1.0-np.array(c).copy() + co = darken(co, power) + return 1.0-co + + +def greyify(c, grey=0.5, target_level=0.5): + co = np.array(c).copy() + co = np.clip(co + grey*(target_level-co), 0, 1.0) + co[-1] = c[-1] + return co diff --git a/ordinal_likelihoods.py b/ordinal_likelihoods.py new file mode 100644 index 0000000..fe0dd9a --- /dev/null +++ b/ordinal_likelihoods.py @@ -0,0 +1,128 @@ +import numpy as np +import matplotlib.pyplot as plt +import GPpref +import nice_plot_colors as npc +import plot_tools +from scipy.stats import norm +plt.rc('font',**{'family':'serif','sans-serif':['Computer Modern Roman']}) +plt.rc('text', usetex=True) + +def beta_discrete(b, f, n_cat=5): + P = np.zeros(n_cat+1, dtype='float') + for i in range(1, n_cat+1): + P[i] = b.cdf(float(i)/n_cat, f) - P[0:i].sum() + return P[1:] + + +# Examine how continuous variable is converted to discrete categories through beta distribution and logistic ordinal +n_ords = 5 + +n_f = 101 +f = np.linspace(-3.0, 3.0, n_f) + +# Integrated beta +sigma = 1.0 +v = 15.0 +beta_likelihood = GPpref.AbsBoundProbit(sigma=sigma, v=v) +Py = np.zeros((n_f, n_ords), dtype='float') +for i, fi in enumerate(f): + Py[i] = beta_discrete(beta_likelihood, fi, n_ords) + +fh, ah = plt.subplots() +beta_lines = [] +for i, p in enumerate(Py.T): + beta_lines.extend(ah.plot(f, p, 
color=npc.lines[i])) +ah.legend(beta_lines, ['$p(y={0}|f)$'.format(ni+1) for ni in range(n_ords)]) + +# Logistic ordinal +ord_sigma = 0.25 +ord_b = 1.2 +b = np.linspace(-ord_b, ord_b, n_ords-1) # Log breakpoints +Py_ord = np.zeros((n_f, n_ords), dtype='float') +z = lambda ff, bb: (bb-ff) / ord_sigma +Py_ord[:, 0] = norm.cdf(z(f, b[0])) +Py_ord[:, -1] = 1.0-norm.cdf(z(f, b[-1])) +for i in range(1, n_ords-1): + Py_ord[:,i] = norm.cdf(z(f, b[i])) - norm.cdf(z(f, b[i-1])) + +ord_lines = [] +for i, p in enumerate(Py_ord.T): + ord_lines.extend(ah.plot(f, p, color=npc.lines[i], ls='--')) + +ah.set_xlabel('$f(x)$') +ah.set_ylabel('$p(y|f)$') + + +# Testing the Ordinal Probit class in GPPref +ord_kwargs = {'sigma': ord_sigma, 'b': ord_b, 'n_ordinals': n_ords, 'eps': 1.0e-5} +orpro = GPpref.OrdinalProbit(**ord_kwargs) +Py_c = np.zeros((n_f, n_ords), dtype='float') +dPy_c = np.zeros((n_f, n_ords), dtype='float') +d2Py_c = np.zeros((n_f, n_ords), dtype='float') +fv = np.atleast_2d(f).T +y = np.ones(fv.shape, dtype='int') +for i in range(n_ords): + d2p, dp, p = orpro.derivatives(y*(i+1), fv) + d2Py_c[:,i], dPy_c[:,i], Py_c[:,i] = d2p.diagonal(), dp.flat, p.flat + +fh2, ah2 = plt.subplots(2, 2) +ah2 = ah2.flatten() +ah2[0].plot(f, np.exp(Py_c)) +ah2[0].set_title('$P(y|f(x))$') +ah2[0].set_ylabel('$P(y)$') + +ah2[1].plot(f, Py_c) +ah2[1].set_title('$ln P(y|f(x))$') +ah2[1].set_ylabel('$ln P(y)$') + +ah2[2].plot(f, dPy_c) +ah2[2].set_title('$\partial ln P(y|f(x) / \partial f(x)$') +ah2[2].set_ylabel('$\partial ln P(y) / \partial f(x)$') + +ah2[3].plot(f, d2Py_c) +ah2[3].set_title('$\partial^2 l(y,f(x)) / \partial f(x)^2$') +ah2[3].set_ylabel('$\partial^2 ln P(y) / \partial f(x)^2$') +for a in ah2: + a.set_xlabel('$f(x)$') + + +# Test the sampler +def fun(x): + return x + # return 2.5*np.sin(x/2.0) +obs_fun = GPpref.AbsObservationSampler(fun, 'OrdinalProbit', ord_kwargs) +x_plot = f +x_test = np.atleast_2d(x_plot).T +f_true = obs_fun.f(x_test) +y_samples = np.atleast_2d(np.arange(1, n_ords+1, 1, dtype='int')).T +p_y_array = obs_fun.observation_likelihood_array(x_test, y_samples) +z, mu = obs_fun.generate_observations(x_test) + +fh3, ah3 = plt.subplots(1, 2) +plot_tools.plot_with_bounds(ah3[0], x_test, f_true, ord_sigma, c=npc.lines[0]) +abs_extent = [x_test[0, 0], x_test[-1, 0], y_samples[0, 0]-0.5, y_samples[-1, 0]+0.5] +h_pat = ah3[1].imshow(p_y_array, origin='lower', extent=abs_extent, aspect='auto') +ah3[1].plot(x_test, z, 'w+') +h_yt, = ah3[1].plot(x_test, mu, c=npc.lines[1]) +ah3[1].legend([h_yt], ['$E[y]$']) +fh3.colorbar(h_pat, ax=ah3[1]) + + +# Plot some beta likelihood stuff for Thane +# beta_likelihood = GPpref.AbsBoundProbit(sigma=1.0, v=25.0) +# y = np.atleast_2d(np.linspace(0.0, 1.0, 201)).T +# f = np.linspace(-2.0, 2.0, 5) +# p_y = np.zeros((y.shape[0], len(f)), dtype='float') +# for i, fxi in enumerate(f): +# p_y[:, i:i + 1] = beta_likelihood.likelihood(y, fxi) +# fh4, ah4 = plt.subplots() +# h_b = plt.plot(y, p_y) +# ah4.legend(h_b, ['$f = {0:0.1f}$'.format(fi) for fi in f]) +# ah4.set_ylim([0, 10.0]) +# ah4.set_xlabel('$y$') +# ah4.set_ylabel('$p(y|f)$') +# ah4.set_title('Symmetric beta likelihood') +plt.show(block=False) + + + diff --git a/plot_acquisition.py b/plot_acquisition.py new file mode 100644 index 0000000..26f97c1 --- /dev/null +++ b/plot_acquisition.py @@ -0,0 +1,135 @@ +# Simple 1D GP classification example +import time +import numpy as np +import matplotlib.pyplot as plt +import GPpref +import plot_tools as ptt +import active_learners +import test_data +from 
matplotlib.backends.backend_pdf import PdfPages + +nowstr = time.strftime("%Y_%m_%d-%H_%M") +plt.rc('font',**{'family':'serif','sans-serif':['Computer Modern Roman']}) +plt.rc('text', usetex=True) + +save_plots = False + +# log_hyp = np.log([0.1,0.5,0.1,10.0]) # length_scale/s, sigma_f, sigma_n_rel, sigma_beta, v_beta +# log_hyp = np.log([0.07, 0.75, 0.25, 1.0, 28.1]) +# log_hyp = np.log([0.05, 1.5, 0.09, 2.0, 50.0]) +log_hyp = np.log([0.02, 0.5, 0.1, 0.8, 60.0]) +np.random.seed(2) + +n_rel_train = 1 +n_abs_train = 1 +rel_sigma = 0.05 +delta_f = 1e-5 + +beta_sigma = 0.8 +beta_v = 80.0 + +n_xplot = 101 +n_mcsamples = 1000 +n_ysamples = 101 + +n_queries = 10 + +# Define polynomial function to be modelled +# true_function = test_data.multi_peak +# random_wave = test_data.VariableWave([0.6, 1.0], [5.0, 10.0], [0.0, 1.0], [10.0, 20.0]) +random_wave = test_data.MultiWave(amp_range=[0.6, 1.2], f_range=[10.0, 30.0], off_range=[0.1, 0.9], + damp_range=[250.0, 350.0], n_components=3) +random_wave.randomize() +# random_wave.set_values(1.0, 6.00, 0.2, 10.50) +true_function = random_wave.out +random_wave.print_values() + +if save_plots: + nowstr = time.strftime("%Y_%m_%d-%H_%M") + fig_dir = 'fig/' + nowstr + '/' + ptt.ensure_dir(fig_dir) + print "Figures will be saved to: {0}".format(fig_dir) + pdf_pages = PdfPages(fig_dir+'posterior_all.pdf') + acq_pdf = PdfPages(fig_dir+'acquisition.pdf') + +rel_obs_fun = GPpref.RelObservationSampler(true_function, GPpref.PrefProbit(sigma=rel_sigma)) +abs_obs_fun = GPpref.AbsObservationSampler(true_function, GPpref.AbsBoundProbit(sigma=beta_sigma, v=beta_v)) + +# True function +x_plot = np.linspace(0.0, 1.0, n_xplot,dtype='float') +x_test = np.atleast_2d(x_plot).T +f_true = abs_obs_fun.f(x_test) +mu_true = abs_obs_fun.mean_link(x_test) +mc_samples = np.random.normal(size=n_mcsamples) +abs_y_samples = np.atleast_2d(np.linspace(0.01, 0.99, n_ysamples)).T +p_abs_y_true = abs_obs_fun.observation_likelihood_array(x_test, abs_y_samples) +p_rel_y_true = rel_obs_fun.observation_likelihood_array(x_test) + +# Training data +x_rel, uvi_rel, uv_rel, y_rel, fuv_rel = rel_obs_fun.generate_n_observations(n_rel_train, n_xdim=1) +x_abs, y_abs, mu_abs = abs_obs_fun.generate_n_observations(n_abs_train, n_xdim=1) + + +# Plot true functions +fig_t, (ax_t_l, ax_t_a, ax_t_r) = ptt.true_plots(x_test, f_true, mu_true, rel_sigma, + abs_y_samples, p_abs_y_true, p_rel_y_true, + x_abs, y_abs, uv_rel, fuv_rel, y_rel, + t_l=r'True latent function, $f(x)$') +if save_plots: + fig_t.savefig(fig_dir+'true.pdf', bbox_inches='tight') + +# Construct active learner object +# learner = active_learners.UCBAbsRelD(x_rel, uvi_rel, x_abs, y_rel, y_abs, delta_f=delta_f, +# rel_likelihood=GPpref.PrefProbit(), abs_likelihood=GPpref.AbsBoundProbit()) +# # obs_arguments = {'req_improvement': 0.60, 'n_test': 50, 'gamma': 2.0, 'n_rel_samples': 5, 'p_thresh': 0.7} +# obs_arguments = {'n_test': 100, 'p_rel': 0.5, 'n_rel_samples': 5, 'gamma': 2.0} + +learner = active_learners.ExpectedImprovementRel(x_rel, uvi_rel, x_abs, y_rel, y_abs, delta_f=delta_f, + rel_likelihood=GPpref.PrefProbit(), abs_likelihood=GPpref.AbsBoundProbit()) +acq_fun = learner.expected_improvement +obs_arguments = {'n_test': 100, 'zeta': 0.1} + +# Get initial solution +learner.set_hyperparameters(log_hyp) +f = learner.solve_laplace() +learner.print_hyperparameters() + +if save_plots: + fig_p, (ax_p_l, ax_p_a, ax_p_r), h_plotobj = learner.create_posterior_plot(x_test, f_true, mu_true, rel_sigma, fuv_rel, + abs_y_samples, mc_samples) + fig_a, ax_a = 
plt.subplots() + pdf_pages.savefig(fig_p, bbox_inches='tight') + # fig_p.savefig(fig_dir+'posterior00.pdf', bbox_inches='tight') + +for obs_num in range(n_queries): + obs_arguments['p_rel'] = max(0.0, (n_queries - obs_num) / float(n_queries)) + next_x = learner.select_observation(**obs_arguments) + y_acq = learner.expected_improvement() + if next_x.shape[0] == 1: + next_y, next_f = abs_obs_fun.generate_observations(next_x) + learner.add_observations(next_x, next_y) + print 'Abs: x:{0}, y:{1}'.format(next_x[0], next_y[0]) + else: + next_y, next_uvi, next_fx = rel_obs_fun.gaussian_multi_pairwise_sampler(next_x) + next_fuv = next_fx[next_uvi][:, :, 0] + fuv_rel = np.concatenate((fuv_rel, next_fuv), 0) + learner.add_observations(next_x, next_y, next_uvi) + print 'Rel: x:{0}, best_index:{1}'.format(next_x.flatten(), next_uvi[0, 1]) + + f = learner.solve_laplace() + if save_plots: + fig_p, (ax_p_l, ax_p_a, ax_p_r), h_plotobj = learner.create_posterior_plot(x_test, f_true, mu_true, rel_sigma, fuv_rel, + abs_y_samples, mc_samples) + pdf_pages.savefig(fig_p, bbox_inches='tight') + # fig_p.savefig(fig_dir+'posterior{0:02d}.pdf'.format(obs_num+1), bbox_inches='tight') + plt.close(fig_p) + +learner.print_hyperparameters() + +if not save_plots: + fig_p, (ax_p_l, ax_p_a, ax_p_r), h_plotobj = learner.create_posterior_plot(x_test, f_true, mu_true, rel_sigma, fuv_rel, + abs_y_samples, mc_samples) +else: + pdf_pages.close() + +plt.show() diff --git a/plot_statruns.py b/plot_statruns.py new file mode 100644 index 0000000..eefa2ef --- /dev/null +++ b/plot_statruns.py @@ -0,0 +1,326 @@ +import numpy as np +import matplotlib.pyplot as plt +import pickle +import yaml +import test_data +import GPpref +import active_learners +from Tkinter import Tk +from tkFileDialog import askdirectory +import argparse +# from test_data import ObsObject +plt.rc('font',**{'family':'serif','sans-serif':['Computer Modern Roman']}) +plt.rc('text', usetex=True) + + +def single_plot(data, x=None, names=None, title='', xlabel='Number of samples', ylabel='', bars=False, percentile=0, precut=0, errorevery=5): + data = data[:,precut:,:] + n_trials = data.shape[2] + + if x is None: + x = np.arange(data.shape[1])+precut + + hf, hax = plt.subplots() + hl = [] + for dd in data: + mean_err = np.nanmean(dd, axis=1) + if percentile < 0: + err_lo = np.nanstd(dd, axis=1, ddof=1) / np.sqrt(n_trials) + err_hi = err_lo + elif percentile == 0: + err_lo = np.nanstd(dd, axis=1) + err_hi = err_lo + else: + err_lo = mean_err - np.percentile(dd, percentile, axis=1) + err_hi = np.percentile(dd, percentile + 50, axis=1) - mean_err + err = np.array([err_lo, err_hi]) + + if bars: + hl.append(hax.errorbar(x, mean_err, yerr=err, capsize=2.0, errorevery=errorevery)) + else: + hl.append(hax.plot(x, mean_err)[0]) + hax.legend(hl, names, loc='best') + hax.set_title(title) + hax.set_xlabel(xlabel) + hax.set_ylabel(ylabel) + return hf, hax + +def plot_results(wrms_results, true_pos_results, selected_error, obs_array, relative_error=None, data_dir=None, bars=True, norm_comparator=0, exclusions=[]): + methods_indexes = [] + for i in range(wrms_results.shape[0]): + if i not in exclusions: + methods_indexes.append(i) + methods_indexes = np.array(methods_indexes) + + names = [obs_array[i]['name'] for i in methods_indexes] + + wrms_results=wrms_results[methods_indexes,:,:] + true_pos_results=true_pos_results[methods_indexes,:,:] + selected_error=selected_error[methods_indexes,:,:] + + + f0, ax0 = single_plot(wrms_results, names=names, ylabel='Weighted RMSE', bars=bars, 
percentile=25) + f1, ax1 = single_plot(true_pos_results, names=names, ylabel='True positive selections (out of 30)', bars=True, precut=1, percentile=25) + f2, ax2 = single_plot(selected_error, names=names, ylabel='RMSE of best paths', bars=True, precut=1, percentile=25) + f = [f0, f1, f2] + ax = [ax0, ax1, ax2] + + try: + norm_wrms = wrms_results/wrms_results[norm_comparator] + f3, ax3 = single_plot(norm_wrms, names=names, title='Normalized weighted RMSE', bars=bars, percentile=0) + f.append(f3) + ax.append(ax3) + except: + pass + + if relative_error is not None: + relative_error=relative_error[methods_indexes,:,:] + fr, axr = single_plot(relative_error, names=names, ylabel='Mean relative prediction error', bars=True) + f.append(fr); ax.append(axr) + if data_dir is not None: + fr.savefig(data_dir + 'rel_error.pdf', bbox_inches='tight') + + if data_dir is not None: + f0.savefig(data_dir + 'wrms.pdf', bbox_inches='tight') + f1.savefig(data_dir + 'true_pos.pdf', bbox_inches='tight') + f2.savefig(data_dir + 'rms_best.pdf', bbox_inches='tight') + # hl = [] + # for i in range(mean_err.shape[0]): + # hl.append(plt.errorbar(np.arange(mean_err.shape[1]), mean_err[i,:], yerr=std_err[i, :])) + # plt.legend(hl, names) + plt.show() + return f + + +def _save_file(file, data): + with open(file, 'wb') as fh: + pickle.dump(data, fh) + return + +def save_data(data_dir, wrms, true_pos, sel_err, obs, rel_err=None, full_obs=None, t=None): + if t is None: + t = wrms.shape[2] + _save_file(data_dir + 'wrms.pkl', wrms[:,:,:t]) + _save_file(data_dir + 'true_pos.pkl', true_pos[:,:,:t]) + _save_file(data_dir + 'selected_error.pkl', sel_err[:,:,:t]) + _save_file(data_dir + 'obs.pkl', obs) + if rel_err is not None: + _save_file(data_dir + 'relative_error.pkl', rel_err[:,:,:t]) + if full_obs is not None: + fobs = {key:full_obs[key][:t] for key in full_obs} + _save_file(data_dir + 'full_obs.pkl', fobs) + +def _load_file(file): + with open(file, 'rb') as fh: + data = pickle.load(fh) # Dimensions n_learners, n_queries+1, n_trials + return data + +def _load_params(file): + with open(file, 'rt') as fh: + params = yaml.safe_load(fh) + return params + +def get_data_dir(): + Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing + data_dir = askdirectory(initialdir='./data/') + if data_dir == '': # Cancel clicked + raise IOError('No directory selected') + elif data_dir[-1] is not '/': + data_dir += '/' + return data_dir + +def load_data(data_dir=None): + if data_dir is None: + data_dir = get_data_dir() + + wrms_results = _load_file(data_dir+'wrms.pkl') + true_pos_results = _load_file(data_dir+'true_pos.pkl') + selected_error = _load_file(data_dir+'selected_error.pkl') + obs_array = _load_file(data_dir+'obs.pkl') + try: + relative_error = _load_file(data_dir+'relative_error.pkl') + except IOError: + print "No relative error data found." + relative_error = None + try: + full_obs = _load_file(data_dir+'full_obs.pkl') + except IOError: + print "No full observation set found." 
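+        # full_obs.pkl (like relative_error.pkl above) is optional; if the run did not
+        # save it, fall back to None so the rest of the loading and plotting still works.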
+ full_obs = None + return data_dir, wrms_results, true_pos_results, selected_error, obs_array, relative_error, full_obs + + +def load_and_plot(save_plots=True, data_dir=None, *args, **kwargs): + try: + data_dir, wrms_results, true_pos_results, selected_error, obs_array, relative_error, full_obs = load_data(data_dir) + except IOError: + return None + + if save_plots is False: + data_dir = None + hf = plot_results(wrms_results, true_pos_results, selected_error, obs_array, data_dir=data_dir, relative_error=relative_error, *args, **kwargs) + plt.show() + return hf + +def get_selection(): + _dd, wrms, true_pos, sel_err, obs, rel_err, full_obs = load_data() + print "The following models were found:" + for i, obs_item in enumerate(obs): + print "{0}: {1}".format(i, obs_item['name']) + dex = input('Select desired models by index as an array: ') + wrms, true_pos, sel_err = wrms[dex, :, :], true_pos[dex, :, :], sel_err[dex, :, :] + if rel_err is not None: + rel_err = rel_err[dex, :, :] + obs = [obs[i] for i in dex] + # finished = input('Finished? (0/1)') + return wrms, true_pos, sel_err, obs, rel_err + + +def load_multiple(*args, **kwargs): + wrms, true_pos, sel_err, obs, rel_err = get_selection() + app = lambda a, b: np.append(a, b, axis=0) + while True: + try: + wrms2, true_pos2, sel_err2, obs2, rel_err2 = get_selection() + except IOError: + break + wrms, true_pos, sel_err = app(wrms, wrms2), app(true_pos, true_pos2), app(sel_err, sel_err2) + if rel_err is not None: rel_err = app(rel_err, rel_err2) + obs.extend(obs2) + hf = plot_results(wrms, true_pos, sel_err, obs, relative_error=rel_err, *args, **kwargs) + plt.show() + if 'data_dir' in kwargs: + save_data(kwargs['data_dir'], wrms, true_pos, sel_err, obs, rel_err) + print 'Data saved to {0}'.format(kwargs['data_dir']) + return hf, wrms, true_pos, sel_err, obs, rel_err + + +def build_ordinal_wrms(max_y = 0.8, data_dir = None, make_plots = True, *args, **kwargs): + if data_dir is None: + data_dir = get_data_dir() + run_parameters = _load_params(data_dir+'params.yaml') + wave_data = _load_file(data_dir + 'wave_data.pkl') + full_obs = _load_file(data_dir + 'full_obs.pkl') + random_wave = test_data.MultiWave(**run_parameters['wave_params']) + + if full_obs is None: + raise IOError('full_obs.pkl not found, cannot reconstruct without full observation history') + + log_hyp = np.log(run_parameters['hyperparameters']) + n_learners, n_queries = len(run_parameters['learners']), run_parameters['statrun_params']['n_queries'] + n_trials = len(full_obs[run_parameters['learners'][0]['name']]) + x_plot = np.linspace(0.0, 1.0, run_parameters['statrun_params']['n_xtest'], dtype='float') + x_test = np.atleast_2d(x_plot).T + + wkld = np.zeros((n_learners, n_queries+1, n_trials), dtype='float') + max_count = np.zeros((n_learners, n_queries+1, n_trials), dtype='float') + + learners, names = [], [] + for l in run_parameters['learners']: + if 'n_rel_samples' in l['obs_args']: + l['obs_args']['n_rel_samples'] = run_parameters['statrun_params']['n_rel_samples'] + learners.append(active_learners.Learner(**l)) + names.append(l['name']) + + for trial_number in range(n_trials): + print 'Trial {0}'.format(trial_number) + a, f, o, d = wave_data.amplitude[trial_number], wave_data.frequency[trial_number], wave_data.offset[trial_number], wave_data.damping[trial_number] + random_wave.set_values(a, f, o, d) + random_wave.print_values() + abs_obs_fun = GPpref.AbsObservationSampler(random_wave.out, run_parameters['GP_params']['abs_likelihood'], + run_parameters['abs_obs_params']) + + 
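+        # min_max_label is the lowest ordinal label still counted as a "top" point, e.g.
+        # with max_y = 0.8 and a 5-level rating scale (y_list[-1] = 5, an assumed example)
+        # floor(0.8 * 5) = 4, so labels 4 and 5 are treated as maximum-value selections.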
min_max_label = np.floor(max_y * abs_obs_fun.l.y_list[-1]) + + p_y_true = abs_obs_fun.observation_likelihood_array(x_test) + true_y = abs_obs_fun.l.y_list[p_y_true.argmax(axis=0)] # True maximum likelihood labels + max_y_true = (true_y >= min_max_label) # True best label indexes (bool array) + n_max = float(max_y_true.sum()) + + for obs_num in range(0, n_queries+1): + for nl, learner in enumerate(learners): + # Get initial observations and build models + if obs_num == 0: + obs0 = full_obs[learner.name][trial_number][0] + x_rel, uvi_rel, x_abs, y_rel, y_abs = obs0.x_rel, obs0.uvi_rel, obs0.x_abs, obs0.y_rel, obs0.y_abs + model_kwargs = {'x_rel': x_rel, 'uvi_rel': uvi_rel, 'x_abs': x_abs, 'y_rel': y_rel, 'y_abs': y_abs, + 'rel_kwargs': run_parameters['rel_obs_params'], + 'abs_kwargs': run_parameters['abs_obs_params']} + model_kwargs.update(run_parameters['GP_params']) + learner.build_model(model_kwargs) + learner.model.set_hyperparameters(log_hyp) + + else: + next_x, next_y, next_uvi = full_obs[learner.name][trial_number][obs_num] + learner.model.add_observations(next_x, next_y, next_uvi) + + learner.model.solve_laplace() + fhat, vhat = learner.model.predict_latent(x_test) + p_y_est, mu_est = learner.model.abs_likelihood.posterior_likelihood(fhat, vhat) + est_y = abs_obs_fun.l.y_list[p_y_est.argmax(axis=0)] + max_y_est = (est_y >= min_max_label) + + wkld[nl, obs_num, trial_number] = test_data.ordinal_kld(p_y_true, p_y_est, np.maximum(true_y, est_y)) + max_count[nl, obs_num, trial_number] = np.logical_and(max_y_true, max_y_est).sum() / n_max + + + _save_file(data_dir+'wkld.pkl', wkld) + _save_file(data_dir+'max_count.pkl', max_count) + if make_plots: + plot_ordinal_results(wkld, max_count, run_parameters=run_parameters, data_dir=data_dir) + +def load_ordinal_results(data_dir = None): + if data_dir is None: + data_dir = get_data_dir() + + wkld = _load_file(data_dir+'wkld.pkl') + max_count = _load_file(data_dir+'max_count.pkl') + + plot_ordinal_results(wkld, max_count, data_dir=data_dir) + +def plot_ordinal_results(wkld, max_count, run_parameters = None, data_dir=None, bars=True, exclusions=[]): + if run_parameters is None: + with open(data_dir + 'params.yaml', 'rt') as fh: + run_parameters = yaml.safe_load(fh) + + names = [] + for l in run_parameters['learners']: + names.append(l['name']) + + methods_indexes = [] + for i in range(len(names)): + if i not in exclusions: + methods_indexes.append(i) + methods_indexes = np.array(methods_indexes) + + wkld=wkld[methods_indexes,:,:] + max_count=max_count[methods_indexes,:,:] + + f0, ax0 = single_plot(wkld, names=names, ylabel='Weighted KLD', bars=bars, percentile=-1) + f1, ax1 = single_plot(max_count, names=names, ylabel='Proportion of selected max points', bars=True, precut=1, percentile=-1) + f = [f0, f1] + ax = [ax0, ax1] + + if data_dir is not None: + f0.savefig(data_dir + 'wkld.pdf', bbox_inches='tight') + f1.savefig(data_dir + 'max_count.pdf', bbox_inches='tight') + plt.show() + return f + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Statrun plots for active learning preference GP') + parser.add_argument('-np', '--no-plots', dest='make_plots', action='store_false', + help='Turn off plots (default False)') + parser.add_argument('-ow', '--ordinal-wrms', dest='owrms', action='store_true', + help='Build ordinal wrms results (default False)') + parser.add_argument('-lm', '--load-multiple', dest='load_multiple', action='store_true', + help='Load from multiple directories (default False)') + parser.add_argument('-d', 
'--data-dir', default=None, help='Data directory') + args = parser.parse_args() + + if args.owrms: + build_ordinal_wrms(data_dir=args.data_dir, make_plots=args.make_plots) + if args.load_multiple: + load_multiple(save_plots=args.make_plots) + else: + load_and_plot(save_plots=args.make_plots, bars=True) \ No newline at end of file diff --git a/plot_tools.py b/plot_tools.py new file mode 100644 index 0000000..faebfee --- /dev/null +++ b/plot_tools.py @@ -0,0 +1,301 @@ +import os +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.patches import Polygon +import matplotlib.cm as cm +import matplotlib.colors +from nice_plot_colors import * +from cycler import cycler +from mpl_toolkits.mplot3d.art3d import Line3DCollection +# plt.rc('axes', prop_cycle=(cycler('color', [greyify(c, .5, .8) for c in reversed(lines)]))) +plt.rc('axes', prop_cycle=(cycler('color', lines))) + + +def xgen(xp, n_dim): + for i in range(n_dim): + yield xp + + +def make_meshlist(x_plot, d_x): + xx = xgen(x_plot, d_x) + x_mesh = np.vstack(np.meshgrid(*xx, copy=False)).reshape(d_x, -1).T + return x_mesh + + +def make_poly_array(x,y,sigma): + nx = len(x) + sigma = np.atleast_2d(sigma) + xy = np.zeros((2*nx, 2)) + xy[:,0] = np.append(x, x[::-1]) + xy[:,1] = np.append(y-sigma, y[::-1]+sigma[::-1]) + return xy + + +def plot_with_bounds(ax, x, y, s, c=lines[0], lw=1.5, *args, **kwargs): + isort = np.argsort(x.flat) + xx, yy = x[isort], y[isort] + try: + ss = s[isort] + except: + ss = s + xy = make_poly_array(xx, yy, ss) + h_patch = Polygon(xy, ec=c, fc=lighten(c, 3), alpha=0.5) + h_fx, = ax.plot(xx, yy, lw=lw, c=c, *args, **kwargs) + ax.add_patch(h_patch) + clim = ax.get_ylim() + ax.set_ylim(bottom = min(clim[0],xy[:,1].min()), top = max(clim[1], xy[:,1].max())) + return h_fx, h_patch + + +def _set_subplot_labels(ax, xlabel='$x$', ylabel='$f(x)$', title=''): + ax.set_title(title) + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + + +def plot_setup_1drel(t_l = r'Latent function, $f(x)$', t_r = r'Relative likelihood, $P(x_0 \succ x_1 | f(x_0), f(x_1))$', ax=None, **kwargs): + if ax is None: + fig, (ax_l, ax_r) = plt.subplots(1, 2, **kwargs) + else: + (ax_l, ax_r) = ax + fig = ax_l.figure + fig.set_size_inches(8.7, 3.2) + _set_subplot_labels(ax_l, title=t_l) # Latent function + _set_subplot_labels(ax_r, xlabel='$x_0$', ylabel='$x_1$', title=t_r) # Relative likelihood + return fig, (ax_l, ax_r) + + +def plot_setup_1d(t_l = r'Latent function, $f(x)$', t_a = r'Absolute likelihood, $p(y | f(x))$', + t_r = r'Relative likelihood, $P(x_0 \succ x_1 | f(x_0), f(x_1))$', ax = None, **kwargs): + + if ax is None: + fig = plt.figure() + ax_l = fig.add_subplot(131, **kwargs) + ax_a = fig.add_subplot(132, **kwargs) + ax_r = fig.add_subplot(133, **kwargs) + fig.set_size_inches(14.7, 3.5) + else: + (ax_l, ax_a, ax_r) = ax + fig = ax_l.figure + + _set_subplot_labels(ax_l, title=t_l) # Latent function + _set_subplot_labels(ax_a, ylabel='$y$', title=t_a) # Absolute likelihood + _set_subplot_labels(ax_r, xlabel='$x_0$', ylabel='$x_1$', title=t_r) # Relative likelihood + + return fig, (ax_l, ax_a, ax_r) + + +def plot_relative_queries(ax, uvr_train, fuvr_train, yr_train, class_icons, marker_options=None): + for uv, fuv, y in zip(uvr_train, fuvr_train, yr_train): + ax.plot(uv, fuv, 'b-', color=lighten(lines[0]), lw=0.8) + ax.plot(uv[(y + 1) / 2], fuv[(y + 1) / 2], class_icons[(y[0] + 1) / 2], **marker_options) + + +def plot_absolute_likelihood(ax, p_a_y, xt, mu_t, y_samples, E_y=None, xa_train=None, ya_train=None): + dely = y_samples[1, 0] 
- y_samples[0, 0] + abs_extent = [xt[0, 0], xt[-1, 0], y_samples[0, 0] - 0.5*dely, y_samples[-1, 0] + 0.5*dely] + vmax = max(1.0, p_a_y.max()) + h_pat = ax.imshow(p_a_y, origin='lower', extent=abs_extent, aspect='auto', vmin=0.0, vmax=vmax) + if xa_train is not None and xa_train.shape[0] > 0: + ax.plot(xa_train, ya_train, 'w+') + h_lines = ax.plot(xt, mu_t, c=lines[0]) + legend_entries = [r'True mean, $E[y]$'] + if E_y is not None: + h_lines.extend(ax.plot(xt, E_y, color=lines[3])) + legend_entries.append(r'Posterior mean, $E_{p(y|\mathcal{Y})}\left[y\right]$') + ax.legend(h_lines, legend_entries) + ax.set_xlim(xt[0], xt[-1]) + ax.get_figure().colorbar(h_pat, ax=ax) + return (h_pat, h_lines) + +def plot_relative_likelihood(ax, p_y, extent, uvr_train=None, yr_train=None, class_icons=None, marker_options=None): + h_p = ax.imshow(p_y, origin='lower', extent=extent, vmin=0.0, vmax=1.0) + h_pc = ax.contour(p_y, levels=[0.5], origin='lower', linewidths=2, extent=extent) + plt.clabel(h_pc, inline=1, fontsize=10) + ax.get_figure().colorbar(h_p, ax=ax) + + if uvr_train is not None: # and xt.shape[0] > 0: + for uv, y in zip(uvr_train, yr_train): + ax.plot(uv[0], uv[1], class_icons[(y[0] + 1) / 2], **marker_options) + return (h_p, h_pc) + +def true_plots(xt, ft, mu_t, rel_sigma, y_samples, p_a_y, p_r_y=None, xa_train=None, ya_train=None, uvr_train=None, fuvr_train=None, yr_train=None, + class_icons=['ko', 'wo'], marker_options={'mec':'k', 'mew':0.5}, *args, **kwargs): + if p_r_y is None: + return true_plots2D(xt, ft, mu_t, rel_sigma, y_samples, p_a_y, xa_train, ya_train, uvr_train, fuvr_train, yr_train, + class_icons, marker_options, *args, **kwargs) + + # Plot true function, likelihoods and observations + fig, (ax_l, ax_a, ax_r) = plot_setup_1d(**kwargs) + + # True latent + plot_with_bounds(ax_l, xt, ft, rel_sigma, c=lines[0]) + if uvr_train is not None: + plot_relative_queries(ax_l, uvr_train, fuvr_train, yr_train, class_icons, marker_options) + + # True absolute likelihood + h_pat = plot_absolute_likelihood(ax_a, p_a_y, xt, mu_t, y_samples, xa_train=xa_train, ya_train=ya_train) + + # True relative likelihood + rel_y_extent = [xt[0, 0], xt[-1, 0], xt[0, 0], xt[-1, 0]] + h_prt = plot_relative_likelihood(ax_r, p_r_y, rel_y_extent, uvr_train, yr_train, class_icons, marker_options) + return fig, (ax_l, ax_a, ax_r) + + +def estimate_plots(xt, ft, mu_t, fhat, vhat, E_y, rel_sigma, + y_samples, p_a_y, p_r_y, xa_train, ya_train, uvr_train, fuvr_train, yr_train, + class_icons = ['ko', 'wo'], marker_options = {'mec':'k', 'mew':0.5}, n_posterior_samples=0, + posterior_plot_kwargs={}, **kwargs): + if p_r_y is None: + return estimate_plots2D(xt, ft, mu_t, fhat, vhat, E_y, rel_sigma, + y_samples, p_a_y, xa_train, ya_train, uvr_train, fuvr_train, yr_train, + class_icons, marker_options, **kwargs) + + fig, (ax_l, ax_a, ax_r) = plot_setup_1d(**kwargs) + + # Posterior samples + if n_posterior_samples > 0: + f_post = np.random.multivariate_normal(fhat.flatten(), vhat, n_posterior_samples) + h_pp = ax_l.plot(xt, f_post.T, lw=0.8, **posterior_plot_kwargs) + else: + h_pp = None + + # Latent function + hf, hpf = plot_with_bounds(ax_l, xt, ft, rel_sigma, c=lines[0]) + hf_hat, hpf_hat = plot_with_bounds(ax_l, xt, fhat, np.sqrt(np.atleast_2d(vhat.diagonal()).T), c=lines[1]) + if uvr_train is not None: + plot_relative_queries(ax_l, uvr_train, fuvr_train, yr_train, class_icons, marker_options) + + ax_l.legend([hf, hf_hat], [r'True latent function, $f(x)$', r'$\mathcal{GP}$ estimate $\hat{f}(x)$']) + + # Absolute posterior 
likelihood + h_abs = plot_absolute_likelihood(ax_a, p_a_y, xt, mu_t, y_samples, E_y=E_y, xa_train=xa_train, ya_train=ya_train) + + # Relative posterior likelihood + rel_y_extent = [xt[0, 0], xt[-1, 0], xt[0, 0], xt[-1, 0]] + h_rel = plot_relative_likelihood(ax_r, p_r_y, rel_y_extent, uvr_train, yr_train, class_icons, marker_options) + + return fig, (ax_l, ax_a, ax_r) + + +# 2D Plot tools + +def plot_setup_2d(t_l = r'Latent function, $f(x)$', t_a = r'Absolute likelihood, $p(y | f(x))$', t_r = None, ax = None, **kwargs): + + if ax is None: + fig = plt.figure() + ax_l = fig.add_subplot(121, projection='3d', **kwargs) + ax_a = fig.add_subplot(122, projection='3d', **kwargs) + fig.set_size_inches(10.0, 3.5) + else: + (ax_l, ax_a) = ax + fig = ax_l.figure + + # Latent function + ax_l.set_title(t_l) + ax_l.set_xlabel('$x_0$') + ax_l.set_ylabel('$x_1$') + ax_l.set_zlabel('$f(x)$') + + # Absolute likelihood + ax_a.set_title(t_a) + ax_a.set_xlabel('$x_0$') + ax_a.set_ylabel('$x_1$') + ax_a.set_zlabel('$y$') + return fig, (ax_l, ax_a) + +def true_plots2D(xt, ft, mu_t, rel_sigma, y_samples, p_a_y, xa_train=None, ya_train=None, uvr_train=None, fuvr_train=None, yr_train=None, + class_icons=['ko', 'wo'], marker_options={'mec':'k', 'mew':0.5}, *args, **kwargs): + + # Plot true function, likelihoods and observations + nx = int(np.sqrt(xt.shape[0])) + x0 = xt[0:nx, 0] # Assuming generated using make_meshlist + x1 = xt[0:-1:nx, 1] + xx, yy = np.meshgrid(x0, x1) + + fig, (ax_l, ax_a) = plot_setup_2d(**kwargs) + + # True latent + ax_l.plot_surface(xx, yy, np.reshape(ft, (nx, nx)), color=lines[0]) + # ax_l.imshow(np.reshape(ft, (nx, nx)), extent=[x0[0], x1[-1], x1[0], x1[1]], origin='lower', aspect='auto') + + # True absolute likelihood + h_yt = ax_a.plot_wireframe(xx, yy, np.reshape(mu_t, (nx, nx)), color=lines[1]) + + norm_py = p_a_y/p_a_y.max() + cc = cm.get_cmap('Blues') + h_py = [] + for y, py in zip(y_samples, norm_py): + h_py.append(ax_a.scatter(xt[:, 0], xt[:, 1], y, s=py*15.0, marker='o', c=cc(py))) + + if xa_train is not None and xa_train.shape[0] > 0: + ax_a.scatter(xa_train[:, 0], xa_train[:, 1], ya_train, c='w', marker='+') + ax_a.legend([h_yt], [r'True mean $E[y]$']) + # ax_a.set_xlim(xt[0], xt[-1]) + + return fig, (ax_l, ax_a) + + +def estimate_plots2D(xt, ft, mu_t, fhat, vhat, E_y, rel_sigma, + y_samples, p_a_y, xa_train, ya_train, uvr_train, fuvr_train, yr_train, + class_icons = ['ko', 'wo'], marker_options = {'mec':'k', 'mew':0.5}, *args, **kwargs): + + # Plot estimated function, likelihoods and observations + nx = int(np.sqrt(xt.shape[0])) + x0 = xt[0:nx, 0] # Assuming generated using make_meshlist + x1 = xt[0:-1:nx, 1] + xx, yy = np.meshgrid(x0, x1) + + fig, (ax_l, ax_a) = plot_setup_2d(**kwargs) + cc = cm.get_cmap('inferno') + + # Latent function estimate + # hf =ax_l.plot_wireframe(xx, yy, np.reshape(ft, (nx, nx)), color=lines[0]) + tfc = np.reshape(vhat.diagonal(), (nx, nx)) + norm = matplotlib.colors.Normalize(vmin=0, vmax=tfc.max()) + hf_hat = ax_l.plot_surface(xx, yy, np.reshape(fhat, (nx, nx)), facecolors=cc(norm(tfc))) + m = cm.ScalarMappable(cmap=cc, norm=norm) + m.set_array([]) + # ax_cb = fig.colorbar(m, ax=ax_l) + + # ax_l.imshow(np.reshape(fhat, (nx, nx)), extent=[x0[0], x1[-1], x1[0], x1[1]], origin='lower', aspect='auto') + # ax_l.legend([hf], [r'True latent function, $f(x)$']) #, r'$\mathcal{GP}$ estimate $\hat{f}(x)$']) + + if uvr_train.shape[0] > 0: + rel_segments = np.zeros((uvr_train.shape[0], 2, 3)) + rel_segments[:, :, 0:2] = uvr_train + rel_segments[:, :, 2] = 
fuvr_train + rel_lines = Line3DCollection(rel_segments, color=lines[3]) + ax_l.add_collection(rel_lines) + + rel_hipoints = np.array([uv[(i+1)/2] for uv, i in zip(rel_segments, yr_train.flat)]) + ax_l.plot(rel_hipoints[:, 0], rel_hipoints[:, 1], rel_hipoints[:, 2], 'ko', **marker_options) + + # Absolute posterior likelihood + # h_yt = ax_a.plot_wireframe(xx, yy, np.reshape(mu_t, (nx, nx)), color=lines[1]) + hEy = ax_a.plot_wireframe(xx, yy, np.reshape(E_y, (nx, nx)), color=lines[3]) + + cc = cm.get_cmap('Blues') + norm_py = p_a_y/p_a_y.max() + h_points = [] + for y, py in zip(y_samples, norm_py): + h_points.append(ax_a.scatter(xt[:, 0], xt[:, 1], y, s=py*15.0, marker='o', c=cc(py))) + + if xa_train.shape[0] > 0: + ax_a.plot(xa_train[:,0], xa_train[:,1], ya_train.flat, 'r^', color=lines[1]) + ax_a.legend([hEy], [r'Posterior mean, $E_{p(y|\mathcal{Y})}\left[y\right]$']) + # fig.colorbar(h_pap, ax=ax_a) + + return fig, (ax_l, ax_a) + + +def reset_axes2d(est_ax): + for ax in est_ax: + ax.cla() + ax.set_xlim([0.0, 1.0]) + ax.set_ylim([0.0, 1.0]) + +def ensure_dir(file_path): + directory = os.path.dirname(file_path) + if not os.path.exists(directory): + os.makedirs(directory) \ No newline at end of file diff --git a/predictive_entropy_search.py b/predictive_entropy_search.py new file mode 100644 index 0000000..e69de29 diff --git a/pref_active_learning.py b/pref_active_learning.py new file mode 100644 index 0000000..441373e --- /dev/null +++ b/pref_active_learning.py @@ -0,0 +1,204 @@ +# Simple 1D GP classification example +import time +import numpy as np +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt +import GPpref +import plot_tools as ptt +import active_learners +import test_data +import yaml +from matplotlib.backends.backend_pdf import PdfPages + +nowstr = time.strftime("%Y_%m_%d-%H_%M") +plt.rc('font',**{'family':'serif','sans-serif':['Computer Modern Roman']}) +plt.rc('text', usetex=True) + +save_plots = True +plot_type = 'video_frames' # 'pdf' +inter_frames = 30 + +with open('./data/shortrun_2D.yaml', 'rt') as fh: + wave = yaml.safe_load(fh) + +try: + np.random.seed(wave['statrun_params']['randseed']) + n_rel_train, n_abs_train = wave['statrun_params']['n_rel_train'], wave['statrun_params']['n_abs_train'] + n_queries = wave['statrun_params']['n_queries'] +except KeyError: + np.random.seed(0) + n_rel_train = 1 + n_abs_train = 0 + n_queries = 20 + +n_xplot = 31 +keep_f = True +learner_index = -2 + +log_hyp = np.log(wave['hyperparameters']) + +# Define polynomial function to be modelled +d_x = wave['GP_params']['hyper_counts'][0]-1 +random_wave = test_data.MultiWave(n_dimensions=d_x, **wave['wave_params']) +true_function = random_wave.out +random_wave.print_values() + +if save_plots: + nowstr = time.strftime("%Y_%m_%d-%H_%M") + fig_dir = 'fig/' + nowstr + '/' + ptt.ensure_dir(fig_dir) + print "Figures will be saved to: {0}".format(fig_dir) + if plot_type == 'pdf': + pdf_pages = PdfPages(fig_dir+'posterior_all.pdf') + + +rel_obs_fun = GPpref.RelObservationSampler(true_function, wave['GP_params']['rel_likelihood'], wave['rel_obs_params']) +abs_obs_fun = GPpref.AbsObservationSampler(true_function, wave['GP_params']['abs_likelihood'], wave['abs_obs_params']) +rel_sigma = wave['rel_obs_params']['sigma'] + +# True function +x_plot = np.linspace(0.0,1.0,n_xplot,dtype='float') +x_test = ptt.make_meshlist(x_plot, d_x) +f_true = abs_obs_fun.f(x_test) +mu_true = abs_obs_fun.mean_link(x_test) +abs_y_samples = abs_obs_fun.l.y_list +p_abs_y_true = 
abs_obs_fun.observation_likelihood_array(x_test, abs_y_samples) +if d_x == 1: + p_rel_y_true = rel_obs_fun.observation_likelihood_array(x_test) +else: + p_rel_y_true = None + +# Training data - note the shifted domain to get the sample out of the way +far_domain = np.tile(np.array([[-3.0], [-2.0]]), d_x) +x_rel, uvi_rel, y_rel, fuv_rel = rel_obs_fun.generate_n_observations(n_rel_train, n_xdim=d_x, domain=far_domain) +x_abs, y_abs, mu_abs = abs_obs_fun.generate_n_observations(n_abs_train, n_xdim=d_x, domain=far_domain) + + +# Plot true functions +plot_kwargs = {'xlim':[0.0, 1.0], 'ylim':[0.0, 1.0]} +if plot_type != 'pdf': + if p_rel_y_true is None: + ax_t = [[], []] + fig_t = plt.figure() + fig_t.set_size_inches([11.44, 8.02]) + ax_t[0].append(fig_t.add_subplot(221, projection='3d', **plot_kwargs)) + ax_t[0].append(fig_t.add_subplot(222, projection='3d', **plot_kwargs)) + ax_t[1].append(fig_t.add_subplot(223, projection='3d', **plot_kwargs)) + ax_t[1].append(fig_t.add_subplot(224, projection='3d', **plot_kwargs)) + else: + fig_t, ax_t = plt.subplots(2,3) + true_ax = ax_t[0] + est_ax = ax_t[1] +else: + true_ax = None + est_ax = None + +fig_t, ax_t = ptt.true_plots(x_test, f_true, mu_true, rel_sigma, + abs_y_samples, p_abs_y_true, p_rel_y_true, + x_abs, y_abs, x_rel[uvi_rel], fuv_rel, y_rel, + t_l=r'True latent function, $f(x)$', ax=true_ax, **plot_kwargs) +if save_plots and (plot_type == 'pdf'): + fig_t.savefig(fig_dir+'true.pdf', bbox_inches='tight') + +# Construct active learner object + +# Construct GP object +prefGP = GPpref.PreferenceGaussianProcess(x_rel, uvi_rel, x_abs, y_rel, y_abs, **wave['GP_params']) + +model_kwargs = {'x_rel':x_rel, 'uvi_rel':uvi_rel, 'x_abs':x_abs, 'y_rel':y_rel, 'y_abs':y_abs, + 'rel_kwargs': wave['rel_obs_params'], 'abs_kwargs': wave['abs_obs_params']} +model_kwargs.update(wave['GP_params']) +learner_kwargs = wave['learners'][learner_index] +learner = active_learners.Learner(**learner_kwargs) +learner.build_model(model_kwargs) + +# Get initial solution +learner.model.set_hyperparameters(log_hyp) +f = learner.model.solve_laplace() +learner.model.print_hyperparameters() + +fig_p, ax_p, pre_data = learner.model.create_posterior_plot(x_test, f_true, mu_true, rel_sigma, fuv_rel, abs_y_samples, ax=est_ax, **plot_kwargs) +if save_plots: + wave['learners'] = [learner_kwargs] + with open(fig_dir+'params.yaml', 'wt') as fh: + yaml.safe_dump(wave, fh) + if plot_type == 'pdf': + pdf_pages.savefig(fig_p, bbox_inches='tight') + else: + fig_p.savefig(fig_dir+'posterior000.png', bbox_inches='tight') +print learner_kwargs + +for obs_num in range(n_queries): + if learner.update_p_rel: + linear_p_rel = max(0.0, (n_queries - obs_num) / float(n_queries)) + learner.obs_arguments['p_rel'] = linear_p_rel + # if 'p_rel' in obs_arguments: + # obs_arguments['p_rel'] = max(0.0, (n_queries - obs_num) / float(n_queries)) + t0 = time.time() + next_x = learner.select_observation() + if next_x.shape[0] == 1: + next_y, next_f = abs_obs_fun.generate_observations(next_x) + learner.model.add_observations(next_x, next_y, keep_f=keep_f) + print '{n:02} - Abs: x:{0}, y:{1}'.format(next_x[0], next_y[0], n=obs_num+1), + else: + next_y, next_uvi, next_fx = rel_obs_fun.gaussian_multi_pairwise_sampler(next_x) + next_fuv = next_fx[next_uvi][:, :, 0] + fuv_rel = np.concatenate((fuv_rel, next_fuv), 0) + learner.model.add_observations(next_x, next_y, next_uvi, keep_f=keep_f) + print '{n:02} - Rel: x:{0}, best_index:{1}'.format(next_x.flatten(), next_uvi[0, 1], n=obs_num+1), + print 't = 
{0:0.3f}s'.format(time.time()-t0) + f = learner.model.solve_laplace() + + if save_plots: + ptt.reset_axes2d(est_ax) + fig_p, ax_p, post_data = learner.model.create_posterior_plot(x_test, f_true, mu_true, rel_sigma, fuv_rel, + abs_y_samples, ax=est_ax, **plot_kwargs) + if plot_type == 'pdf': + pdf_pages.savefig(fig_p, bbox_inches='tight') + else: + fig_p.savefig(fig_dir + 'posterior{0:03d}.png'.format((obs_num+1)*inter_frames), bbox_inches='tight') + for iframe in range(1, inter_frames): + scale = float(iframe)/inter_frames + if pre_data[-1] is None: + pre_data = pre_data[:-1] + inter_data = [(1-scale)*pre + scale*post for pre, post in zip(pre_data, post_data)] + if len(inter_data) < len(post_data): + inter_data.append(None) + fig_p, ax_p = learner.model.update_posterior_plot(x_test, f_true, mu_true, rel_sigma, fuv_rel, + abs_y_samples, *inter_data, ax_p=est_ax) + fig_p.savefig(fig_dir + 'posterior{0:03d}.png'.format(obs_num*inter_frames + iframe), bbox_inches='tight') + pre_data = post_data + plt.close(fig_p) + # fig_p.savefig(fig_dir+'posterior{0:02d}.pdf'.format(obs_num+1), bbox_inches='tight') + + +learner.model.print_hyperparameters() + +if save_plots and plot_type == 'pdf': + pdf_pages.close() +else: + fig_post, ax_p, h_plotobj = learner.model.create_posterior_plot(x_test, f_true, mu_true, rel_sigma, fuv_rel, abs_y_samples, **plot_kwargs) + +plt.show(block=False) +print "Finished!" + +## SCRAP +# learner = active_learners.UCBAbsRelD(**active_args) +# # obs_arguments = {'req_improvement': 0.60, 'n_test': 50, 'gamma': 2.0, 'n_rel_samples': 5, 'p_thresh': 0.7} +# obs_arguments = {'n_test': 100, 'p_rel': 0.5, 'n_rel_samples': 5, 'gamma': 2.0} + +# learner = active_learners.ExpectedImprovementRel(**GP_kwargs) +# obs_arguments = {'n_test': 100, 'zeta': 0.1, 'p_rel':1.0} + +# learner = active_learners.SampledClassification(verbose=verbose, **GP_kwargs) +# obs_arguments = {'n_test':50, 'n_samples':20, 'y_threshold':0.7, 'p_pref_tol':1e-3, 'n_mc_abs':50} + +# learner = active_learners.DetSelect(**GP_kwargs) +# obs_arguments = {'n_test': 100, 'n_rel_samples': 5, 'gamma': 2.0, 'tau': 0.5} + +# learner = active_learners.ActiveLearner(**GP_kwargs) +# obs_arguments = {'p_rel':0.0, 'n_rel_samples': 5} + +# learner = active_learners.MaxVar(**GP_kwargs) +# obs_arguments = {'n_rel_samples': 5, 'p_rel': -10.0, 'rel_tau': 0.1, 'abs_tau': 0.1} diff --git a/read_wine_data.py b/read_wine_data.py new file mode 100644 index 0000000..f79d409 --- /dev/null +++ b/read_wine_data.py @@ -0,0 +1,76 @@ +import numpy as np +import matplotlib.pyplot as plt +import yaml +import GPpref +import scipy.optimize as op +from utils.wine_data import WineQualityData + + +optimise_hyper = False +use_normalised = True +R_limit = 0.1 +wine_type = 'white' + +# Get wine data: +input_data = WineQualityData(wine_type=wine_type) + +fh, ah = plt.subplots(1, 1) +ah.hist(input_data.data.quality, np.arange(-0.5, 11, 1.0)) +ah.set_xticks(np.arange(11)) +ah.set_xlabel('Score') +ah.set_ylabel('Count') +ah.set_title('{0} wine'.format(wine_type)) + +with open('./data/statruns_wine.yaml', 'rt') as fh: + config = yaml.safe_load(fh) +d_x = config['GP_params']['hyper_counts'][0]-1 +log_hyp = np.log(config['hyperparameters']) + +wine_norm = (input_data.data.values - input_data.data.values.min(axis=0)) / (input_data.data.values.max(axis=0) - input_data.data.values.min(axis=0)) + +if use_normalised: + wine_data = wine_norm +else: + wine_data = input_data.data.values +fh2, ah2 = plt.subplots(3, 4) +for i, aa in enumerate(ah2.flat): + aa.cla() + 
aa.scatter(wine_data[:,i], wine_data[:,-1], 2, wine_data[:,0]) + aa.set_title('{0}, {1}'.format(input_data.data.keys()[i], config['hyperparameters'][i])) + +fh3, ah3 = plt.subplots() +C = np.corrcoef(wine_norm, rowvar=False) +hmat = ah3.matshow(np.abs(C)) + +plt.show() +for R, var_name in zip(C[0:-1,-1], input_data.data.keys()): + print('{0}: R = {1:0.2f}'.format(var_name, R)) +corr_variables = (abs(C[-1]) >= R_limit) + +# Subsample wine data down to only correlated variables +# wine_data = wine_data[:, corr_variables] + +if optimise_hyper: + n_rel = 1 + n_abs = 300 + x_rel = wine_data[0:2*n_rel, 0:-1] + uvi_rel = np.array([[0, 1]]) + fuv_rel = wine_data[0:2*n_rel, -1] + y_rel = np.array([1]) + + x_abs = wine_data[2*n_rel:(2*n_rel+n_abs), 0:-1] + y_abs = np.expand_dims(input_data.data.values[2 * n_rel:(2 * n_rel + n_abs), -1] - 2, axis=1).astype(dtype='int') + model_kwargs = {'x_rel': x_rel, 'uvi_rel': uvi_rel, 'x_abs': x_abs, 'y_rel': y_rel, 'y_abs': y_abs, + 'rel_kwargs': config['rel_obs_params'], 'abs_kwargs': config['abs_obs_params']} + model_kwargs.update(config['GP_params']) + model_kwargs['verbose'] = 2 + prefGP = GPpref.PreferenceGaussianProcess(**model_kwargs) + + prefGP.set_hyperparameters(log_hyp) + f = prefGP.calc_laplace(log_hyp) + print np.exp(log_hyp) + log_hyp_opt = op.fmin(prefGP.calc_nlml,log_hyp) + print np.exp(log_hyp_opt) + config['hyperparameters'] = np.exp(log_hyp_opt).tolist() + with open('data/wine_quality/learned_{0}_params.yaml'.format(wine_type), 'w') as fh: + yaml.safe_dump(config, fh) diff --git a/test_data.py b/test_data.py new file mode 100644 index 0000000..205ab70 --- /dev/null +++ b/test_data.py @@ -0,0 +1,216 @@ +import numpy as np +import pickle + + +def wrms(y_true, y_est, weight=True): + # RMS weighted by the true value of y (high value high importance) + if weight: + w = y_true + else: + w = 1.0 + return np.sqrt(np.mean(((y_true - y_est)*w)**2)) + + +def wrms_misclass(y_true, y_est, w_power=2): + # This is the misclassification error, where the weight is the max of the true or predicted value (penalise + # predicting high values if the true value is low) + w = np.power(np.maximum(y_true, y_est), w_power) + return np.sqrt(np.mean(((y_true - y_est)*w)**2)) + +def rel_error(y_true, prel_true, y_est, prel_est, weight=False): + if weight: + y_max = np.maximum(y_true.flatten(), y_est.flatten()) + else: + y_max = np.ones(y_true.shape[0], dtype='float') + nx = prel_true.shape[0] + mean_p_err = 0.0 + w_sum = 0.0 + for i in range(nx): + for j in range(i, nx): + w = max(y_max[i], y_max[j]) + w_sum += w + mean_p_err += w*np.abs(prel_true[i,j] - prel_est[i,j]) + return mean_p_err/w_sum + +def ordinal_kld(p_y_true, p_y_est, w = np.array([1.0])): + kld = -(p_y_true * np.log(p_y_est/p_y_true)).sum(axis=0) + wkld = w*kld/w.mean() + return wkld.mean() + +class ObsObject(object): + def __init__(self, x_rel, uvi_rel, x_abs, y_rel, y_abs): + self.x_rel, self.uvi_rel, self.x_abs, self.y_rel, self.y_abs = x_rel, uvi_rel, x_abs, y_rel, y_abs + + def get_obs(self): + return self.x_rel, self.uvi_rel, self.x_abs, self.y_rel, self.y_abs + +def obs_stats(obs_array, n_rel_samples): + for method in obs_array: + n_rel = 0.0 + n_abs = 0.0 + for ob in method['obs']: + n_rel += ob.x_rel.shape[0]/n_rel_samples + n_abs += ob.x_abs.shape[0] + print "{0}: p_rel = {1}, p_abs = {2}".format(method['name'], n_rel/(n_rel+n_abs), n_abs/(n_rel+n_abs)) + + +class VariableWave(object): + def __init__(self, amp_range, f_range, off_range, damp_range, n_components=1, n_dimensions=1): + self.amp_range 
= amp_range + self.f_range = f_range + self.off_range = off_range + self.damp_range = damp_range + self.n_components = n_components + self.n_dimensions = n_dimensions + self.randomize() + + def out(self, x): + y = self.amplitude*np.cos(self.frequency * np.pi * (x-self.offset)) * np.exp(-self.damping*(x-self.offset)**2) + return y + + def set_values(self, a, f, o, d): + self.amplitude = a + self.frequency = f + self.offset = o + self.damping = d + + def randomize(self, print_vals=False): + self.amplitude = np.random.uniform(low=self.amp_range[0], high=self.amp_range[1]/self.n_components) + self.frequency = np.random.uniform(low=self.f_range[0], high=self.f_range[1]) + self.offset = np.random.uniform(low=self.off_range[0], high=self.off_range[1]) + self.damping = np.random.uniform(low=self.damp_range[0], high=self.damp_range[1]) + if print_vals: + self.print_values() + + def print_values(self): + print "a: {0:.2f}, f: {1:.2f}, o: {2:.2f}, d: {3:.2f}".format(self.amplitude, self.frequency, self.offset, self.damping) + + def export_state(self): + return dict(amplitude=self.amplitude, frequency=self.frequency, offset=self.offset, damping=self.damping) + + +class MultiWave(VariableWave): + def out(self, x): + y = np.zeros(np.array(x).shape) + for a, f, o, d in zip(self.amplitude, self.frequency, self.offset, self.damping): + y += a*np.cos(f*np.pi*(x-o)) * np.exp(-d*(x-o)**2) + return y.sum(axis=1, keepdims=True) + + def randomize(self, print_vals=False): + self.amplitude = np.random.uniform(low=self.amp_range[0], high=self.amp_range[1], size=(self.n_components, self.n_dimensions)) + self.frequency = np.random.uniform(low=self.f_range[0], high=self.f_range[1], size=(self.n_components, self.n_dimensions)) + self.offset = np.random.uniform(low=self.off_range[0], high=self.off_range[1], size=(self.n_components, self.n_dimensions)) + self.damping = np.random.uniform(low=self.damp_range[0], high=self.damp_range[1], size=(self.n_components, self.n_dimensions)) + if print_vals: + self.print_values() + + def print_values(self): + print "a: {0}, f: {1}, o: {2}, d: {3}".format(self.amplitude, self.frequency, self.offset, self.damping) + + def get_values(self): + return self.amplitude, self.frequency, self.offset, self.damping + + +class DoubleMultiWave(object): + def __init__(self, amp_range, f_range, off_range, damp_range, n_components=2): + self.f_low = MultiWave(amp_range[0:2], f_range[0:2], off_range[0:2], damp_range[0:2], n_components=1) + self.f_high = MultiWave(amp_range[2:4], f_range[2:4], off_range[2:4], damp_range[2:4], n_components=n_components-1) + self.randomize() + + def out(self, x): + return self.f_low.out(x)+self.f_high.out(x) + + def randomize(self, print_vals=False): + self.f_low.randomize() + self.f_high.randomize() + if print_vals: + self.print_values() + + def print_values(self): + self.f_low.print_values() + self.f_high.print_values() + + +class WaveSaver(object): + def __init__(self, n_trials, n_components, n_dim=1): + self.amplitude = np.zeros((n_trials, n_components, n_dim), dtype='float') + self.frequency = np.zeros((n_trials, n_components, n_dim), dtype='float') + self.offset = np.zeros((n_trials, n_components, n_dim), dtype='float') + self.damping = np.zeros((n_trials, n_components, n_dim), dtype='float') + self.n = 0 + + def set_vals(self, n, a, f, o, d): + self.n = n + self.amplitude[n] = a + self.frequency[n] = f + self.offset[n] = o + self.damping[n] = d + + def save(self, filename): + with open(filename, 'wb') as fh: + pickle.dump(self, fh) + + def get_vals(self, n): + 
return self.amplitude[n], self.frequency[n], self.offset[n], self.damping[n] + +def damped_wave(x): + y = np.cos(6 * np.pi * (x - 0.5)) * np.exp(-10 * (x - 0.5) ** 2) + return y + +def multi_peak(x): + y = np.cos(6 * np.pi * (x - 0.5)) + return y + +def basic_sine(x): + y = (np.sin(x*2*np.pi + np.pi/4))/1.2 + return y + +def zero_fun(x): + return 0*x + +def data1(): + x_rel = np.array([[0.6], [0.7]]) + uvi_rel = np.array([[0, 1], [1, 0]], dtype='int') + # uv_rel = x_rel[uvi_rel][:,:,0] + y_rel = np.array([[1], [1]], dtype='int') + fuv_rel = np.array([[-0.1, 0.1], [-0.1, 0.1]]) + + x_abs = np.array([[0.2]]) + y_abs = np.array([[0.5]]) + mu_abs = np.array([[0.0]]) + + return x_rel, uvi_rel, y_rel, fuv_rel, x_abs, y_abs, mu_abs + + +def data2(): + x_rel = np.array([[ 0.8517731 ], [ 0.66358258], + [ 0.06054717], [ 0.45331369], + [ 0.8461625 ], [ 0.58854979]]) + uvi_rel = np.array([[0, 1], [2, 3], [4, 5]], dtype='int') + # uv_rel = x_rel[uvi_rel][:,:,0] + y_rel = np.array([[-1], [1], [1]], dtype='int') + fuv_rel = np.array([[0.0043639, -0.10653237], [0.01463141, 0.05046293], + [0.01773679, 0.45730181]]) + + x_abs = np.array([[0.43432351]]) + y_abs = np.array([[0.38966307]]) + mu_abs = np.array([[0.0]]) + + return x_rel, uvi_rel, y_rel, fuv_rel, x_abs, y_abs, mu_abs + + +def data3(): + x_rel = np.array([[0.8517731 ], [0.66358258], + [0.06054717], [0.45331369], + [0.8461625 ], [0.58854979]]) + uvi_rel = np.array([[0, 1], [2, 3], [4, 5]], dtype='int') + # uv_rel = x_rel[uvi_rel][:,:,0] + y_rel = np.array([[-1], [1], [1]], dtype='int') + fuv_rel = np.array([[0.0043639, -0.10653237], [0.01463141, 0.05046293], + [0.01773679, 0.45730181]]) + + x_abs = np.array([[0.43432351], [0.03362113]]) + y_abs = np.array([[0.38966307], [0.999]]) + mu_abs = np.array([[0.0], [0.0]]) + + return x_rel, uvi_rel, y_rel, fuv_rel, x_abs, y_abs, mu_abs diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utils/data_downloader.py b/utils/data_downloader.py new file mode 100644 index 0000000..0afadfa --- /dev/null +++ b/utils/data_downloader.py @@ -0,0 +1,42 @@ +from __future__ import print_function +import os +import urllib3 +import zipfile + +class MrDataGrabber(object): # Does not work with bananas + + def __init__(self, url, target_path): + self.url = url + self.path = target_path + self.target_basename = os.path.basename(self.url) + self.target_file = os.path.join(self.path, self.target_basename) + + def download(self): + if os.path.exists(self.target_file): + print('Target file already downloaded, please remove if you wish to update.') + else: + http = urllib3.PoolManager() + r = http.request('GET', self.url, preload_content=False) + + with open(self.target_file, 'wb') as out: + print('Downloading {0}...'.format(self.url), end='') + while True: + data = r.read(2**16) + if not data: + break + out.write(data) + r.release_conn() + print(' done.') + + if os.path.splitext(self.target_basename)[1].lower() == '.zip': + self.unzip_data() + return self.target_file + + def unzip_data(self, target_dir=None, force=False): + if target_dir is None: + target_dir = os.path.join(self.path, os.path.splitext(self.target_basename)[0]) + if (not force) and os.path.isdir(target_dir): + print('Target directory already exists, not unzipping.') + else: + zip_ref = zipfile.ZipFile(self.target_file, 'r') + zip_ref.extractall(target_dir) diff --git a/utils/wine_data.py b/utils/wine_data.py new file mode 100644 index 0000000..fae5562 --- /dev/null +++ b/utils/wine_data.py @@ -0,0 +1,21 @@ 
+import pandas +import os +from utils.data_downloader import MrDataGrabber + + +class WineQualityData(object): + def __init__(self, wine_type='red', file_loc='data/wine_quality/'): + url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-{0}.csv'.format( + wine_type) + + try: + # Create target directory (including parents) if it does not exist yet + os.makedirs(file_loc) + print('Directory {0} created'.format(file_loc)) + except OSError: + print('Directory {0} already exists'.format(file_loc)) + + self.download_data = MrDataGrabber(url, file_loc) + self.download_data.download() + self.file = self.download_data.target_file + self.data = pandas.read_csv(self.file, delimiter=';')
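
A minimal usage sketch of the wine-quality loader above (not part of the diff): it assumes the repository root is on the Python path and that the UCI CSV is reachable, and it mirrors the min-max normalisation used in read_wine_data.py; the 'red' wine type and the variable names are illustrative only.

import numpy as np
from utils.wine_data import WineQualityData

# Downloads winequality-red.csv on first use, then reads it with pandas.
wine = WineQualityData(wine_type='red')
raw = wine.data.values                      # 11 physicochemical columns + quality (last column)
wine_norm = (raw - raw.min(axis=0)) / (raw.max(axis=0) - raw.min(axis=0))
x_features = wine_norm[:, :-1]              # normalised inputs, as in read_wine_data.py
y_quality = raw[:, -1].astype('int')        # raw 0-10 quality scores
print('{0} samples, quality range {1}-{2}'.format(x_features.shape[0], y_quality.min(), y_quality.max()))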