diff --git a/GP_preference_demo.py b/GP_preference_demo.py index ee18e19..49c8933 100644 --- a/GP_preference_demo.py +++ b/GP_preference_demo.py @@ -3,68 +3,127 @@ import matplotlib.pyplot as plt import GPpref import scipy.optimize as op +import plot_tools as ptt +import test_data +import yaml +# from scipy.stats import beta +plt.rc('font',**{'family':'serif','sans-serif':['Computer Modern Roman']}) +plt.rc('text', usetex=True) -log_hyp = np.log([0.1,1.0,0.1]) # length_scale, sigma_f, sigma_probit -np.random.seed(1) - -n_train = 20 -true_sigma = 0.05 -delta_f = 1e-5 - -# Define polynomial function to be modelled -def true_function(x): - y = np.sin(x*2*np.pi + np.pi/4) + 0.2 - return y - -# Define noisy observation function -def obs_function(x, sigma): - fx = true_function(x) - noise = np.random.normal(scale=sigma, size=x.shape) - return fx + noise - -def noisy_preference_rank(uv, sigma): - fuv = obs_function(uv,sigma) - y = -1*np.ones((fuv.shape[0],1),dtype='int') - y[fuv[:,1] > fuv[:,0]] = 1 - return y, fuv - -# Main program -# Plot true function -x_plot = np.linspace(0.0,1.0,101,dtype='float') -y_plot = true_function(x_plot) +train_hyper = False +use_test_data = False # test_data.data3 # +verbose = 2 + +with open('./data/statruns_2D.yaml', 'rt') as fh: + wave = yaml.safe_load(fh) +try: + np.random.seed(wave['statrun_params']['randseed']) +except KeyError: + np.random.seed(0) +d_x = wave['GP_params']['hyper_counts'][0]-1 +random_wave = test_data.MultiWave(n_dimensions=d_x, **wave['wave_params']) +log_hyp = np.log(wave['hyperparameters']) + +n_rel_train = 5 +n_abs_train = 5 + +n_xplot = 31 +n_posterior_samples = 3 + +random_wave.print_values() +true_function = random_wave.out + +rel_obs_fun = GPpref.RelObservationSampler(true_function, wave['GP_params']['rel_likelihood'], wave['rel_obs_params']) +abs_obs_fun = GPpref.AbsObservationSampler(true_function, wave['GP_params']['abs_likelihood'], wave['abs_obs_params']) + +# True function +# This is a complicated (but memory efficient, maybe?) way to generate list of all grid points, there's probably a +# better (itertools way) +x_plot = np.linspace(0.0,1.0,n_xplot,dtype='float') +x_test = ptt.make_meshlist(x_plot, d_x) +f_true = abs_obs_fun.f(x_test) +mu_true = abs_obs_fun.mean_link(x_test) +abs_y_samples = abs_obs_fun.l.y_list +p_abs_y_true = abs_obs_fun.observation_likelihood_array(x_test, abs_y_samples) +if d_x is 1: + p_rel_y_true = rel_obs_fun.observation_likelihood_array(x_test) # Training data - this is a bit weird, but we sample x values, then the uv pairs # are actually indexes into x, because it is easier computationally. 
You can # recover the actual u,v values using x[ui],x[vi] -x_train = np.random.random((2*n_train,1)) -uvi_train = np.random.choice(range(2*n_train), (n_train,2), replace=False) -uv_train = x_train[uvi_train][:,:,0] +if use_test_data: + x_rel, uvi_rel, y_rel, fuv_rel, x_abs, y_abs, mu_abs = use_test_data() +else: + x_rel, uvi_rel, y_rel, fuv_rel = rel_obs_fun.generate_n_observations(n_rel_train, n_xdim=d_x) + x_abs, y_abs, mu_abs = abs_obs_fun.generate_n_observations(n_abs_train, n_xdim=d_x) + +# Construct GP object +wave['GP_params']['verbose'] = verbose +model_kwargs = {'x_rel':x_rel, 'uvi_rel':uvi_rel, 'x_abs':x_abs, 'y_rel':y_rel, 'y_abs':y_abs, + 'rel_kwargs': wave['rel_obs_params'], 'abs_kwargs': wave['abs_obs_params']} +model_kwargs.update(wave['GP_params']) +prefGP = GPpref.PreferenceGaussianProcess(**model_kwargs) + +prefGP.set_hyperparameters(log_hyp) +# If training hyperparameters, use external optimiser +if train_hyper: + log_hyp = op.fmin(prefGP.calc_nlml,log_hyp) + +f = prefGP.calc_laplace(log_hyp) +prefGP.print_hyperparameters() -# Get noisy observations f(uv) and corresponding ranks y_train -y_train, fuv_train = noisy_preference_rank(uv_train,true_sigma) +# Latent predictions +fhat, vhat = prefGP.predict_latent(x_test) -hf,ha = plt.subplots(1,1) -ha.plot(x_plot,y_plot,'r-') -for uv,fuv,y in zip(uv_train, fuv_train, y_train): - ha.plot(uv, fuv, 'b-') - ha.plot(uv[(y+1)/2],fuv[(y+1)/2],'k+') +# Posterior likelihoods +p_abs_y_post, E_y = prefGP.abs_posterior_likelihood(abs_y_samples, fhat=fhat, varhat=vhat) -ha.set_title('Training data') -ha.set_ylabel('f(x)') -ha.set_xlabel('x') +wrms = test_data.wrms(mu_true, E_y) +wrms2 = test_data.wrms_misclass(mu_true, E_y) +if d_x is 1: + p_rel_y_true = rel_obs_fun.observation_likelihood_array(x_test) + p_rel_y_post = prefGP.rel_posterior_likelihood_array(fhat=fhat, varhat=vhat) +else: + p_rel_y_true, p_rel_y_post = None, None -# GP hyperparameters -# note: using log scaling to aid learning hyperparameters with varied magnitudes +if d_x <= 2: + # Plot true functions + fig_t, ax_t = ptt.true_plots(x_test, f_true, mu_true, wave['rel_obs_params']['sigma'], + abs_y_samples, p_abs_y_true, p_rel_y_true, + t_l=r'True latent function, $f(x)$') -prefGP = GPpref.PreferenceGaussianProcess(x_train, uvi_train, y_train, delta_f=delta_f) + # Posterior estimates + fig_p, ax_p, h_plotobj = \ + ptt.estimate_plots(x_test, f_true, mu_true, fhat, vhat, E_y, wave['rel_obs_params']['sigma'], + abs_y_samples, p_abs_y_post, p_rel_y_post, + x_abs, y_abs, x_rel[uvi_rel], fuv_rel, y_rel, n_posterior_samples=n_posterior_samples, + posterior_plot_kwargs={'color':'grey', 'ls':'--'}, + t_l=r'$\mathcal{GP}$ latent function estimate $\hat{f}(x)$', + t_a=r'Posterior absolute likelihood, $p(u | \mathcal{Y}, \theta)$', + t_r=r'Posterior relative likelihood $P(x_0 \succ x_1 | \mathcal{Y}, \theta)$') + if d_x == 1: + p_err = test_data.rel_error(mu_true, p_rel_y_true, E_y, p_rel_y_post, weight=False) + print "WRMS: {0:0.3f}, WRMS_MC: {1:0.3f}, p_err: {2:0.3f}".format(wrms, wrms2, p_err) + else: + print "WRMS: {0:0.3f}, WRMS_MC: {1:0.3f}".format(wrms, wrms2) + plt.show(block=False) -# Pseudocode: -# FOr a set of hyperparameters, return log likelihood that can be used by an optimiser -theta0 = log_hyp +else: + print "Input state space dimension: {0} is too high to plot".format(d_x) + print "WRMS: {0:0.3f}, WRMS_MC: {1:0.3f}".format(wrms, wrms2) -# log_hyp = op.fmin(prefGP.calc_nlml,theta0) -f,lml = prefGP.calc_laplace(log_hyp) -ha.plot(x_train, f, 'g^') -plt.show() +## SCRAP +# 
p_y = np.zeros((n_ysamples, n_xplot)) +# y_samples = np.linspace(0.01, 0.99, n_ysamples) +# iny = 1.0/n_ysamples +# E_y2 = np.zeros(n_xplot) +# +# normal_samples = np.random.normal(size=n_mcsamples) +# for i,(fstar,vstar) in enumerate(zip(fhat, vhat.diagonal())): +# f_samples = normal_samples*vstar+fstar +# aa, bb = prefGP.abs_likelihood.get_alpha_beta(f_samples) +# p_y[:, i] = [iny*np.sum(beta.pdf(yj, aa, bb)) for yj in y_samples] +# p_y[:, i] /= np.sum(p_y[:, i]) +# E_y2[i] = np.sum(np.dot(y_samples, p_y[:, i])) \ No newline at end of file diff --git a/GP_regression_demo.py b/GP_regression_demo.py index 4580219..fba2edf 100644 --- a/GP_regression_demo.py +++ b/GP_regression_demo.py @@ -3,52 +3,87 @@ import scipy.optimize as op import matplotlib.pyplot as plt import GPr -np.random.seed(0) +import plot_tools as ptt +plt.rc('font',**{'family':'serif','sans-serif':['Computer Modern Roman']}) +plt.rc('text', usetex=True) + +f_sigma = 0.15 +n_samples = 10 +r_seed = 1 +n_posterior_samples = 3 +optimise_hp = False # Define polynomial function to be modelled def true_function(x): - c = np.array([3,5,-9,-3,2],float) - y = np.polyval(c,x) + # c = np.array([3,5,-9,-3,2],float) + # y = np.polyval(c,x) + y = np.sin(x*2*np.pi) return y + # Define noisy observation function -def obs_function(x): - y = true_function(x) + np.random.normal(0,0.1,len(x)) +def obs_function(x, sigma): + y = true_function(x) + np.random.normal(0, sigma, size=x.shape) return y + # Main program # Plot true function -x_plot = np.arange(0,1,0.01,float) -y_plot = true_function(x_plot) +x_plot = np.atleast_2d(np.arange(-0.5, 1.5, 0.01, dtype='float')).T +fx_plot = true_function(x_plot) plt.figure() -plt.plot(x_plot,y_plot,'k-') +plt.plot(x_plot, fx_plot, 'k-') # Training data -x_train = np.random.random(20) -y_train = obs_function(x_train) -plt.plot(x_train,y_train,'rx') +np.random.seed(r_seed) +x_train = np.atleast_2d(0.6*np.random.random(n_samples)+0.2).T +# x_train = np.array([-10.0]) +y_train = obs_function(x_train, f_sigma) +plt.plot(x_train, y_train, 'rx') # Test data -x_test = np.arange(0,1,0.01,float) +x_test = x_plot # GP hyperparameters # note: using log scaling to aid learning hyperparameters with varied magnitudes -log_hyp = np.log([1,1,0.1]) +log_hyp = np.log([0.3, 1.0, 0.2]) mean_hyp = 0 like_hyp = 0 # Initialise GP for hyperparameter training -initGP = GPr.GaussianProcess(log_hyp,mean_hyp,like_hyp,"SE","zero","zero",x_train,y_train) +initGP = GPr.GaussianProcess(log_hyp, mean_hyp, like_hyp, "SE", "zero", "zero", x_train, y_train) # Run optimisation routine to learn hyperparameters -opt_log_hyp = op.fmin(initGP.compute_likelihood,log_hyp) +if optimise_hp: + opt_log_hyp = op.fmin(initGP.compute_likelihood, log_hyp) +else: + opt_log_hyp = log_hyp -# Learnt GP with optimised hyperparameters +# Learned GP with optimised hyperparameters optGP = GPr.GaussianProcess(opt_log_hyp,mean_hyp,like_hyp,"SE","zero","zero",x_train,y_train) -y_test,cov_y = optGP.compute_prediction(x_test) +fhat_test, fhat_var = optGP.compute_prediction(x_test) + +h_fig,h_ax = plt.subplots() +h_fx, patch_fx = ptt.plot_with_bounds(h_ax, x_plot, fx_plot, f_sigma, c=ptt.lines[0]) +h_y, = h_ax.plot(x_train, y_train, 'rx', mew=1.0, ms=8) +h_fhat, patch_fhat = ptt.plot_with_bounds(h_ax, x_plot, fhat_test, np.sqrt(fhat_var.diagonal())[np.newaxis].T, c=ptt.lines[1], ls='--') +f_post = np.random.multivariate_normal(fhat_test.flatten(), fhat_var, n_posterior_samples) +h_pp = h_ax.plot(x_plot, f_post.T, '--', color='grey', lw=0.8) +h_ax.autoscale() + +# h_pp = 
h_ax.plot(x_plot, y_post.T, c='grey', ls='--', lw=0.8) + +# h_ax.set_ylim([-3, 3]) +gp_str = '$\hat{{f}}(x)|\mathcal{{Y}} \sim \mathcal{{GP}}(\mathbf{{0}}, K_{{SE}}:l={0:0.2f}, \sigma_f={1:0.2f}, \sigma_n={2:0.2f})$' +gp_l, gp_sigf, gp_sign = optGP.covFun.hyp +gp_str = gp_str.format(gp_l, gp_sigf, gp_sign) +h_ax.legend((h_fx, h_y, h_fhat),('$f(x)$', '$y \sim \mathcal{{N}}(f(x), {0:0.1f}^2)$'.format(f_sigma), gp_str), loc='best') +h_ax.set_xlabel('$x$') +#h_fig.savefig('fig/regression_example.pdf', bbox_inches='tight', transparent='true') + # Plot true and modelled functions -plt.plot(x_test,y_test,'b-') -plt.plot(x_test,y_test+np.sqrt(cov_y),'g--') -plt.plot(x_test,y_test-np.sqrt(cov_y),'g--') -plt.show() +# plt.plot(x_test,y_test,'b-') +# plt.plot(x_test,y_test+np.sqrt(cov_y),'g--') +# plt.plot(x_test,y_test-np.sqrt(cov_y),'g--') +plt.show(block=False) diff --git a/GPpref.py b/GPpref.py index f19ccd2..2c873a8 100644 --- a/GPpref.py +++ b/GPpref.py @@ -1,13 +1,34 @@ # Simple 1D GP classification example import numpy as np import GPy -from scipy.special import ndtr as std_norm_cdf +#from scipy.special import ndtr as std_norm_cdf, digamma, polygamma +from scipy.special import digamma, polygamma +from scipy.stats import norm, beta +from scipy.linalg import block_diag +import sys -#define a standard normal pdf +class LaplaceException(Exception): + pass + +# Define a standard normal pdf _sqrt_2pi = np.sqrt(2*np.pi) def std_norm_pdf(x): x = np.clip(x,-1e150,1e150) - return np.exp(-(x**2)/2)/_sqrt_2pi + return norm.pdf(x) + # return np.exp(-(x**2)/2)/_sqrt_2pi + + +def std_norm_cdf(x): + x = np.clip(x, -30, 100 ) + return norm.cdf(x) + + +def norm_pdf_norm_cdf_ratio(z): + # Inverse Mills ratio for stability + out = -z + out[z>-30] = std_norm_pdf(z[z>-30])/std_norm_cdf(z[z>-30]) + return out + # Define squared distance calculation function def squared_distance(A,B): @@ -21,6 +42,15 @@ def squared_distance(A,B): sqDist = A2 + B2 - AB return sqDist + +def print_hyperparameters(theta, log=False): + if log is True: + theta = np.exp(theta) + nl = len(theta)-3 + lstr = ', '.join(['%.2f']*nl) % tuple(theta[:nl]) + print "l: {0}, sig_f: {1:0.2f}, sig: {2:0.2f}, v: {3:0.2f}".format(lstr, theta[-3], theta[-2], theta[-1]) + + # Define squared exponential CovarianceFunction function class SquaredExponential(object): def __init__(self, logHyp, x): @@ -44,17 +74,30 @@ def compute_Kxz_matrix(self, z): return Kxz class PrefProbit(object): + type = 'preference' + y_type = 'discrete' + y_list = np.array([[-1], [1]], dtype='int') + def __init__(self, sigma=1.0): - self.set_sigma(sigma) + self.set_hyper([sigma]) self.log2pi = np.log(2.0*np.pi) + def set_hyper(self, hyper): + self.set_sigma(hyper[0]) + def set_sigma(self, sigma): self.sigma = sigma self._isqrt2sig = 1.0 / (self.sigma * np.sqrt(2.0)) self._i2var = self._isqrt2sig**2 + + def print_hyperparameters(self): + print "Probit relative, Gaussian noise on latent. 
", + print "Sigma: {0:0.2f}".format(self.sigma) - def z_k(self, uvi, f, y): - zc = self._isqrt2sig * (f[uvi[:, 1]] - f[uvi[:, 0]]) + def z_k(self, y, f, scale=None): + if scale is None: + scale = self._isqrt2sig + zc = scale * (f[:, 1, None] - f[:, 0, None]) # Weird None is to preserve shape return y * zc def I_k(self, x, uv): # Jensen and Nielsen @@ -67,17 +110,21 @@ def I_k(self, x, uv): # Jensen and Nielsen def derivatives(self, uvi, y, f): nx = len(f) - z = self.z_k(uvi, f, y) - phi_z = std_norm_cdf(z) - N_z = std_norm_pdf(z) + z = self.z_k(y=y, f=self.get_rel_f(f, uvi)) + N_over_phi = norm_pdf_norm_cdf_ratio(z) + # phi_z = std_norm_cdf(z) + # N_z = std_norm_pdf(z) # First derivative (Jensen and Nielsen) dpy_df = np.zeros((nx, 1), dtype='float') - dpyuv_df = y * self._isqrt2sig * N_z / phi_z - dpy_df[uvi[:, 0]] += -dpyuv_df # This implements I_k (note switch because Jensen paper has funky backwards z_k) - dpy_df[uvi[:, 1]] += dpyuv_df + dpyuv_df = y * self._isqrt2sig * N_over_phi # N_z / phi_z + for i, (uvii, uvij) in enumerate(uvi): + dpy_df[uvii] += -dpyuv_df[i] ## ^^FIXED BIT - THANE LOOK HERE^^ + dpy_df[uvij] += dpyuv_df[i] # This implements I_k (note switch because Jensen paper has funky backwards z_k) + # dpy_df[uvi[:, 0]] += -dpyuv_df # NOTE: THESE TWO LINES ARE INCORRECT FOR REPEATED INDEXES!!!!! + # dpy_df[uvi[:, 1]] += dpyuv_df # This implements I_k (note switch because Jensen paper has funky backwards z_k) - inner = -self._i2var * (z * N_z / phi_z + (N_z / phi_z) ** 2) + inner = -self._i2var * (z * N_over_phi + (N_over_phi) ** 2) W = np.zeros((nx, nx), dtype='float') for uvik, ddpy_df in zip(uvi, inner): xi, yi = uvik @@ -85,63 +132,417 @@ def derivatives(self, uvi, y, f): W[yi, yi] -= ddpy_df # If x_i = x_i = v_k then I(x_i)*I(y_i) = -1*-1 = 1 W[xi, yi] -= -ddpy_df # Otherwise, I(x_i)*I(y_i) = -1*1 = -1 W[yi, xi] -= -ddpy_df - return W, dpy_df - def log_marginal(self, uvi, y, f, iK, logdetK): - z = self.z_k(uvi, f, y) + py = np.log(std_norm_cdf(z)) + return W, dpy_df, py + + def get_rel_f(self, f, uvi): + return np.hstack((f[uvi[:, 0]], f[uvi[:, 1]])) + + def likelihood(self, y, f, scale=None): + z = self.z_k(y, f, scale=scale) phi_z = std_norm_cdf(z) - psi = np.sum(np.log(phi_z)) - 0.5 * np.matmul(np.matmul(f.T, iK), f) - 0.5*logdetK - iK.shape[0]/2.0*self.log2pi - return psi.flat[0] + return phi_z + + def log_likelihood(self, y, f): + return np.log(self.likelihood(y, f)) + + def posterior_likelihood(self, fhat, varhat, uvi, y=1): # This is the likelihood assuming a Gaussian over f + var_star = 2*self.sigma**2 + np.atleast_2d([varhat[u, u] + varhat[v, v] - varhat[u, v] - varhat[v, u] for u,v in uvi]).T + p_y = self.likelihood(y, self.get_rel_f(fhat, uvi), 1.0/np.sqrt(var_star)) + return p_y + + def generate_samples(self, f): + fuv = f + np.random.normal(scale=self.sigma, size=f.shape) + y = -1 * np.ones((fuv.shape[0], 1), dtype='int') + y[fuv[:, 1] > fuv[:, 0]] = 1 + return y, fuv + + +class OrdinalProbit(object): + type = 'categorical' + y_type = 'discrete' + + def __init__(self, sigma=1.0, b=1.0, n_ordinals=5, eps=1.0e-10): + self.n_ordinals=n_ordinals + self.set_hyper([sigma, b]) + self.eps = eps + self.y_list = np.atleast_2d(np.arange(1, self.n_ordinals+1, 1, dtype='int')).T + + def set_hyper(self, hyper): + self.set_sigma(hyper[0]) + self.set_b(hyper[1]) + + def set_b(self, b): + if not hasattr(b, "__len__"): + b = abs(b) + self.b = np.hstack(([-np.Inf],np.linspace(-b, b, self.n_ordinals-1), [np.Inf])) + elif len(b) == self.n_ordinals+1: + self.b = b + elif len(b) == 
self.n_ordinals-1: + self.b = np.hstack(([-np.Inf], b, [np.Inf])) + else: + raise ValueError('Specified b should be a scalar or vector of breakpoints') + + def set_sigma(self, sigma): + self.sigma = sigma + self._isigma = 1.0/self.sigma + self._ivar = self._isigma**2 + + def print_hyperparameters(self): + print "Ordinal probit, {0} ordered categories.".format(self.n_ordinals), + print "Sigma: {0:0.2f}, b: ".format(self.sigma), + print self.b + + def z_k(self, y, f): + return self._isigma*(self.b[y] - f) + + def norm_pdf(self, y, f): + f = f*np.ones(y.shape, dtype='float') # This ensures the number of f values matches the number of y + out = np.zeros(y.shape, dtype='float') + for i in range(out.shape[0]): + if y[i] != 0 and y[i] != self.n_ordinals: # b0 = -Inf -> N(-Inf) = 0 + z = self._isigma*(self.b[y[i]] - f[i]) + out[i] = std_norm_pdf(z) + return out + + def norm_cdf(self, y, f, var_x=0.0): + ivar = self._isigma + var_x + f = f*np.ones(y.shape, dtype='float') + out = np.zeros(y.shape, dtype='float') + for i in range(out.shape[0]): + if y[i][0] == self.n_ordinals: + out[i] = 1.0 + elif y[i][0] != 0: + z = ivar*(self.b[y[i]] - f[i]) + out[i] = std_norm_cdf(z) + return out + + def z_pdf(self, y, f): + f = f*np.ones(y.shape, dtype='float') + out = np.zeros(y.shape, dtype='float') + for i in range(out.shape[0]): + if y[i] != 0 and y[i] != self.n_ordinals: # b0 = -Inf -> N(-Inf) = 0 + z = self._isigma*(self.b[y[i]] - f[i]) + out[i] = z*std_norm_pdf(z) + return out + + + def likelihood(self, y, f): + return self.norm_cdf(y, f) - self.norm_cdf(y-1, f) + + def log_likelihood(self, y, f): + return np.log(self.likelihood(y, f)) + + def derivatives(self, y, f): + + l = self.likelihood(y, f) + py = np.log(l) + + # First derivative - Chu and Gharamani + # Having issues with derivative (likelihood denominator drops to 0) + dpy_df = np.zeros(l.shape, dtype='float') + d2py_df2 = np.zeros(l.shape, dtype='float') + for i in range(l.shape[0]): + if l[i] < self.eps: + # l2 = self.likelihood(y[i], f[i]+self.delta_f) + # l0 = self.likelihood(y[i], f[i]-self.delta_f) + # dpy_df[i] = -(l2-l[i])/self.delta_f/l[i] # (ln(f))' = f'/f + # d2py_df2[i] = (l2 - 2*l[i] + l0)/self.delta_f**2/dpy_df[i]/l[i] + + if y[i] == 1: + dpy_df[i] = self._isigma*self.z_k(y[i], f[i]) + d2py_df2[i] = -self._ivar + elif y[i] == self.n_ordinals: + dpy_df[i] = self._isigma*self.z_k(y[i]-1, f[i]) + d2py_df2[i] = -self._ivar + else: + z1 = self.z_k(y[i], f[i]) + z2 = self.z_k(y[i]-1, f[i]) + ep = np.exp(-0.5*(z1**2 - z2**2)) + dpy_df[i] = self._isigma*(z1*ep-z2)/(ep - 1.0) + d2py_df2[i] = -(self._ivar*(1.0 - (z1**2 *ep - z2**2)/(ep - 1.0)) + dpy_df[i]**2) + else: + dpy_df[i] = -self._isigma*(self.norm_pdf(y[i], f[i]) - self.norm_pdf(y[i]-1, f[i])) / l[i] + d2py_df2[i] = -(dpy_df[i]**2 + self._ivar*(self.z_pdf(y[i], f[i]) - self.z_pdf(y[i]-1, f[i])) / l[i]) + + W = np.diagflat(-d2py_df2) + return W, dpy_df, py + + def posterior_likelihood(self, fhat, var_star, y=None): + if y is None: + y = self.y_list + py = np.zeros((len(y), len(fhat)), dtype='float') + mu = np.zeros(fhat.shape, dtype='float') + for i, (f, v) in enumerate(zip(fhat, var_star.diagonal())): + py[:,[i]] = self.norm_cdf(y, f, v) - self.norm_cdf(y-1, f, v) + mu[i] = (py[:,[i]]*y).sum() + if len(y) != self.n_ordinals or any(y != self.y_list): + print "Specified y vector is not full ordinal set." 
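            # (Note: the expected rating mu is only meaningful when summed over the full ordinal set,
            # so when a partial y vector was supplied the code below falls back to posterior_mean,
            # which re-evaluates over the full y_list.)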
+ mu = self.posterior_mean(fhat, var_star) + return py, mu + + def posterior_mean(self, fhat, var_star): + py, mu = self.posterior_likelihood(fhat, var_star) + return mu + + def mean_link(self, f): + mu = np.zeros(f.shape, dtype='float') + for i in range(f.shape[0]): + py = self.likelihood(self.y_list, f[i]) + mu[i] = (py*self.y_list).sum() # Expected value, first moment + return mu + + def generate_samples(self, f): + z = np.zeros(f.shape, dtype='int') + mu = np.zeros(f.shape, dtype='float') + for i in range(f.shape[0]): + py = self.likelihood(self.y_list, f[i]) + mu[i] = (py*self.y_list).sum() # Expected value, first moment + z[i] = np.sum(np.random.uniform() > py.cumsum())+1 + return z, mu + + +class AbsBoundProbit(object): + type = 'bounded continuous' + y_type = 'bounded' + y_list = np.atleast_2d(np.linspace(0.01, 0.99, 101)).T + + def __init__(self, sigma=1.0, v=10.0): + # v is the precision, kind of related to inverse of noise, high v is sharp distributions + # sigma is the slope of the probit, basically scales how far away from + # 0 the latent has to be to to move away from 0.5 output. Sigma should + # basically relate to the range of the latent function + self.set_hyper([sigma, v]) + self.log2pi = np.log(2.0*np.pi) + + def set_hyper(self, hyper): + self.set_sigma(hyper[0]) + self.set_v(hyper[1]) + + def set_sigma(self, sigma): + self.sigma = sigma + self._isqrt2sig = 1.0 / (self.sigma * np.sqrt(2.0)) + self._i2var = self._isqrt2sig**2 + + def set_v(self, v): + self.v = v + + def print_hyperparameters(self): + print "Beta distribution, probit mean link.", + print "Sigma: {0:0.2f}, v: {1:0.2f}".format(self.sigma, self.v) + + def mean_link(self, f): + ml = np.clip(std_norm_cdf(f*self._isqrt2sig), 1e-12, 1.0-1e-12) + return ml + + # def alpha(self, f): + # return self.v * self.mean_link(f) + # + # def beta(self, f): + # return self.v * (1-self.mean_link(f)) + + def get_alpha_beta(self, f): + ml = self.mean_link(f) + aa = self.v * ml + bb = self.v - aa # = self.v * (1-ml) + return aa, bb + + def likelihood(self, y, f): + aa, bb = self.get_alpha_beta(f) + return beta.pdf(y, aa, bb) + + def log_likelihood(self, y, f): + return np.log(self.likelihood(y,f)) + + def cdf(self, y, f): + aa, bb = self.get_alpha_beta(f) + return beta.cdf(y, aa, bb) + + def derivatives(self, y, f): + + aa, bb = self.get_alpha_beta(f) + + # Trouble with derivatives... 
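        # A sketch of where the expressions below come from (chain rule through the probit mean
        # link; since a + b = v is constant, the digamma(a+b) terms cancel):
        #   log Beta(y | a, b) = (a-1)*ln(y) + (b-1)*ln(1-y) - ln B(a, b),
        #   a = v*Phi(z), b = v*(1 - Phi(z)), z = f/(sigma*sqrt(2)),
        #   da/df = v*N(z)/(sigma*sqrt(2)) = -db/df,
        # so d/df log Beta(y | a, b) = v*N(z)/(sigma*sqrt(2)) * (ln(y) - ln(1-y) - digamma(a) + digamma(b)),
        # which is the dpy_df expression below; differentiating once more w.r.t. f gives Wdiag.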
+ dpy_df = self.v*self._isqrt2sig*std_norm_pdf(f*self._isqrt2sig) * (np.log(y) - np.log(1-y) - digamma(aa) + digamma(bb)) + + Wdiag = - self.v*self._isqrt2sig*std_norm_pdf(f*self._isqrt2sig) * ( + f * self._i2var * (np.log(y) - np.log(1.0-y) - digamma(aa) + digamma(bb)) + + self.v * self._isqrt2sig * std_norm_pdf(f*self._isqrt2sig) * (polygamma(1, aa) + polygamma(1, bb)) ) + + W = np.diagflat(Wdiag) + + py = np.log(beta.pdf(y, aa, bb)) + + return -W, dpy_df, py + + def posterior_likelihood(self, fhat, varhat, y=None, normal_samples=None): # This is MC sampled due to no closed_form solution + if normal_samples is None: + normal_samples = np.random.normal(size=1000) + iny = 1.0/len(normal_samples) + + if y is None: + y = self.y_list + + # Sampling from posterior to show likelihoods + p_y = np.zeros((len(y), fhat.shape[0])) + for i, (fstar, vstar) in enumerate(zip(fhat, varhat.diagonal())): + f_samples = normal_samples * vstar + fstar + p_y[:, i] = [iny * np.sum(self.likelihood(yj, f_samples)) for yj in y] + return p_y, self.posterior_mean(fhat, varhat) + + def posterior_mean(self, fhat, var_star): + #mu_t = self.mean_link(fhat) + vv = np.atleast_2d(var_star.diagonal()).T + E_x = np.clip(std_norm_cdf(fhat/(np.sqrt(2*self.sigma**2 + vv))), 1e-12, 1.0-1e-12) + return E_x + + def generate_samples(self, f): + mu = self.mean_link(f) + a, b = self.get_alpha_beta(f) + z = np.zeros(f.shape, dtype='float') + for i, (aa,bb) in enumerate(zip(a,b)): + z[i] = beta.rvs(aa, bb) + return z, mu + class PreferenceGaussianProcess(object): - def __init__(self, x_train, uvi_train, y_train, likelihood=PrefProbit, delta_f = 1e-6): - # log_hyp are log of hyperparameters, note that it is [length_0, ..., length_d, sigma_f, sigma_probit] - self._xdim = x_train.shape[1] - self._nx = x_train.shape[0] - self.x_train = x_train - self.y_train = y_train - self.uvi_train = uvi_train + def __init__(self, x_rel, uvi_rel, x_abs, y_rel, y_abs, rel_likelihood='PrefProbit', delta_f=1.0e-5, + abs_likelihood='AbsBoundProbit', verbose=False, hyper_counts=[2, 1, 2], rel_kwargs={}, abs_kwargs={}): + # hyperparameters are split by hyper_counts, where hyper_counts should contain three integers > 0, the first is + # the number of hypers in the GP covariance, second is the number in the relative likelihood, last is the number + # in the absolute likelihood. hyper_counts.sum() should be equal to len(log_hyp) + # log_hyp are log of hyperparameters, note that it is [length_0, ..., length_d, sigma_f, sigma_probit, v_beta] + + # Training points are split into relative and absolute for calculating f, but combined for predictions. 
+ self.set_observations(x_rel, uvi_rel, x_abs, y_rel, y_abs) + self.delta_f = delta_f + self.rel_likelihood = getattr(sys.modules[__name__], rel_likelihood)(**rel_kwargs) # This calls the constructor from string + self.abs_likelihood = getattr(sys.modules[__name__], abs_likelihood)(**abs_kwargs) + + self.verbose = verbose + + if self._xdim is not None: + self.kern = GPy.kern.RBF(self._xdim, ARD=True) + + self.Ix = np.eye(self._nx) - self.likelihood = likelihood() + self.f = None - self.kern = GPy.kern.RBF(self._xdim, ARD=True) + self.hyper_counts = np.array(hyper_counts) + self.init_extras() + def init_extras(self): + pass - def calc_laplace(self, loghyp, f=None): + def set_observations(self, x_rel, uvi_rel, x_abs, y_rel, y_abs): + if x_rel.shape[0] is not 0: + self._xdim = x_rel.shape[1] + elif x_abs.shape[0] is not 0: + self._xdim = x_abs.shape[1] + else: + self._xdim = None + # raise Exception("No Input Points") + self._n_rel = x_rel.shape[0] + self._n_abs = x_abs.shape[0] + self.x_rel = x_rel + self.y_rel = y_rel + self.x_abs = x_abs + self.y_abs = y_abs + self.uvi_rel = uvi_rel + + + self.x_train_all = np.concatenate((self.x_rel, self.x_abs), 0) + + self._nx = self.x_train_all.shape[0] + self.Ix = np.eye(self._nx) + + def add_observations(self, x, y, uvi=None, keep_f=False): + # keep_f is used to reset the Laplace solution. If it's a small update, it's sometimes better to keep the old + # values and append some 0's for the new observations (keep_f = True), otherwise it is reset (keep_f = False) + # Default is keep_f = False + if self._xdim is None: + self._xdim = x.shape[1] + self.kern = GPy.kern.RBF(self._xdim, ARD=True) + if uvi is None: # Absolute observation/s + if keep_f: + self.f = np.vstack((self.f, np.zeros((x.shape[0], 1)))) + x_abs = np.concatenate((self.x_abs, np.atleast_2d(x)), 0) + y_abs = np.concatenate((self.y_abs, np.atleast_2d(y)), 0) + self.set_observations(self.x_rel, self.uvi_rel, x_abs, self.y_rel, y_abs) + else: # Relative observation/s + if keep_f: # The rel observations are stored at the front of f[0:_n_rel] + self.f = np.vstack((self.f[0:self._n_rel], np.zeros((x.shape[0], 1)), self.f[self._n_rel:])) + x_rel = np.concatenate((self.x_rel, np.atleast_2d(x)), 0) + y_rel = np.concatenate((self.y_rel, np.atleast_2d(y)), 0) + uvi_rel = np.concatenate((self.uvi_rel, uvi + self.x_rel.shape[0]), 0) + self.set_observations(x_rel, uvi_rel, self.x_abs, y_rel, self.y_abs) + + def set_hyperparameters(self, loghyp): + assert sum(self.hyper_counts) == len(loghyp), "Sum of hyper_counts must match length of log_hyp" + assert self.hyper_counts[0] == self._xdim+1, "Currently only supporting sq exp covariance, hyper_counts[0] must be x_dim + 1" self.kern.lengthscale = np.exp(loghyp[0:self._xdim]) - self.kern.variance = (np.exp(loghyp[self._xdim]))**2 - self.likelihood.set_sigma = np.exp(loghyp[-1]) + self.kern.variance = 1.0 # (np.exp(loghyp[self._xdim]))**2 + dex = self.hyper_counts.cumsum() + self.rel_likelihood.set_hyper(np.exp(loghyp[dex[0]:dex[1]])) + self.abs_likelihood.set_hyper(np.exp(loghyp[dex[1]:dex[2]])) + # self.rel_likelihood.set_sigma(np.exp(loghyp[-3])) # Do we need different sigmas for each likelihood? Yes! + # self.abs_likelihood.set_sigma(np.exp(loghyp[-2])) # I think this sigma relates to sigma_f in the covariance, and is actually possibly redundant + # self.abs_likelihood.set_v(np.exp(loghyp[-1])) # Should this relate to the rel_likelihood probit noise? 
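    # For illustration (example values only; assumes a 1-D input with the PrefProbit relative and
    # AbsBoundProbit absolute likelihoods used elsewhere in this file), the packed vector is:
    #   hyper_counts = [2, 1, 2]            # covariance, rel likelihood, abs likelihood
    #   log_hyp = np.log([0.1,    # lengthscale l
    #                     1.0,    # sigma_f slot (counted, but variance is currently fixed to 1.0)
    #                     0.1,    # PrefProbit sigma
    #                     1.0,    # AbsBoundProbit sigma
    #                     10.0])  # AbsBoundProbit v (Beta precision)
    #   dex = hyper_counts.cumsum() -> [2, 3, 5], so log_hyp[0:1] sets the lengthscale,
    #   log_hyp[2:3] goes to rel_likelihood.set_hyper and log_hyp[3:5] to abs_likelihood.set_hyper.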
- if f is None: - f = np.zeros((self._nx, 1)) + def calc_laplace(self, loghyp=None): + if loghyp is not None: + self.set_hyperparameters(loghyp) + + if self.f is None or self.f.shape[0] is not self._nx or np.isnan(self.f).any(): + f = np.zeros((self._nx, 1), dtype='float') + else: + f = self.f # With current hyperparameters: - Ix = np.eye(self._nx) - K = self.kern.K(self.x_train) - eps = 1e-6 - inv_ok = False + self.Kxx = self.kern.K(self.x_train_all) - while not inv_ok: - try: - L = np.linalg.cholesky(K + eps*Ix) - iK = np.linalg.solve(L.T, np.linalg.solve(L, Ix)) - # detK = (np.product(L.diagonal()))**2 - logdetK = np.sum(np.log(L.diagonal())) - inv_ok = True - except np.linalg.linalg.LinAlgError: - eps = eps*10 - print "Inversion issue, adding noise: {0}".format(eps) + self.iK, self.logdetK = self._safe_invert_noise(self.Kxx) # First, solve for \hat{f} and W (mode finding Laplace approximation, Newton-Raphson) f_error = self.delta_f + 1 + nloops = 0 while f_error > self.delta_f: - W, dpy_df = self.likelihood.derivatives(self.uvi_train, self.y_train, f) - g = (iK + W) - f_new = np.matmul(np.linalg.inv(g), np.matmul(W, f) + dpy_df) - lml = self.likelihood.log_marginal(self.uvi_train, self.y_train, f_new, iK, logdetK) + # Is splitting these apart correct? Will there be off-diagonal elements of W_abs that should be + # non-zero because of the relative training points? + f_rel = f[0:self._n_rel] + f_abs = f[self._n_rel:] + + # Get relative Hessian and Gradient + if self._n_rel>0 and self._n_abs>0: + W_rel, dpy_df_rel, py_rel = self.rel_likelihood.derivatives(self.uvi_rel, self.y_rel, f_rel) + # Get Absolute Hessian and Gradient + # Note that y_abs has to be in [0,1] + W_abs, dpy_df_abs, py_abs = self.abs_likelihood.derivatives(self.y_abs, f_abs) + + # Combine W, gradient + py = py_abs.sum() + py_rel.sum() + dpy_df = np.concatenate((dpy_df_rel, dpy_df_abs), axis=0) + W = block_diag(W_rel, W_abs) + + elif self._n_rel>0: + W, dpy_df, py = self.rel_likelihood.derivatives(self.uvi_rel, self.y_rel, f_rel) + + elif self._n_abs>0: + W, dpy_df, py = self.abs_likelihood.derivatives(self.y_abs, f_abs) + + # # print "Total" + # print "Dpy, W:" + # print dpy_df + # print W + lambda_eye = 0.0*np.eye(self.iK.shape[0]) + + g = (self.iK + W - lambda_eye) + f_new = np.matmul(np.linalg.inv(g), np.matmul(W-lambda_eye, f) + dpy_df) + #lml = self.rel_likelihood.log_marginal(self.uvi_rel, self.y_rel, f_new, iK, logdetK) ## Jensen version (iK + W)^-1 = K - K((I + WK)^-1)WK (not sure how to get f'K^-1f though... 
# ig = K - np.matmul(np.matmul(np.matmul(K, np.linalg.inv(Ix + np.matmul(W, K))), W), K) @@ -151,11 +552,197 @@ def calc_laplace(self, loghyp, f=None): df = np.abs((f_new - f)) f_error = np.max(df) - print f_error,lml + # print "F Error: " + str(f_error) #,lml + # print "F New: " + str(f_new) f = f_new + nloops += 1 + if nloops > 10000: + raise LaplaceException("Maximum loops exceeded in calc_laplace!!") + if self.verbose > 1: + lml = py - 0.5*np.matmul(f.T, np.matmul(self.iK, f)) - 0.5*np.log(np.linalg.det(np.matmul(W, self.Kxx) + self.Ix)) + print "Laplace iteration {0:02d}, log p(y|f) = {1:0.2f}".format(nloops, lml[0,0]) + + self.W = W + self.f = f + self.iKf = np.matmul(self.iK, self.f) + self.KWI = np.matmul(self.W, self.Kxx) + self.Ix + + return f#, lml + + def _safe_invert_noise(self, mat, start_noise=1.0e-6): + eps = start_noise + inv_ok = False + + while not inv_ok: + try: + L = np.linalg.cholesky(mat + eps*self.Ix) + imat = np.linalg.solve(L.T, np.linalg.solve(L, self.Ix)) + # detK = (np.product(L.diagonal()))**2 + logdet = np.sum(np.log(L.diagonal())) + inv_ok = True + except np.linalg.linalg.LinAlgError: + eps = max(1e-6, eps*10.0) + print "Inversion issue, adding noise: {0}".format(eps) + return imat, logdet + + def calc_nlml(self, loghyp, f=None): + if f is None: + f = self.calc_laplace(loghyp) + # Now calculate the log likelihoods (remember log(ax) = log a + log x) + log_py_f_rel = self.rel_likelihood.log_likelihood(self.y_rel, self.rel_likelihood.get_rel_f(f, self.uvi_rel)) + log_py_f_abs = self.abs_likelihood.log_likelihood(self.y_abs, f[self._n_rel:]) #TODO: I would prefer to use the indexing in absolute ratings too for consistency + fiKf = np.matmul(f.T, self.iKf) + lml = log_py_f_rel.sum()+log_py_f_abs.sum() - 0.5*fiKf - 0.5*np.log(np.linalg.det(self.KWI)) + return -lml + + def predict_latent(self, x): + assert hasattr(self, 'iKf') + kt = self.kern.K(self.x_train_all, x) + mean_latent = np.matmul(kt.T, self.iKf) + Ktt = self.kern.K(x) + try: + iKW = np.linalg.inv(self.KWI) + except np.linalg.linalg.LinAlgError: + raise + var_latent = Ktt - np.matmul(kt.T, np.matmul(iKW, np.matmul(self.W, kt))) + return mean_latent, var_latent + + def sample_latent_posterior(self, mean, covariance, n_samples = 1): + assert mean.shape[0] == covariance.shape[0] + assert covariance.shape[1] == covariance.shape[0] + y_post = np.random.multivariate_normal(mean.flatten(), covariance, n_samples) + return y_post + + def _check_latent_input(self, x=None, fhat=None, varhat=None): + if (fhat is None or varhat is None): + if x is not None: + fhat, varhat = self.predict_latent(x) + else: + raise ValueError('Must supply either x or fhat and varhat') + return fhat, varhat + + def rel_posterior_likelihood(self, uvi, y, x=None, fhat=None, varhat=None): + fhat, varhat = self._check_latent_input(x, fhat, varhat) + p_y = self.rel_likelihood.posterior_likelihood(fhat, varhat, uvi, y) + return p_y + + def rel_posterior_likelihood_array(self, x=None, fhat=None, varhat=None, y=-1): + fhat, varhat = self._check_latent_input(x, fhat, varhat) + p_y = np.zeros((fhat.shape[0], fhat.shape[0]), dtype='float') + cross_uvi = np.zeros((fhat.shape[0],2), dtype='int') + cross_uvi[:, 1] = np.arange(fhat.shape[0]) + for i in cross_uvi[:, 1]: + cross_uvi[:, 0] = i + p_y[:, i:i+1] = self.rel_likelihood.posterior_likelihood(fhat, varhat, cross_uvi, y) + return p_y + + def rel_posterior_MAP(self, uvi, x=None, fhat=None, varhat=None): + p_y = self.rel_posterior_likelihood(uvi, y=1, x=x, fhat=fhat, varhat=varhat) + return 
2*np.array(p_y < 0.5, dtype='int')-1 + + def abs_posterior_likelihood(self, y=None, x=None, fhat=None, varhat=None, **kwargs): # Currently sample-based + fhat, varhat = self._check_latent_input(x, fhat, varhat) + return self.abs_likelihood.posterior_likelihood(fhat, varhat, y=y, **kwargs) + + def abs_posterior_mean(self, x=None, fhat=None, varhat=None): + fhat, varhat = self._check_latent_input(x, fhat, varhat) + return self.abs_likelihood.posterior_mean(fhat, varhat) + + def print_hyperparameters(self): + print "COV: '{0}', l: {1}, sigma_f: {2}".format(self.kern.name, self.kern.lengthscale.values, np.sqrt(self.kern.variance.values)) + print "REL: ", + self.rel_likelihood.print_hyperparameters() + print "ABS: ", + self.abs_likelihood.print_hyperparameters() + + +class ObservationSampler(object): + def __init__(self, true_fun, likelihood_type, likelihood_kwargs): + self.f = true_fun + ltype = getattr(sys.modules[__name__], likelihood_type) + self.l = ltype(**likelihood_kwargs) + + def generate_observations(self, x): + fx = self.f(x) + y, ff = self.l.generate_samples(fx) + return y, ff + + @staticmethod + def _gen_x_obs(n, n_xdim=1, domain=None): + # Domain should be 2 x n_xdim, i.e [[x0_lo, x1_lo, ... , xn_lo], [x0_hi, x1_hi, ... , xn_hi ]] + x_test = np.random.uniform(size=(n, n_xdim)) + if domain is not None: + x_test = x_test * np.diff(domain, axis=0) + domain[0, :] + return x_test + + def gaussian_multi_pairwise_sampler(self, x): + # Find the maximum from a set of n samples (x should be n by d) + # Return pairwise relationships that one point is higher than the others + # Sample from Gaussian distributions around function values + fx = np.random.normal(loc=self.f(x), scale=self.l.sigma) + max_xi = np.argmax(fx) + other_xi = np.delete(np.arange(x.shape[0]), max_xi) + y = np.ones((x.shape[0]-1, 1), dtype='int') + uvi = np.hstack((np.atleast_2d(other_xi).T, max_xi*y)) + return y, uvi, fx + +class AbsObservationSampler(ObservationSampler): + def observation_likelihood_array(self, x, y=None): + fx = self.f(x) + if y is None: + y = self.l.y_list + p_y = np.zeros((y.shape[0], fx.shape[0]), dtype='float') + for i, fxi in enumerate(fx): + p_y[:, i:i + 1] = self.l.likelihood(y, fxi[0]) + return p_y + + def mean_link(self, x): + fx = self.f(x) + return self.l.mean_link(fx) + + def generate_n_observations(self, n, n_xdim=1, domain=None): + x = self._gen_x_obs(n, n_xdim, domain) + y, mu = self.generate_observations(x) + return x, y, mu + + +class RelObservationSampler(ObservationSampler): + def observation_likelihood_array(self, x, y=-1): + fx = self.f(x) + p_y = np.zeros((x.shape[0], x.shape[0]), dtype='float') + cross_fx = np.hstack((np.zeros((x.shape[0], 1)), fx)) + for i, fxi in enumerate(fx): + cross_fx[:, 0] = fxi + p_y[:, i:i + 1] = self.l.likelihood(y, cross_fx) + return p_y + + def generate_observations(self, x, uvi): + fx = self.f(x) + y, ff = self.l.generate_samples(fx[uvi][:, :, 0]) + return y, ff + + def generate_n_observations(self, n, n_xdim=1, domain=None): + x = self._gen_x_obs(2*n, n_xdim, domain) + uvi = np.arange(2*n).reshape((n, 2)) + # uv = x[uvi] # [:, :, 0] + y, fuv = self.generate_observations(x, uvi) + return x, uvi, y, fuv + + + + +# SCRAP: + # Estimate dpy_df + # delta = 0.001 + # print "Estimated dpy_df" + # est_dpy_df = (self.log_likelihood(y, f+delta) - self.log_likelihood(y, f-delta))/(2*delta) + # print est_dpy_df + # print 'log likelihood' + # print np.sum(self.log_likelihood(y, f)) + # + # print "Estimated W" + # est_W_diag = (self.log_likelihood(y, f+2*delta) - 
2*self.log_likelihood(y,f) + self.log_likelihood(y, f-2*delta))/(2*delta)**2 + # + # print est_W_diag - return f, lml - def calc_nlml(self, loghyp): - f,lml = self.calc_laplace(loghyp) - return -lml \ No newline at end of file diff --git a/GPr.py b/GPr.py index 96be1e3..93e6a43 100644 --- a/GPr.py +++ b/GPr.py @@ -43,23 +43,23 @@ def __init__(self,log_hyp,mean_hyp,like_hyp,covFunName,meanFunName,likeFunName, # Define GP prediction function def compute_prediction(self,testInput): - Kxz = self.covFun.compute_Kxz_matrix(testInput) - Kxx = self.covFun.compute_Kxx_matrix() - iKxx = np.linalg.inv(Kxx) + Kxx = self.covFun.compute_K_matrix() + Kxz = self.covFun.compute_K_matrix(X1=testInput) + Kzz = self.covFun.compute_K_matrix(testInput, testInput) + iKxx = np.linalg.inv(Kxx + self.covFun.sn2*np.eye(Kxx.shape[0])) Kzx = Kxz.T - K_diag = np.diagonal(np.dot(np.dot(Kzx,iKxx),Kxz)) - K_noise = self.covFun.sf2*np.ones(np.size(testInput,axis=0)) - fz = np.dot(np.dot(Kzx,iKxx),self.trainTarget.T) - cov_fz = K_noise - K_diag + K_diag = np.dot(np.dot(Kzx,iKxx),Kxz) + fz = np.dot(np.dot(Kzx,iKxx), self.trainTarget) + cov_fz = Kzz - K_diag return fz, cov_fz # Define GP negative log marginal likelihood function - def compute_likelihood(self,hyp): + def compute_likelihood(self, hyp): n = np.size(self.trainInput,axis=0) - covSE = SquaredExponential(hyp,self.trainInput) - Kxx = covSE.compute_Kxx_matrix() + covSE = SquaredExponential(hyp, self.trainInput) + Kxx = covSE.compute_K_matrix() m = self.meanFun.y - L = np.linalg.cholesky(Kxx) + L = np.linalg.cholesky(Kxx +self.covFun.sn2*np.eye(Kxx.shape[0])) iKxx = np.linalg.solve(L.T,np.linalg.solve(L,np.eye(n))) y = np.reshape(self.trainTarget,(len(self.trainTarget),1)) err_y = np.dot(np.dot((y-m).T,iKxx),(y-m))/2 @@ -93,18 +93,16 @@ def __init__(self,logHyp,x): self.hyp = np.exp(self.logHyp) # hyperparameters n = len(self.hyp) self.M = self.hyp[:n-2] # length scales - self.sf2 = self.hyp[n-2]**2 # squared exponential variance + self.sf2 = self.hyp[n-2]**2 # sigma_f variance self.sn2 = self.hyp[n-1]**2 # noise variance - - def compute_Kxx_matrix(self): - scaledX = self.x/self.M - sqDist = squared_distance(scaledX,scaledX) - Kxx = self.sn2*np.eye(np.size(self.x,axis=0))+self.sf2*np.exp(-0.5*sqDist) - return Kxx - - def compute_Kxz_matrix(self,z): - scaledX = self.x/self.M - scaledZ = z/self.M + + def compute_K_matrix(self, X0=None, X1=None): + if X0 is None: + X0 = self.x + if X1 is None: + X1 = self.x + scaledX = X0/self.M + scaledZ = X1/self.M sqDist = squared_distance(scaledX,scaledZ) - Kxz = self.sf2*np.exp(-0.5*sqDist) - return Kxz \ No newline at end of file + K = self.sf2*np.exp(-0.5*sqDist) + return K diff --git a/active_learners.py b/active_learners.py new file mode 100644 index 0000000..7cd49f4 --- /dev/null +++ b/active_learners.py @@ -0,0 +1,725 @@ +import numpy as np +import GPpref +from scipy.stats import beta +import plot_tools as ptt +import time +import sys + +def calc_ucb(fhat, vhat, gamma=2.0, sigma_offset=0.0): + # return fhat + gamma * (np.sqrt(np.atleast_2d(vhat.diagonal()).T) - sigma_offset) + return fhat.flatten() + gamma * (np.sqrt(np.diagonal(vhat)) - sigma_offset) + + +def softmax_selector(x, tau=1.0): + # High tau is more random + ex = np.exp((x - x.max())/tau) + Px = ex/ex.sum() + return np.random.choice(len(x), p=Px) + +class Learner(object): + def __init__(self, model_type, obs_args, name, update_p_rel = False): + self.model_type = getattr(sys.modules[__name__], model_type) + self.obs_arguments = obs_args + self.name = name + 
self.update_p_rel = update_p_rel + + def build_model(self, model_kwargs): + self.model = self.model_type(**model_kwargs) + + def select_observation(self): + return self.model.select_observation(**self.obs_arguments) + +class ActiveLearner(GPpref.PreferenceGaussianProcess): + def init_extras(self): + self._default_uvi = np.array([[0, 1]]) + self._plus_y_obs = np.ones((1, 1), dtype='int') + self._minus_y_obs = -1*self._plus_y_obs + + def solve_laplace(self, log_hyp=None): + self.f = self.calc_laplace(log_hyp) + return self.f + + def get_observations(self): + return self.x_rel, self.uvi_rel, self.x_abs, self.y_rel, self.y_abs + + def select_observation(self, p_rel=0.5, domain=None, n_rel_samples=2): + if np.random.uniform() > p_rel: # i.e choose an absolute sample + n_rel_samples = 1 + return self.uniform_domain_sampler(n_rel_samples, domain) + + def uniform_domain_sampler(self, n_samples, domain=None, sortx=False): + # Domain should be 2 x n_xdim, i.e [[x0_lo, x1_lo, ... , xn_lo], [x0_hi, x1_hi, ... , xn_hi ]] + x_test = np.random.uniform(size=(n_samples, self._xdim)) + if domain is not None: + x_test = x_test*np.diff(domain, axis=0) + domain[0, :] + if sortx: + x_test = np.sort(x_test, axis=0) + return x_test + + def linear_domain_sampler(self, n_samples, domain=None): + # One dimension only at the moment! + assert self._xdim == 1 + x_test = np.atleast_2d(np.linspace(.0, 1.0, n_samples, self._xdim)).T + if domain is not None: + x_test = x_test*np.diff(domain, axis=0) + domain[0, :] + return x_test + + + def get_latent_ests(self, x_test, abs_y_samples ): + # Latent predictions + d_x = x_test.shape[1] + fhat, vhat = self.predict_latent(x_test) + p_abs_y_post, E_y = self.abs_posterior_likelihood(abs_y_samples, fhat=fhat, varhat=vhat) + if d_x is 1: + p_rel_y_post = self.rel_posterior_likelihood_array(fhat=fhat, varhat=vhat) + else: + p_rel_y_post = None + return fhat, vhat, p_abs_y_post, E_y, p_rel_y_post + + + def create_posterior_plot(self, x_test, f_true, mu_true, rel_sigma, fuv_train, abs_y_samples, **plot_kwargs): + + fhat, vhat, p_abs_y_post, E_y, p_rel_y_post = self.get_latent_ests(x_test, abs_y_samples) + + x_train, uvi_train, x_abs_train, y_train, y_abs_train = self.get_observations() + uv_train = x_train[uvi_train] + + # Posterior estimates + fig_p, ax_p = \ + ptt.estimate_plots(x_test, f_true, mu_true, fhat, vhat, E_y, rel_sigma, + abs_y_samples, p_abs_y_post, p_rel_y_post, + x_abs_train, y_abs_train, uv_train, fuv_train, y_train, + t_l=r'$\mathcal{GP}$ latent function estimate $\hat{f}(x)$', + t_a=r'Posterior absolute likelihood, $p(y | \mathcal{Y}, \theta)$', + t_r=r'Posterior relative likelihood $P(x_0 \succ x_1 | \mathcal{Y}, \theta)$', **plot_kwargs) + return fig_p, ax_p, [fhat, vhat, p_abs_y_post, E_y, p_rel_y_post] + + def update_posterior_plot(self, x_test, f_true, mu_true, rel_sigma, fuv_train, abs_y_samples, + fhat, vhat, p_abs_y_post, E_y, p_rel_y_post, ax_p): + x_train, uvi_train, x_abs_train, y_train, y_abs_train = self.get_observations() + uv_train = x_train[uvi_train] + + ptt.reset_axes2d(ax_p) + # Posterior estimates + fig_p, ax_p = \ + ptt.estimate_plots(x_test, f_true, mu_true, fhat, vhat, E_y, rel_sigma, + abs_y_samples, p_abs_y_post, p_rel_y_post, + x_abs_train, y_abs_train, uv_train, fuv_train, y_train, + t_l=r'$\mathcal{GP}$ latent function estimate $\hat{f}(x)$', + t_a=r'Posterior absolute likelihood, $p(y | \mathcal{Y}, \theta)$', + t_r=r'Posterior relative likelihood $P(x_0 \succ x_1 | \mathcal{Y}, \theta)$', ax=ax_p) + return fig_p, ax_p + + +class 
MaxVar(ActiveLearner): + l2pie = 1.0 + np.log(2*np.pi) + def logistic(self, H, n_rel): + L = 2.0 + return 1.0 / (1.0 + np.exp(-L * (H-self.kern.variance)) ) + + # Max variance + def select_observation(self, domain=None, n_test=100, n_rel_samples = 2, p_rel = 0.0, rel_tau = 1.0, abs_tau = 1.0e-5,w_v=1.0, selector='det'): + # p_rel is the likelihood of selecting a relative query + # tau are softmax temperatures (low is more greedy, high is more random) + # w_v is the UCB weighting for variance, where UCB = w_v*variance + (1-w_v)*mean + # det_type is the type of value used to select between + # If p_rel < 0 then we will select based on variance magnitude + if p_rel < 0.0: + p_select = 1.0 + else: + p_select = p_rel + x_test = self.uniform_domain_sampler(n_test, domain) + fhat, vhat = self.predict_latent(x_test) + vv = np.sqrt(np.diagonal(vhat)) + + if p_select >= 1.0 or np.random.uniform() < p_select: # i.e choose a relative sample + available_indexes = set(range(len(x_test))) + dK = np.zeros(len(available_indexes)) + best_n = [softmax_selector(vv, tau=rel_tau)] + while len(best_n) < n_rel_samples: + dK[best_n[-1]] = -1.0e10 # Bit of a scam because it is still possible to sample this value + available_indexes.remove(best_n[-1]) + best_n.append(-1) + for cn in available_indexes: + best_n[-1] = cn + K = vhat[np.ix_(best_n, best_n)] + dK[cn] = np.linalg.det(K) + best_n[-1] = softmax_selector(dK/dK.max(), tau=rel_tau) # np.argmax(dK) # + else: + best_n = [softmax_selector(w_v*vv+(1.0-w_v)*fhat.flatten(), tau=abs_tau)] #[np.argmax(ucb)] # + + # This chooses an absolute query based on determinant. Choose relK using beta cdf likelihood + if p_rel < 0.0: + best_abs = softmax_selector(w_v * vv + (1.0 - w_v) * fhat.flatten(), tau=abs_tau) + if selector == 'det': + best_detK = -p_rel*np.sqrt(dK[best_n[-1]]) + K_ratio = best_detK/(vv.max() + best_detK) + elif selector == 'entropy': + H_rel = 0.5*(n_rel_samples*self.l2pie + np.log(dK[best_n[-1]])) + H_abs = 0.5*(self.l2pie + np.log(vhat[best_abs, best_abs])) + K_ratio = H_rel/(H_rel + H_abs) + p_select = beta.cdf(K_ratio, -p_rel, -p_rel) + if np.random.uniform() > p_select: + best_n = [best_abs] + return x_test[best_n, :] + +class UCBLatent(ActiveLearner): + # All absolute returns + def select_observation(self, domain=None, n_test=100, gamma=2.0): + x_test = self.uniform_domain_sampler(n_test, domain) + fhat, vhat = self.predict_latent(x_test) + ucb = calc_ucb(fhat, vhat, gamma) + return x_test[[np.argmax(ucb)], :] + + +class UCBLatentSoftmax(ActiveLearner): + # All absolute returns + def select_observation(self, domain=None, n_test=100, gamma=2.0, tau=1.0): + x_test = self.uniform_domain_sampler(n_test, domain) + fhat, vhat = self.predict_latent(x_test) + ucb = calc_ucb(fhat, vhat, gamma) + return x_test[[softmax_selector(ucb, tau)], :] + +class UCBCovarianceSoftmax(ActiveLearner): + # All absolute returns + def select_observation(self, domain=None, n_test=100, gamma=2.0, tau=1.0): + x_test = self.uniform_domain_sampler(n_test, domain) + fhat, vhat = self.predict_latent(x_test) + ucb = fhat.flatten() + gamma * (vhat.sum(axis=1) ** 0.25) + return x_test[[softmax_selector(ucb, tau)], :] + +class UCBOut(ActiveLearner): + # NOT FULLY IMPLEMENTED - BROKEN + def select_observation(self, domain=None, n_test=100, gamma=2.0): + # Don't know how to recover the second moment of the predictive distribution, so this isn't done + x_test = self.uniform_domain_sampler(n_test, domain) + fhat, vhat = self.predict_latent(x_test) + Ey = self.expected_y(x_test, fhat, vhat) + 
return x_test[[np.argmax(Ey)], :] + + +class ProbabilityImprovementAbs(ActiveLearner): + def mean_var_sampler(self, n_test, domain): + x_test = self.uniform_domain_sampler(n_test, domain) + fhat, vhat = self.predict_latent(x_test) + shat = np.atleast_2d(np.sqrt(vhat.diagonal())).T + return x_test, fhat, shat + + @staticmethod + def delta(f_star, f_est, sig_est, zeta): + return (f_est - f_star - zeta) / sig_est + + def probability_improvement(self, f_star, f_est, sig_est, zeta): + Z = self.delta(f_star, f_est, sig_est, zeta) + PI = GPpref.std_norm_cdf(Z) + return PI + + def select_observation(self, domain=None, n_test=100, zeta=0.0, *args, **kwargs): + f_star = self.f.max() + x_test, fhat, shat = self.mean_var_sampler(n_test, domain) + PI = self.probability_improvement(f_star, fhat, shat, zeta) + return x_test[[np.argmax(PI)], :] + + +class ProbabilityImprovementRel(ProbabilityImprovementAbs): + def select_observation(self, domain=None, n_test=100, zeta=0.0, *args, **kwargs): + i_star = np.argmax(self.f) + x_out = np.zeros((2, self.x_train_all.shape[1]), dtype='float') + x_out[0] = self.x_train_all[i_star] + x_out[1] = super(ProbabilityImprovementRel, self).select_observation(domain, n_test, zeta, *args, **kwargs)[0] + return x_out + + +class ExpectedImprovementAbs(ProbabilityImprovementAbs): + def expected_improvement(self, f_star, f_est, sig_est, zeta): + Z = self.delta(f_star, f_est, sig_est, zeta) + EI = sig_est * (Z * GPpref.std_norm_cdf(Z) + GPpref.std_norm_pdf(Z)) + return EI + + def select_observation(self, domain=None, n_test=100, zeta=0.0, *args, **kwargs): + f_star = self.f.max() + x_test, fhat, shat = self.mean_var_sampler(n_test, domain) + EI = self.expected_improvement(f_star, fhat, shat, zeta) + return x_test[[np.argmax(EI)], :] + + +class ExpectedImprovementRel(ExpectedImprovementAbs): + def select_observation(self, domain=None, n_test=100, zeta=0.0, *args, **kwargs): + i_star = np.argmax(self.f) + x_out = np.zeros((2, self.x_train_all.shape[1]), dtype='float') + x_out[0] = self.x_train_all[i_star] + x_out[1] = super(ExpectedImprovementRel, self).select_observation(domain, n_test, zeta, *args, **kwargs)[0] + return x_out + + +# class PredictiveEntropy(ActiveLearner): +# # All absolute returns +# def select_observation(self, domain=None, n_test=100): +# +# x_test = self.uniform_domain_sampler(n_test, domain) +# fhat, vhat = self.predict_latent(x_test) +# ucb = calc_ucb(fhat, vhat, gamma) +# return x_test[[softmax_selector(ucb, tau)], :] + +class ABSThresh(ActiveLearner): + def select_observation(self, domain=None, n_test=100, p_thresh=0.7): + x_test = self.uniform_domain_sampler(n_test, domain) + fhat, vhat = self.predict_latent(x_test) + aa, bb = self.abs_likelihood.get_alpha_beta(fhat) + p_under_thresh = beta.cdf(p_thresh, aa, bb) + # ucb = calc_ucb(fhat, vhat, gamma) + return x_test[[np.argmax(p_under_thresh * (1.0 - p_under_thresh))], :] + + +class UCBAbsRel(ActiveLearner): + def select_observation(self, domain=None, n_test=100, p_rel=0.5, n_rel_samples=2, gamma=2.0, tau=1.0): + x_test = self.uniform_domain_sampler(n_test, domain) + fhat, vhat = self.predict_latent(x_test) + ucb = calc_ucb(fhat, vhat, gamma) + + if np.random.uniform() < p_rel: # i.e choose a relative sample + best_n = [softmax_selector(ucb, tau=tau)] #[np.argmax(ucb)] # + # sq_dist = GPpref.squared_distance(x_test, x_test) + p_rel_y = self.rel_posterior_likelihood_array(fhat=fhat, varhat=vhat) + + while len(best_n) < n_rel_samples: + # ucb = ucb*sq_dist[best_n[-1], :] # Discount ucb by distance + 
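                # The selected index is zeroed and the remaining candidates are down-weighted by
                # 4*p*(1-p), which peaks at p = 0.5: points whose comparison outcome against the
                # already-selected point is most uncertain keep the most weight.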
ucb[best_n[-1]] = 0.0 + ucb *= 4*p_rel_y[best_n[-1],:]*(1.0 - p_rel_y[best_n[-1],:]) # Reduce by likelihood that each point is better than previous best + best_n.append(softmax_selector(ucb, tau=tau)) + # best_n.append(np.argmax(ucb)) + else: + best_n = [softmax_selector(ucb, tau=tau/2.0)] #[np.argmax(ucb)] # + return x_test[best_n, :] + + +class UCBAbsRelD(ActiveLearner): + def select_observation(self, domain=None, n_test=100, p_rel=0.5, n_rel_samples=2, gamma=2.0, tau=1.0): + x_test = self.uniform_domain_sampler(n_test, domain) + fhat, vhat = self.predict_latent(x_test) + ucb = calc_ucb(fhat, vhat, gamma) + + if np.random.uniform() < p_rel: # i.e choose a relative sample + best_n = [softmax_selector(ucb, tau=tau)] #[np.argmax(ucb)] # + p_rel_y = self.rel_posterior_likelihood_array(fhat=fhat, varhat=vhat) + # sq_dist = np.sqrt(GPpref.squared_distance(x_test, x_test)) + while len(best_n) < n_rel_samples: + # ucb *= sq_dist[best_n[-1], :] # Discount ucb by distance + ucb[best_n[-1]] = 0.0 + ucb *= 4*p_rel_y[best_n[-1],:]*(1.0 - p_rel_y[best_n[-1],:]) # Divide by likelihood that each point is better than previous best + best_n.append(softmax_selector(ucb, tau=tau)) + #best_n.append(np.argmax(ucb)) + else: + best_n = [softmax_selector(ucb, tau=tau/2.0)] #[np.argmax(ucb)] # + return x_test[best_n, :] + +class DetRelBoo(ActiveLearner): + def select_observation(self, domain=None, n_test=100, n_rel_samples=2, gamma=2.0, tau=1.0): + x_test = self.uniform_domain_sampler(n_test, domain, sortx=True) + fhat, vhat = self.predict_latent(x_test) + # ucb = calc_ucb(fhat, vhat, gamma) + + # Select the first location using the mean, uncertainty and likelihood of improvement over everywhere (?) else + p_rel = self.rel_posterior_likelihood_array(fhat=fhat, varhat=vhat) + pp_rel = p_rel.mean(axis=0) + ucb = 4*pp_rel*(1-pp_rel)*np.diagonal(vhat)**0.5 # THIS SEEMS BACKWARD!!! WHY DOES IT WORK? + + available_indexes = set(range(len(x_test))) + dK = np.zeros(len(available_indexes)) + + best_n = [softmax_selector(ucb, tau=tau)] #[np.argmax(ucb)] # + uvi = self._default_uvi.copy() + uvi[0][0] = best_n[0] + while len(best_n) < n_rel_samples: + dK[best_n[-1]] = -1e5 # Bit of a scam because it is still possible to sample this value + available_indexes.remove(best_n[-1]) + best_n.append(-1) + for cn in available_indexes: + best_n[-1] = cn; uvi[0][1] = cn + K = vhat[np.ix_(best_n, best_n)] + # p_y = self.rel_likelihood.posterior_likelihood(fhat, vhat, uvi, y=self._plus_y_obs) + dK[cn] = (gamma*np.sqrt(np.linalg.det(K)) + fhat[cn]) # *p_y + best_n[-1] = softmax_selector(dK, tau=tau) # np.argmax(dK) # + return x_test[best_n, :] + +class DetSelect(ActiveLearner): + def select_observation(self, domain=None, n_test=100, n_rel_samples=2, gamma=2.0, rel_tau=1.0, abs_tau=1.0): + x_test = self.uniform_domain_sampler(n_test, domain, sortx=True) + fhat, vhat = self.predict_latent(x_test) + # ucb = calc_ucb(fhat, vhat, gamma).flatten() + + # Select the first location using the mean, uncertainty and likelihood of improvement over everywhere (?) 
else + p_rel = self.rel_posterior_likelihood_array(fhat=fhat, varhat=vhat) + pp_rel = p_rel.mean(axis=0) + x_std = np.diagonal(vhat)**0.5 + rel_value = 4*pp_rel*(1-pp_rel)*(x_std.max() - x_std) # fhat.flatten() + gamma*(vhat.sum(axis=1)**0.25) # + + available_indexes = set(range(len(x_test))) + dK = np.zeros(len(available_indexes)) + best_n = [softmax_selector(rel_value, tau=rel_tau)] #[np.argmax(rel_value)] # + uvi = self._default_uvi.copy() + uvi[0][0] = best_n[0] + while len(best_n) < n_rel_samples: + dK[best_n[-1]] = -1e5 # Bit of a scam because it is still possible to sample this value + available_indexes.remove(best_n[-1]) + best_n.append(-1) + for cn in available_indexes: + best_n[-1] = cn; uvi[0][1] = cn + K = vhat[np.ix_(best_n, best_n)] + p_y = self.rel_likelihood.posterior_likelihood(fhat, vhat, uvi, y=self._plus_y_obs) + dK[cn] = p_y*(gamma*np.sqrt(np.linalg.det(K)) + fhat[cn]) + best_n[-1] = softmax_selector(dK, tau=rel_tau) # np.argmax(dK) # + + K = vhat[np.ix_(best_n, best_n)] + mdK = gamma*np.sqrt(np.linalg.det(K)) + fhat[cn] + # mdK = dK.max() + ucb = calc_ucb(fhat, vhat, gamma) + p_rel = mdK/(mdK + ucb.max()) + + if abs_tau > 0 and np.random.uniform() > p_rel: # i.e choose an abs: + # best_n = [softmax_selector(fhat.flatten() + gamma*(vhat.sum(axis=1)**0.25), tau)] #[best_n[0]] # + best_n = [softmax_selector(ucb, tau=abs_tau)] + return x_test[best_n, :] + + +class PeakComparitor(ActiveLearner): + + def test_observation(self, x, y, x_test, gamma): + self.store_observations() + self.add_observations(x, y, self._default_uvi) + self.solve_laplace() + fhat, vhat = self.predict_latent(x_test) + ucb = calc_ucb(fhat, vhat, gamma) + self.reset_observations() + return ucb.max() + + def store_observations(self): + crx, cuv, cax, cry, cay = self.get_observations() + self.crx, self.cuv, self.cax, self.cry, self.cay = crx.copy(), cuv.copy(), cax.copy(), cry.copy(), cay.copy() + self.crf = self.f.copy() + + def reset_observations(self): + try: + self.set_observations(self.crx, self.cuv, self.cax, self.cry, self.cay) + self.f = self.crf + except AttributeError: + print "reset_observations failed: existing observations not found" + + def select_observation(self, domain=None, n_test=50, gamma=2.0, n_rel_samples=2): + n_comparators = n_rel_samples-1 + x_test = self.uniform_domain_sampler(n_test, domain) + fhat, vhat = self.predict_latent(x_test) + ucb = calc_ucb(fhat, vhat, gamma) + max_xi = np.argmax(ucb) # Old method used highest x, not ucb + other_xi = np.delete(np.arange(n_test), max_xi) + uvi = np.vstack((max_xi * np.ones(n_test - 1, dtype='int'), other_xi)).T + + p_pref = self.rel_likelihood.posterior_likelihood(fhat, vhat, uvi, y=-1) + V = np.zeros(n_test - 1) + x = np.zeros((2, 1), dtype='float') + x[0] = x_test[max_xi] + + # Now calculate the expected value for each observation pair + for i,uvi1 in enumerate(other_xi): + x[1] = x_test[uvi1] + V[i] += p_pref[i]*self.test_observation(x, self._minus_y_obs, x_test, gamma) + if (1 - p_pref[i]) > 1e-3: + V[i] += (1-p_pref[i])*self.test_observation(x, self._plus_y_obs, x_test, gamma) + + best_n = np.argpartition(V, -n_comparators)[-n_comparators:] + # best = np.argmax(V) + cVmax = np.argmax(ucb) # This is repeated in case I want to change max_xi + + if ucb[cVmax] > V.max(): + return x_test[[cVmax], :] + else: + xi = np.zeros(n_comparators+1, dtype='int') + xi[0] = max_xi + xi[1:] = other_xi[best_n] + return x_test[xi, :] + + +class LikelihoodImprovement(PeakComparitor): + + def test_observation(self, x, y, x_test, max_xi): + 
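        # Roughly: temporarily add the hypothetical relative observation (x, y), re-solve the
        # Laplace approximation, and return the posterior probability that the new latent
        # maximiser is preferred to the previous best point max_xi, before restoring the
        # stored observation set.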
self.store_observations() + self.add_observations(x, y, self._default_uvi) + f = self.solve_laplace() + fhat, vhat = self.predict_latent(x_test) + new_xi = np.argmax(fhat) + p_new_is_better = self.rel_likelihood.posterior_likelihood(fhat, vhat, np.array([[max_xi, new_xi]]), self._plus_y_obs) + self.reset_observations() + return p_new_is_better + + def select_observation(self, domain=None, n_test=50, req_improvement=0.6, n_rel_samples=2, gamma=1.5, p_thresh=0.7): + n_comparators = n_rel_samples-1 + x_test = self.uniform_domain_sampler(n_test, domain) + fhat, vhat = self.predict_latent(x_test) + max_xi = np.argmax(fhat) + other_xi = np.delete(np.arange(n_test), max_xi) + uvi = np.vstack((max_xi * np.ones(n_test - 1, dtype='int'), other_xi)).T + + p_pref = self.rel_likelihood.posterior_likelihood(fhat, vhat, uvi, y=-1) + V = np.zeros(n_test - 1) + x = np.zeros((2, 1), dtype='float') + x[0] = x_test[max_xi] + + + # Now calculate the expected value for each observation pair + for i,uvi1 in enumerate(other_xi): + x[1] = x_test[uvi1] + V[i] += p_pref[i]*self.test_observation(x, self._minus_y_obs, x_test, max_xi) + if (1-p_pref[i]) > 1e-3: + V[i] += (1-p_pref[i])*self.test_observation(x, self._plus_y_obs, x_test, max_xi) + + Vmax = V.max() + # best_n = np.argpartition(V, -n_comparators)[-n_comparators:] + # best = np.argmax(V) + if self.verbose: + print 'V_max = {0}'.format(Vmax) + + if Vmax < req_improvement: + # aa, bb = self.abs_likelihood.get_alpha_beta(fhat) + # p_under_thresh = beta.cdf(p_thresh, aa, bb) + # return x_test[[np.argmax(p_under_thresh*(1.0-p_under_thresh))], :] + ucb = calc_ucb(fhat, vhat, gamma, self.rel_likelihood.sigma) + return x_test[[np.argmax(ucb)], :] + else: + best_n = [] + while len(best_n) < n_comparators: + cbest = np.argmax(V) + best_n.append(cbest) + V = V * np.sqrt(GPpref.squared_distance(x_test[[other_xi[cbest]], :], x_test[other_xi])[0]) + xi = np.zeros(n_comparators+1, dtype='int') + xi[0] = max_xi + xi[1:] = other_xi[best_n] + return x_test[xi, :] + + +class SampledThreshold(PeakComparitor): + + def calculate_threshold_utility(self, fhat, vhat, n_samples, y_threshold): + f_sampled = self.sample_latent_posterior(fhat, vhat, n_samples = n_samples) + threshold_utility = 0.0 + # For each one, evaluate the probability mass above the threshold in the absolute function + for fs in f_sampled: + threshold_utility += self.point_utility(fhat, fs, y_threshold) + return threshold_utility/n_samples + + def point_utility(self, fhat, fs, y_threshold): + # This is the mean probability mass above the threshold value + pmass = 0.0 + for fi in fs: + pmass += 1.0 - self.abs_likelihood.cdf(y_threshold, fi) + return pmass/fs.shape[0] + + def test_observation(self, x, y, uvi, x_test, n_samples, y_threshold, f = None): + self.add_observations(x, y, uvi, keep_f=True) + self.solve_laplace() + fhat, vhat = self.predict_latent(x_test) + util = self.calculate_threshold_utility(fhat, vhat, n_samples, y_threshold) + self.reset_observations() + return util + + def select_observation(self, domain=None, x_test=None, n_test=50, n_samples=50, y_threshold=0.8, p_pref_tol=1e-3, n_mc_abs=5): + # n_test is the number of test point locations on the input function + # n_samples is the number of functions sampled from the posterior for estimating the utility + # n_mc_abs is the number of proposed observations sampled from the current posterior for absolute estimates + # Generate a set of test points in the domain (if not specified) + if x_test is None: + # x_test = self.linear_domain_sampler(n_test, 
domain) + x_test = self.uniform_domain_sampler(n_test, domain) + # x_test.sort(axis=0) + n_test = len(x_test) + + # Sample a set of functions from the current posterior + # We save the f value because otherwise it gets out of whack when we add observations + flap = self.solve_laplace() + fhat, vhat = self.predict_latent(x_test) + # base_utility = self.calculate_threshold_utility(fhat, vhat, n_samples, y_threshold) + + # Check a random set of pairwise relatives and all absolutes (massive sampling) + + # Generate a set of random pairs (this randomly pairs all x_test points) + uvi = np.random.choice(n_test, (n_test/2, 2), replace=False) + p_pref = self.rel_likelihood.posterior_likelihood(fhat, vhat, uvi, y=-1) + V_max_rel = 0.0 + + self.store_observations() + + # Relative observations + t_rel = time.time() + # Now calculate the expected value for each observation pair + for i, uv in enumerate(uvi): + x = x_test[uv] + V_rel = 0.0 + try: + if p_pref[i] < p_pref_tol: + V_rel += (1-p_pref[i])*self.test_observation(x, self._plus_y_obs, self._default_uvi, x_test, n_samples, y_threshold, f=flap) + elif p_pref[i] > 1.0-p_pref_tol: + V_rel += p_pref[i]*self.test_observation(x, self._minus_y_obs, self._default_uvi, x_test, n_samples, y_threshold, f=flap) + else: + V_rel += (1-p_pref[i])*self.test_observation(x, self._plus_y_obs, self._default_uvi, x_test, n_samples, y_threshold, f=flap) + V_rel += p_pref[i] * self.test_observation(x, self._minus_y_obs, self._default_uvi, x_test, n_samples, y_threshold, f=flap) + if V_rel >= V_max_rel: + V_max_rel = V_rel + x_best_rel = x + except GPpref.LaplaceException as exc: + print "Failed in relative test observation, x = [{0}, {1}]".format(x[0], x[1]) + raise exc + + # best_n = np.argpartition(V, -n_comparators)[-n_comparators:] + # best = np.argmax(V) + if self.verbose: + print 'V_max_rel = {0}, x = {2}, t = {1}s'.format(V_max_rel[0], time.time()-t_rel, x_best_rel[:,0]) + + # Absolute queries + V_max = 0.0 + t_rel = time.time() + for i, x in enumerate(x_test): + F = fhat[i] + np.random.randn(n_mc_abs)*np.sqrt(vhat[i,i]) + Y, mu = self.abs_likelihood.generate_samples(F) + V_abs = 0.0 + Y = np.clip(Y, 1e-2, 1-1e-2) # I had stability problems in Laplace with values approaching 0 or 1 + for y in Y: + try: + V_abs += self.test_observation(x, y, None, x_test, n_samples, y_threshold, f=flap) + except ValueError: + print "NaN Issue" + except GPpref.LaplaceException as exc: + print "Failed in absolute test observation, x = {0}, y = {1}".format(x, y) + raise exc + V_abs /= n_mc_abs + if V_abs > V_max: + V_max = V_abs + x_best = x + if self.verbose: + print 'V_max_abs = {0}, x = {2}, t = {1}s'.format(V_max, time.time() - t_rel, x_best) + + if V_max_rel > V_max: + x_best = x_best_rel + + return x_best + + +class SampledClassification(SampledThreshold): + def calculate_threshold_utility(self, fhat, vhat, n_samples, y_threshold): + # Class accuracy + # Note this assumes the sampled functions are used to make the classification decision, and we calculate the + # probability mass from the current mean estimate above the threshold + P_below = self.abs_likelihood.cdf(y_threshold, fhat) + + f_sampled = self.sample_latent_posterior(fhat, vhat, n_samples=n_samples) + threshold_utility = 0.0 + # For each one, evaluate the probability mass above the threshold in the absolute function + for fs in f_sampled: + fm = self.abs_likelihood.mean_link(fs) + predict_below = fm < y_threshold + predicted_accuracy = P_below[predict_below].sum() + (1.0 - P_below[~predict_below]).sum() + 
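+            # predicted_accuracy adds up, over all test points, the probability mass (under
+            # the current posterior mean, via P_below) that the below/above-threshold call
+            # implied by the sampled function fs is correct; dividing by len(fhat) gives the
+            # mean per-point classification accuracy for this posterior sample.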
threshold_utility += predicted_accuracy/len(fhat) + return threshold_utility/n_samples + + +class OrdinalSampler(PeakComparitor): + + def model_value(self, fhat, vhat, y_threshold): + # Probability of correct classification + p_y, mu = self.abs_likelihood.posterior_likelihood(fhat, vhat) + selection = np.argmax(p_y, axis=0)+1 + value = 0.0 + for i in range(len(fhat)): + if selection[i] >= y_threshold: + value += p_y[y_threshold-1:,i].sum() + else: + value += p_y[0:y_threshold-1,i].sum() + return value/len(fhat) + + def test_observation(self, x, y, uvi, x_test, y_threshold): + self.add_observations(x, y, uvi, keep_f=True) + self.solve_laplace() + fhat, vhat = self.predict_latent(x_test) + util = self.model_value(fhat, vhat, y_threshold) + self.reset_observations() + return util + + def get_rating_set_uv(self, n_ratings): + y = np.ones((n_ratings - 1, 1), dtype='int') + uv = [] + for i in range(n_ratings): + other_i = np.delete(np.arange(n_ratings), i) + uv.append(np.hstack((np.atleast_2d(other_i).T, y*i))) + return y, uv + + + def select_observation(self, domain=None, x_test=None, n_test=50, y_threshold=5, p_pref_tol=1e-3, n_rel_samples=2, n_mc_samples=100): + # Generate a set of test points in the domain (if not specified) + if x_test is None: + # x_test = self.linear_domain_sampler(n_test, domain) + x_test = self.uniform_domain_sampler(n_test, domain) + n_test = len(x_test) + + # We save the f value because otherwise it gets out of whack when we add observations + flap = self.solve_laplace() + fhat, vhat = self.predict_latent(x_test) + + # Sample a set of functions from the current posterior + f_post = np.random.multivariate_normal(fhat.flatten(), vhat, n_mc_samples) + + # Generate a set of random groups of n_rel_samples from the input space + uvi = np.array([np.random.choice(n_test, n_rel_samples, replace=False) for i in range(n_test)]) + V_max_rel = 0.0 + + self.store_observations() + + # Relative observations + t_rel = time.time() + y_uv, full_uv = self.get_rating_set_uv(n_rel_samples) + + # Calculate the expected value for each observation set + for i, uv in enumerate(uvi): + x = x_test[uv] + p_pref = np.zeros(n_rel_samples) + for fp in f_post: + p_pref[np.argmax(fp[uv])] += 1 + p_pref /= n_mc_samples + V_rel = 0.0 + + for i, p_x in enumerate(p_pref): + if p_x > p_pref_tol: + V_rel += p_x*self.test_observation(x, y_uv, full_uv[i], x_test, y_threshold) + if V_rel >= V_max_rel: + V_max_rel = V_rel + x_best_rel = x + + # best_n = np.argpartition(V, -n_comparators)[-n_comparators:] + # best = np.argmax(V) + if self.verbose >= 1: + print 'V_max_rel = {0}, x = {2}, t = {1}s'.format(V_max_rel, time.time()-t_rel, x_best_rel[:,0]) + + # Absolute queries + V_max = 0.0 + t_rel = time.time() + p_y, mu = self.abs_likelihood.posterior_likelihood(fhat, vhat) + for x, ppy in zip(x_test, p_y.T): + V_abs = 0.0 + for y_obs, p_obs in enumerate(ppy): + if p_obs > p_pref_tol: + V_abs += p_obs*self.test_observation(x, y_obs+1, None, x_test, y_threshold) + if V_abs > V_max: + V_max = V_abs + x_best = np.array([x]) + if self.verbose >= 1: + print 'V_max_abs = {0}, t = {1}s'.format(V_max, time.time() - t_rel) + + if V_max_rel > V_max: + x_best = x_best_rel + + return x_best + + +class OrdinalOptimist(OrdinalSampler): + def model_value(self, fhat, vhat, y_threshold): + # Probability of values above threshold + p_y, mu = self.abs_likelihood.posterior_likelihood(fhat, vhat) + value = p_y[y_threshold-1:,:].sum()/len(fhat) + return value/len(fhat) diff --git a/active_statruns.py b/active_statruns.py new file mode 
100644 index 0000000..eadc825 --- /dev/null +++ b/active_statruns.py @@ -0,0 +1,201 @@ +# Simple 1D GP classification example +import time +import numpy as np +import GPpref +import plot_tools as ptt +import active_learners +import test_data +import plot_statruns +import yaml +import argparse + +np.set_printoptions(precision=3) +wrms_fun = test_data.wrms_misclass +wrms_args = {'w_power': 1} + +now_time = time.strftime("%Y_%m_%d-%H_%M") +yaml_config ='./data/statruns_dec2017.yaml' + +parser = argparse.ArgumentParser(description='Statruns for active learning with preference GP') +parser.add_argument('-np', '--no-plots', dest='make_plots', action='store_false', help='Turn off plots (default False)') +parser.add_argument('-y', '--yaml-config', default=yaml_config, help='YAML config file') + +args = parser.parse_args() + +print "Using YAML config: {0}".format(args.yaml_config) +if not args.make_plots: + print "No plot output." +with open(args.yaml_config, 'rt') as fh: + run_parameters = yaml.safe_load(fh) + +log_hyp = np.log(run_parameters['hyperparameters']) + +# Statrun parameters +statrun_params = run_parameters['statrun_params'] +np.random.seed(statrun_params['randseed']) +n_best_points = statrun_params['n_best_points'] +n_trials = statrun_params['n_trials'] +n_queries = statrun_params['n_queries'] +if 'calc_relative_error' in run_parameters['statrun_params']: + calc_relative_error = statrun_params['calc_relative_error'] +else: + calc_relative_error = False + +n_rel_samples = run_parameters['statrun_params']['n_rel_samples'] + +# Define polynomial function to be modelled +d_x = run_parameters['GP_params']['hyper_counts'][0]-1 +random_wave = test_data.MultiWave(n_dimensions=d_x, **run_parameters['wave_params']) + +now_time = time.strftime("%Y_%m_%d-%H_%M") +data_dir = 'data/' + now_time + '/' +ptt.ensure_dir(data_dir) +print "Data will be saved to: {0}".format(data_dir) +waver = test_data.WaveSaver(n_trials, random_wave.n_components, n_dim=d_x) + + +# True function +x_plot = np.linspace(0.0, 1.0, statrun_params['n_xtest'], dtype='float') + +try: + if statrun_params['sample_type'] is 'uniform': + x_test = ptt.make_meshlist(x_plot, d_x) + else: + x_test = np.random.uniform(0.0, 1.0, + size = (statrun_params['n_xtest'], d_x)) +except KeyError: + print('Config does not contain statrun_params: sample_type value. 
Default to uniform sampling') + x_test = ptt.make_meshlist(x_plot, d_x) + +far_domain = np.tile(np.array([[-3.0], [-2.0]]), d_x) + +# Construct active learner objects +n_learners = len(run_parameters['learners']) +learners = [] +names = [] +obs_array = [] +full_obs = {} +for l in run_parameters['learners']: + if 'n_rel_samples' in l['obs_args']: + l['obs_args']['n_rel_samples'] = n_rel_samples + learners.append(active_learners.Learner(**l)) + names.append(l['name']) + obs_array.append({'name': l['name'], 'obs': []}) + full_obs[l['name']] = [None] * n_trials + +wrms_results = np.zeros((n_learners, n_queries+1, n_trials)) +true_pos_results = np.zeros((n_learners, n_queries+1, n_trials), dtype='int') +selected_error = np.zeros((n_learners, n_queries+1, n_trials)) +if calc_relative_error: + relative_error = np.zeros((n_learners, n_queries+1, n_trials)) +else: + relative_error = None + +with open(data_dir + 'params.yaml', 'wt') as fh: + yaml.safe_dump(run_parameters, fh) + +trial_number = 0 +while trial_number < n_trials: + + try: + print 'Trial {0}'.format(trial_number) + random_wave.randomize() + random_wave.print_values() + waver.set_vals(trial_number, *random_wave.get_values()) + rel_obs_fun = GPpref.RelObservationSampler(random_wave.out, run_parameters['GP_params']['rel_likelihood'], run_parameters['rel_obs_params']) + abs_obs_fun = GPpref.AbsObservationSampler(random_wave.out, run_parameters['GP_params']['abs_likelihood'], run_parameters['abs_obs_params']) + + f_true = abs_obs_fun.f(x_test) + y_abs_true = abs_obs_fun.mean_link(x_test) + best_points = np.argpartition(y_abs_true.flatten(), -n_best_points)[-n_best_points:] + best_points_set = set(best_points) + if calc_relative_error: + p_rel_y_true = rel_obs_fun.observation_likelihood_array(x_test) + + # Initial data + x_rel, uvi_rel, y_rel, fuv_rel = rel_obs_fun.generate_n_observations(statrun_params['n_rel_train'], + n_xdim=d_x, domain=far_domain) + x_abs, y_abs, mu_abs = abs_obs_fun.generate_n_observations(statrun_params['n_abs_train'], n_xdim=d_x, + domain=far_domain) + model_kwargs = {'x_rel': x_rel, 'uvi_rel': uvi_rel, 'x_abs': x_abs, 'y_rel': y_rel, 'y_abs': y_abs, + 'rel_kwargs': run_parameters['rel_obs_params'], 'abs_kwargs': run_parameters['abs_obs_params']} + model_kwargs.update(run_parameters['GP_params']) + + # Get initial solution + for nl, learner in enumerate(learners): + learner.build_model(model_kwargs) + learner.model.set_hyperparameters(log_hyp) + f = learner.model.solve_laplace() + fhat, vhat = learner.model.predict_latent(x_test) + y_abs_est = learner.model.abs_posterior_mean(x_test, fhat, vhat) + + best_points_est = set(np.argpartition(y_abs_est.flatten(), -n_best_points)[-n_best_points:]) + wrms_results[nl, 0, trial_number] = wrms_fun(y_abs_true, y_abs_est, **wrms_args) + true_pos_results[nl, 0, trial_number] = len(best_points_set.intersection(best_points_est)) + selected_error[nl, 0, trial_number] = test_data.wrms(y_abs_true[best_points], y_abs_est[best_points], weight=False) + if calc_relative_error: + p_rel_y_post = learner.model.rel_posterior_likelihood_array(fhat=fhat, varhat=vhat) + relative_error[nl, 0, trial_number] = test_data.rel_error(y_abs_true, p_rel_y_true, y_abs_est, p_rel_y_post, weight=True) + + obs_tuple = learner.model.get_observations() + full_obs[learner.name][trial_number] = [test_data.ObsObject(*obs_tuple)] + + for obs_num in range(n_queries): + t0 = time.time() + linear_p_rel = max(0.0, (n_queries-obs_num)/float(n_queries)) + + for nl, learner in enumerate(learners): + # Update p_rel for 
certain methods (hacky?) + if learner.update_p_rel: + learner.obs_arguments['p_rel'] = linear_p_rel + + next_x = learner.model.select_observation(**learner.obs_arguments) + next_uvi = None + if next_x.shape[0] == 1: + next_y, next_f = abs_obs_fun.generate_observations(next_x) + # print 'Abs: x:{0}, y:{1}'.format(next_x[0], next_y[0]) + else: + next_y, next_uvi, next_fx = rel_obs_fun.gaussian_multi_pairwise_sampler(next_x) + next_fuv = next_fx[next_uvi][:,:,0] + fuv_rel = np.concatenate((fuv_rel, next_fuv), 0) + # print 'Rel: x:{0}, best_index:{1}'.format(next_x.flatten(), next_uvi[0, 1]) + full_obs[learner.name][trial_number].append((next_x, next_y, next_uvi)) + learner.model.add_observations(next_x, next_y, next_uvi) + f = learner.model.solve_laplace() + fhat, vhat = learner.model.predict_latent(x_test) + y_abs_est = learner.model.abs_posterior_mean(x_test, fhat, vhat) + + # Get selected best point set and error results + best_points_est = set(np.argpartition(y_abs_est.flatten(), -n_best_points)[-n_best_points:]) + wrms_results[nl, obs_num+1, trial_number] = wrms_fun(y_abs_true, y_abs_est, **wrms_args) + true_pos_results[nl, obs_num+1, trial_number] = len(best_points_set.intersection(best_points_est)) + selected_error[nl, obs_num+1, trial_number] = test_data.wrms(y_abs_true[best_points], y_abs_est[best_points], weight=False) + if calc_relative_error: + p_rel_y_post = learner.model.rel_posterior_likelihood_array(fhat=fhat, varhat=vhat) + relative_error[nl, obs_num+1, trial_number] = test_data.rel_error(y_abs_true, p_rel_y_true, y_abs_est, + p_rel_y_post, weight=True) + if calc_relative_error: + print "{0}, t={t:0.2f}s, tp = {1}, wrms = {2}, p_err={3}".format(obs_num, true_pos_results[:, obs_num+1, trial_number], wrms_results[:, obs_num+1, trial_number], relative_error[:, obs_num+1, trial_number], t=time.time()-t0) + else: + print "{0}, t={t:0.2f}s, tp = {1}, wrms = {2}".format(obs_num, true_pos_results[:, obs_num+1, trial_number], wrms_results[:, obs_num+1, trial_number], t=time.time()-t0) + for nl, learner in enumerate(learners): + obs_tuple = learner.model.get_observations() + obs_array[nl]['obs'].append(test_data.ObsObject(*obs_tuple)) + + + except Exception as e: + print 'Exception error is: %s, attempting a new wave' % e + continue + + trial_number += 1 + if relative_error is not None: + plot_statruns.save_data(data_dir, wrms_results, true_pos_results, selected_error, obs_array, full_obs=full_obs, + rel_err=relative_error, t=trial_number) + else: + plot_statruns.save_data(data_dir, wrms_results, true_pos_results, selected_error, obs_array, full_obs=full_obs, + t=trial_number) + + waver.save(data_dir+'wave_data.pkl') + +if args.make_plots: + hfig = plot_statruns.plot_results(wrms_results, true_pos_results, selected_error, obs_array, relative_error=relative_error, data_dir=data_dir, bars=True, norm_comparator=0) \ No newline at end of file diff --git a/beta_to_discrete.py b/beta_to_discrete.py new file mode 100644 index 0000000..6f86c37 --- /dev/null +++ b/beta_to_discrete.py @@ -0,0 +1,61 @@ +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.patches import Rectangle +import GPpref +import nice_plot_colors as npc + +def convert_to_discrete(b, f, n_cat=5): + P = np.zeros(n_cat+1, dtype='float') + for i in range(1, n_cat+1): + P[i] = b.cdf(float(i)/n_cat, f) - P[0:i].sum() + return P[1:] + +def draw_probs(ax, x, P): + del_x = x[1]-x[0] + ddel_x = del_x / P.shape[0] + for xi, p in zip(x, P.T): # Loop over columns + ip = np.argsort(p, ) + for i in ip[::-1]: + box = 
Rectangle((xi+ddel_x*i, 0.0), ddel_x, p[i], color=npc.bars[i]) + ax.add_artist(box) + +# Examine how continuous variable is converted to discrete categories through beta distribution +sigma = 1.0 +v = 10.0 +n = 5 + +beta_likelihood = GPpref.AbsBoundProbit(sigma=sigma, v=v) + +f = np.linspace(-3.0, 3.0, 7) + +y = np.linspace(0.0, 1.0, 201) +py = np.zeros((len(f), len(y)), dtype='float') +Py = np.zeros((len(f), n), dtype='float') +for i, fi in enumerate(f): + py[i] = beta_likelihood.likelihood(y, fi) + Py[i] = convert_to_discrete(beta_likelihood, fi, n) + +fh, ah = plt.subplots() +xbars = np.arange(0.0, 1.0, step=1.0/n) +draw_probs(ah, xbars, Py) +ah2 = ah.twinx() +lines = [] +for i, p in enumerate(py): + lines.extend(ah2.plot(y, p, color=npc.lines[i])) +ah2.set_ybound(lower=0.0) +ah2.legend(lines, ['${0:0.1f}\sigma$'.format(fi) for fi in f]) + +# Logistic ordinal +log_sigma = 0.1 +b = np.linspace(-3.0, 3.0, n) # Log breakpoints +Py_ord = np.zeros((len(f), n), dtype='float') +for i, fi in enumerate(f): + py[i] = beta_likelihood.likelihood(y, fi) + Py[i] = convert_to_discrete(beta_likelihood, fi, n) + + + +plt.show() + + + diff --git a/config/statruns2_oct2017.yaml b/config/statruns2_oct2017.yaml new file mode 100644 index 0000000..6501171 --- /dev/null +++ b/config/statruns2_oct2017.yaml @@ -0,0 +1,28 @@ +# Hyperparameters are: [l, sig_f, sig_rel, sig_beta, v_beta] +hyperparameters: [0.025, 1.0, 0.2, 1.0, 40.0] +abs_obs_params: {sigma: 1.0, v: 40.0} +rel_obs_params: {sigma: 0.2} +wave_params: + amp_range: [1.5, 1.8] + damp_range: [250.0, 350.0] + f_range: [10.0, 30.0] + n_components: 4 + off_range: [0.01, 0.99] +statrun_params: + n_queries: 80 + n_trials: 100 + n_rel_train: 0 + n_abs_train: 1 + randseed: 0 + n_best_points: 30 + n_xtest: 200 + calc_relative_error: True +learner_params: {delta_f: 1.0e-05, n_rel_samples: 5} +# High tau for softmax is more random +learners: +# - name: MaxVar (abs) +# model_type: MaxVar +# obs_args: {p_rel: 0.0} + - name: HUCB ($p_{rel}=-1.0, w_v=0.75, \tau_r=0.1, \tau_a=0.05$) (rel, abs) + model_type: MaxVar + obs_args: {n_rel_samples: 5, p_rel: -1.0, rel_tau: 0.1, abs_tau: 0.05, w_v: 0.75, selector: entropy} diff --git a/mcmc.py b/mcmc.py new file mode 100644 index 0000000..923a53b --- /dev/null +++ b/mcmc.py @@ -0,0 +1,93 @@ +import numpy as np + + +class Enum(set): + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + +Samplers = Enum(['Metropolis', 'MetropolisHastings', 'Gibbs']) + + +class MCMCSampler(object): + def __init__(self, func, limits, q=None, seed=None, func_kwargs={}): + self.f = func + # self.sampler = sampler ## , sampler=Samplers.'MetropolisHastings' + self.limits = limits + self.lim_range = self.limits[1, :] - self.limits[0, :] + self.n_dim = self.limits.shape[1] + np.random.seed(seed) + if q == None: + q_std = self.lim_range / 10.0 + q = (lambda x: x + np.random.normal(size=self.n_dim) * q_std) + self.q = q + self.func_kwargs = func_kwargs + + @property + def sampler(self): + return self._sampler + + @sampler.setter + def sampler(self, s): + if s not in Samplers: raise Exception("Specified sampler must be in Sampler set") + self._sampler = s + + @property + def limits(self): + return self._limits + + @limits.setter + def limits(self, l): + l = np.array(l) + if l.shape[0] != 2: raise Exception("Limits must be a 2*n_dim array") + self._limits = l + + def generate_sample(self, X_current, f_current): + # Generate proposed sample + X_proposed = self.q(X_current) + while True: + inlimits = 
np.logical_and(X_proposed >= self.limits[0, :], X_proposed <= self.limits[1, :]) + if not inlimits.all(): + X_proposed = self.q(X_current) + else: + break + + # Acceptance ratio + f_proposed = self.f(X_proposed, **self.func_kwargs) + alpha = f_proposed / f_current + + # Accept? + pp = np.random.rand(1) + if pp < alpha: # Accept + return X_proposed, f_proposed + return X_current, f_current + + def sample_chain(self, n_samples, burn_in=None, X_start=None): + if burn_in is None: + burn_in = np.floor_divide(n_samples, 10) + if X_start is None: + X_start = self.limits[0, :] + np.random.rand(self.n_dim) * self.lim_range + X_current = X_start + f_current = self.f(X_current, **self.func_kwargs) + + # Run for burn-in and throw away samples + for t in range(burn_in): + X_current, f_current = self.generate_sample(X_current, f_current) + if t % 100 == 0: + print "Burn-in: {0}/{1}".format(t, burn_in) + + X = np.zeros((n_samples, self.n_dim)) + f_X = np.zeros(n_samples) + X[0, :] = X_current + f_X[0] = f_current + + for t in range(1, n_samples): + X_current, f_current = self.generate_sample(X_current, f_current) + X[t, :] = X_current + f_X[t] = f_current + if t % 100 == 0: + print "Chain sampling: {0}/{1}".format(t, n_samples) + + return X, f_X \ No newline at end of file diff --git a/nice_plot_colors.py b/nice_plot_colors.py new file mode 100644 index 0000000..c318b28 --- /dev/null +++ b/nice_plot_colors.py @@ -0,0 +1,31 @@ +import numpy as np + +lines = np.array([[57,106,177], [218,124,48], [62,150,81], [204,37,41], [83,81,84], [107,76,154], [146,36,40], [148,139,61]], dtype='float')/255 +bars = np.array([[114,147,203], [225,151,76], [132,186,91], [211,94,96], [128,133,133], [144,103,167], [171,104,87], [204,194,16]], dtype='float')/255 + +# These look better in print +vlines = ['cornflowerblue', 'green', 'firebrick', 'orange', 'black', 'indigo'] +vbars = ['steelblue', 'darkgreen', 'darkred', 'darkorange', 'grey', 'mediumvioletred'] + +lines = np.hstack((lines, np.ones((lines.shape[0],1)))) +bars = np.hstack((bars, np.ones((bars.shape[0],1)))) + + +def darken(c,power=2): + co = np.array(c).copy() + co = np.clip(co**power, 0, 1.0) + co[-1] = c[-1] + return co + + +def lighten(c, power=2): + co = 1.0-np.array(c).copy() + co = darken(co, power) + return 1.0-co + + +def greyify(c, grey=0.5, target_level=0.5): + co = np.array(c).copy() + co = np.clip(co + grey*(target_level-co), 0, 1.0) + co[-1] = c[-1] + return co diff --git a/ordinal_likelihoods.py b/ordinal_likelihoods.py new file mode 100644 index 0000000..fe0dd9a --- /dev/null +++ b/ordinal_likelihoods.py @@ -0,0 +1,128 @@ +import numpy as np +import matplotlib.pyplot as plt +import GPpref +import nice_plot_colors as npc +import plot_tools +from scipy.stats import norm +plt.rc('font',**{'family':'serif','sans-serif':['Computer Modern Roman']}) +plt.rc('text', usetex=True) + +def beta_discrete(b, f, n_cat=5): + P = np.zeros(n_cat+1, dtype='float') + for i in range(1, n_cat+1): + P[i] = b.cdf(float(i)/n_cat, f) - P[0:i].sum() + return P[1:] + + +# Examine how continuous variable is converted to discrete categories through beta distribution and logistic ordinal +n_ords = 5 + +n_f = 101 +f = np.linspace(-3.0, 3.0, n_f) + +# Integrated beta +sigma = 1.0 +v = 15.0 +beta_likelihood = GPpref.AbsBoundProbit(sigma=sigma, v=v) +Py = np.zeros((n_f, n_ords), dtype='float') +for i, fi in enumerate(f): + Py[i] = beta_discrete(beta_likelihood, fi, n_ords) + +fh, ah = plt.subplots() +beta_lines = [] +for i, p in enumerate(Py.T): + beta_lines.extend(ah.plot(f, p, 
color=npc.lines[i])) +ah.legend(beta_lines, ['$p(y={0}|f)$'.format(ni+1) for ni in range(n_ords)]) + +# Logistic ordinal +ord_sigma = 0.25 +ord_b = 1.2 +b = np.linspace(-ord_b, ord_b, n_ords-1) # Log breakpoints +Py_ord = np.zeros((n_f, n_ords), dtype='float') +z = lambda ff, bb: (bb-ff) / ord_sigma +Py_ord[:, 0] = norm.cdf(z(f, b[0])) +Py_ord[:, -1] = 1.0-norm.cdf(z(f, b[-1])) +for i in range(1, n_ords-1): + Py_ord[:,i] = norm.cdf(z(f, b[i])) - norm.cdf(z(f, b[i-1])) + +ord_lines = [] +for i, p in enumerate(Py_ord.T): + ord_lines.extend(ah.plot(f, p, color=npc.lines[i], ls='--')) + +ah.set_xlabel('$f(x)$') +ah.set_ylabel('$p(y|f)$') + + +# Testing the Ordinal Probit class in GPPref +ord_kwargs = {'sigma': ord_sigma, 'b': ord_b, 'n_ordinals': n_ords, 'eps': 1.0e-5} +orpro = GPpref.OrdinalProbit(**ord_kwargs) +Py_c = np.zeros((n_f, n_ords), dtype='float') +dPy_c = np.zeros((n_f, n_ords), dtype='float') +d2Py_c = np.zeros((n_f, n_ords), dtype='float') +fv = np.atleast_2d(f).T +y = np.ones(fv.shape, dtype='int') +for i in range(n_ords): + d2p, dp, p = orpro.derivatives(y*(i+1), fv) + d2Py_c[:,i], dPy_c[:,i], Py_c[:,i] = d2p.diagonal(), dp.flat, p.flat + +fh2, ah2 = plt.subplots(2, 2) +ah2 = ah2.flatten() +ah2[0].plot(f, np.exp(Py_c)) +ah2[0].set_title('$P(y|f(x))$') +ah2[0].set_ylabel('$P(y)$') + +ah2[1].plot(f, Py_c) +ah2[1].set_title('$ln P(y|f(x))$') +ah2[1].set_ylabel('$ln P(y)$') + +ah2[2].plot(f, dPy_c) +ah2[2].set_title('$\partial ln P(y|f(x) / \partial f(x)$') +ah2[2].set_ylabel('$\partial ln P(y) / \partial f(x)$') + +ah2[3].plot(f, d2Py_c) +ah2[3].set_title('$\partial^2 l(y,f(x)) / \partial f(x)^2$') +ah2[3].set_ylabel('$\partial^2 ln P(y) / \partial f(x)^2$') +for a in ah2: + a.set_xlabel('$f(x)$') + + +# Test the sampler +def fun(x): + return x + # return 2.5*np.sin(x/2.0) +obs_fun = GPpref.AbsObservationSampler(fun, 'OrdinalProbit', ord_kwargs) +x_plot = f +x_test = np.atleast_2d(x_plot).T +f_true = obs_fun.f(x_test) +y_samples = np.atleast_2d(np.arange(1, n_ords+1, 1, dtype='int')).T +p_y_array = obs_fun.observation_likelihood_array(x_test, y_samples) +z, mu = obs_fun.generate_observations(x_test) + +fh3, ah3 = plt.subplots(1, 2) +plot_tools.plot_with_bounds(ah3[0], x_test, f_true, ord_sigma, c=npc.lines[0]) +abs_extent = [x_test[0, 0], x_test[-1, 0], y_samples[0, 0]-0.5, y_samples[-1, 0]+0.5] +h_pat = ah3[1].imshow(p_y_array, origin='lower', extent=abs_extent, aspect='auto') +ah3[1].plot(x_test, z, 'w+') +h_yt, = ah3[1].plot(x_test, mu, c=npc.lines[1]) +ah3[1].legend([h_yt], ['$E[y]$']) +fh3.colorbar(h_pat, ax=ah3[1]) + + +# Plot some beta likelihood stuff for Thane +# beta_likelihood = GPpref.AbsBoundProbit(sigma=1.0, v=25.0) +# y = np.atleast_2d(np.linspace(0.0, 1.0, 201)).T +# f = np.linspace(-2.0, 2.0, 5) +# p_y = np.zeros((y.shape[0], len(f)), dtype='float') +# for i, fxi in enumerate(f): +# p_y[:, i:i + 1] = beta_likelihood.likelihood(y, fxi) +# fh4, ah4 = plt.subplots() +# h_b = plt.plot(y, p_y) +# ah4.legend(h_b, ['$f = {0:0.1f}$'.format(fi) for fi in f]) +# ah4.set_ylim([0, 10.0]) +# ah4.set_xlabel('$y$') +# ah4.set_ylabel('$p(y|f)$') +# ah4.set_title('Symmetric beta likelihood') +plt.show(block=False) + + + diff --git a/plot_acquisition.py b/plot_acquisition.py new file mode 100644 index 0000000..26f97c1 --- /dev/null +++ b/plot_acquisition.py @@ -0,0 +1,135 @@ +# Simple 1D GP classification example +import time +import numpy as np +import matplotlib.pyplot as plt +import GPpref +import plot_tools as ptt +import active_learners +import test_data +from 
matplotlib.backends.backend_pdf import PdfPages + +nowstr = time.strftime("%Y_%m_%d-%H_%M") +plt.rc('font',**{'family':'serif','sans-serif':['Computer Modern Roman']}) +plt.rc('text', usetex=True) + +save_plots = False + +# log_hyp = np.log([0.1,0.5,0.1,10.0]) # length_scale/s, sigma_f, sigma_n_rel, sigma_beta, v_beta +# log_hyp = np.log([0.07, 0.75, 0.25, 1.0, 28.1]) +# log_hyp = np.log([0.05, 1.5, 0.09, 2.0, 50.0]) +log_hyp = np.log([0.02, 0.5, 0.1, 0.8, 60.0]) +np.random.seed(2) + +n_rel_train = 1 +n_abs_train = 1 +rel_sigma = 0.05 +delta_f = 1e-5 + +beta_sigma = 0.8 +beta_v = 80.0 + +n_xplot = 101 +n_mcsamples = 1000 +n_ysamples = 101 + +n_queries = 10 + +# Define polynomial function to be modelled +# true_function = test_data.multi_peak +# random_wave = test_data.VariableWave([0.6, 1.0], [5.0, 10.0], [0.0, 1.0], [10.0, 20.0]) +random_wave = test_data.MultiWave(amp_range=[0.6, 1.2], f_range=[10.0, 30.0], off_range=[0.1, 0.9], + damp_range=[250.0, 350.0], n_components=3) +random_wave.randomize() +# random_wave.set_values(1.0, 6.00, 0.2, 10.50) +true_function = random_wave.out +random_wave.print_values() + +if save_plots: + nowstr = time.strftime("%Y_%m_%d-%H_%M") + fig_dir = 'fig/' + nowstr + '/' + ptt.ensure_dir(fig_dir) + print "Figures will be saved to: {0}".format(fig_dir) + pdf_pages = PdfPages(fig_dir+'posterior_all.pdf') + acq_pdf = PdfPages(fig_dir+'acquisition.pdf') + +rel_obs_fun = GPpref.RelObservationSampler(true_function, GPpref.PrefProbit(sigma=rel_sigma)) +abs_obs_fun = GPpref.AbsObservationSampler(true_function, GPpref.AbsBoundProbit(sigma=beta_sigma, v=beta_v)) + +# True function +x_plot = np.linspace(0.0, 1.0, n_xplot,dtype='float') +x_test = np.atleast_2d(x_plot).T +f_true = abs_obs_fun.f(x_test) +mu_true = abs_obs_fun.mean_link(x_test) +mc_samples = np.random.normal(size=n_mcsamples) +abs_y_samples = np.atleast_2d(np.linspace(0.01, 0.99, n_ysamples)).T +p_abs_y_true = abs_obs_fun.observation_likelihood_array(x_test, abs_y_samples) +p_rel_y_true = rel_obs_fun.observation_likelihood_array(x_test) + +# Training data +x_rel, uvi_rel, uv_rel, y_rel, fuv_rel = rel_obs_fun.generate_n_observations(n_rel_train, n_xdim=1) +x_abs, y_abs, mu_abs = abs_obs_fun.generate_n_observations(n_abs_train, n_xdim=1) + + +# Plot true functions +fig_t, (ax_t_l, ax_t_a, ax_t_r) = ptt.true_plots(x_test, f_true, mu_true, rel_sigma, + abs_y_samples, p_abs_y_true, p_rel_y_true, + x_abs, y_abs, uv_rel, fuv_rel, y_rel, + t_l=r'True latent function, $f(x)$') +if save_plots: + fig_t.savefig(fig_dir+'true.pdf', bbox_inches='tight') + +# Construct active learner object +# learner = active_learners.UCBAbsRelD(x_rel, uvi_rel, x_abs, y_rel, y_abs, delta_f=delta_f, +# rel_likelihood=GPpref.PrefProbit(), abs_likelihood=GPpref.AbsBoundProbit()) +# # obs_arguments = {'req_improvement': 0.60, 'n_test': 50, 'gamma': 2.0, 'n_rel_samples': 5, 'p_thresh': 0.7} +# obs_arguments = {'n_test': 100, 'p_rel': 0.5, 'n_rel_samples': 5, 'gamma': 2.0} + +learner = active_learners.ExpectedImprovementRel(x_rel, uvi_rel, x_abs, y_rel, y_abs, delta_f=delta_f, + rel_likelihood=GPpref.PrefProbit(), abs_likelihood=GPpref.AbsBoundProbit()) +acq_fun = learner.expected_improvement +obs_arguments = {'n_test': 100, 'zeta': 0.1} + +# Get initial solution +learner.set_hyperparameters(log_hyp) +f = learner.solve_laplace() +learner.print_hyperparameters() + +if save_plots: + fig_p, (ax_p_l, ax_p_a, ax_p_r), h_plotobj = learner.create_posterior_plot(x_test, f_true, mu_true, rel_sigma, fuv_rel, + abs_y_samples, mc_samples) + fig_a, ax_a = 
plt.subplots() + pdf_pages.savefig(fig_p, bbox_inches='tight') + # fig_p.savefig(fig_dir+'posterior00.pdf', bbox_inches='tight') + +for obs_num in range(n_queries): + obs_arguments['p_rel'] = max(0.0, (n_queries - obs_num) / float(n_queries)) + next_x = learner.select_observation(**obs_arguments) + y_acq = learner.expected_improvement() + if next_x.shape[0] == 1: + next_y, next_f = abs_obs_fun.generate_observations(next_x) + learner.add_observations(next_x, next_y) + print 'Abs: x:{0}, y:{1}'.format(next_x[0], next_y[0]) + else: + next_y, next_uvi, next_fx = rel_obs_fun.gaussian_multi_pairwise_sampler(next_x) + next_fuv = next_fx[next_uvi][:, :, 0] + fuv_rel = np.concatenate((fuv_rel, next_fuv), 0) + learner.add_observations(next_x, next_y, next_uvi) + print 'Rel: x:{0}, best_index:{1}'.format(next_x.flatten(), next_uvi[0, 1]) + + f = learner.solve_laplace() + if save_plots: + fig_p, (ax_p_l, ax_p_a, ax_p_r), h_plotobj = learner.create_posterior_plot(x_test, f_true, mu_true, rel_sigma, fuv_rel, + abs_y_samples, mc_samples) + pdf_pages.savefig(fig_p, bbox_inches='tight') + # fig_p.savefig(fig_dir+'posterior{0:02d}.pdf'.format(obs_num+1), bbox_inches='tight') + plt.close(fig_p) + +learner.print_hyperparameters() + +if not save_plots: + fig_p, (ax_p_l, ax_p_a, ax_p_r), h_plotobj = learner.create_posterior_plot(x_test, f_true, mu_true, rel_sigma, fuv_rel, + abs_y_samples, mc_samples) +else: + pdf_pages.close() + +plt.show() diff --git a/plot_statruns.py b/plot_statruns.py new file mode 100644 index 0000000..eefa2ef --- /dev/null +++ b/plot_statruns.py @@ -0,0 +1,326 @@ +import numpy as np +import matplotlib.pyplot as plt +import pickle +import yaml +import test_data +import GPpref +import active_learners +from Tkinter import Tk +from tkFileDialog import askdirectory +import argparse +# from test_data import ObsObject +plt.rc('font',**{'family':'serif','sans-serif':['Computer Modern Roman']}) +plt.rc('text', usetex=True) + + +def single_plot(data, x=None, names=None, title='', xlabel='Number of samples', ylabel='', bars=False, percentile=0, precut=0, errorevery=5): + data = data[:,precut:,:] + n_trials = data.shape[2] + + if x is None: + x = np.arange(data.shape[1])+precut + + hf, hax = plt.subplots() + hl = [] + for dd in data: + mean_err = np.nanmean(dd, axis=1) + if percentile < 0: + err_lo = np.nanstd(dd, axis=1, ddof=1) / np.sqrt(n_trials) + err_hi = err_lo + elif percentile == 0: + err_lo = np.nanstd(dd, axis=1) + err_hi = err_lo + else: + err_lo = mean_err - np.percentile(dd, percentile, axis=1) + err_hi = np.percentile(dd, percentile + 50, axis=1) - mean_err + err = np.array([err_lo, err_hi]) + + if bars: + hl.append(hax.errorbar(x, mean_err, yerr=err, capsize=2.0, errorevery=errorevery)) + else: + hl.append(hax.plot(x, mean_err)[0]) + hax.legend(hl, names, loc='best') + hax.set_title(title) + hax.set_xlabel(xlabel) + hax.set_ylabel(ylabel) + return hf, hax + +def plot_results(wrms_results, true_pos_results, selected_error, obs_array, relative_error=None, data_dir=None, bars=True, norm_comparator=0, exclusions=[]): + methods_indexes = [] + for i in range(wrms_results.shape[0]): + if i not in exclusions: + methods_indexes.append(i) + methods_indexes = np.array(methods_indexes) + + names = [obs_array[i]['name'] for i in methods_indexes] + + wrms_results=wrms_results[methods_indexes,:,:] + true_pos_results=true_pos_results[methods_indexes,:,:] + selected_error=selected_error[methods_indexes,:,:] + + + f0, ax0 = single_plot(wrms_results, names=names, ylabel='Weighted RMSE', bars=bars, 
percentile=25) + f1, ax1 = single_plot(true_pos_results, names=names, ylabel='True positive selections (out of 30)', bars=True, precut=1, percentile=25) + f2, ax2 = single_plot(selected_error, names=names, ylabel='RMSE of best paths', bars=True, precut=1, percentile=25) + f = [f0, f1, f2] + ax = [ax0, ax1, ax2] + + try: + norm_wrms = wrms_results/wrms_results[norm_comparator] + f3, ax3 = single_plot(norm_wrms, names=names, title='Normalized weighted RMSE', bars=bars, percentile=0) + f.append(f3) + ax.append(ax3) + except: + pass + + if relative_error is not None: + relative_error=relative_error[methods_indexes,:,:] + fr, axr = single_plot(relative_error, names=names, ylabel='Mean relative prediction error', bars=True) + f.append(fr); ax.append(axr) + if data_dir is not None: + fr.savefig(data_dir + 'rel_error.pdf', bbox_inches='tight') + + if data_dir is not None: + f0.savefig(data_dir + 'wrms.pdf', bbox_inches='tight') + f1.savefig(data_dir + 'true_pos.pdf', bbox_inches='tight') + f2.savefig(data_dir + 'rms_best.pdf', bbox_inches='tight') + # hl = [] + # for i in range(mean_err.shape[0]): + # hl.append(plt.errorbar(np.arange(mean_err.shape[1]), mean_err[i,:], yerr=std_err[i, :])) + # plt.legend(hl, names) + plt.show() + return f + + +def _save_file(file, data): + with open(file, 'wb') as fh: + pickle.dump(data, fh) + return + +def save_data(data_dir, wrms, true_pos, sel_err, obs, rel_err=None, full_obs=None, t=None): + if t is None: + t = wrms.shape[2] + _save_file(data_dir + 'wrms.pkl', wrms[:,:,:t]) + _save_file(data_dir + 'true_pos.pkl', true_pos[:,:,:t]) + _save_file(data_dir + 'selected_error.pkl', sel_err[:,:,:t]) + _save_file(data_dir + 'obs.pkl', obs) + if rel_err is not None: + _save_file(data_dir + 'relative_error.pkl', rel_err[:,:,:t]) + if full_obs is not None: + fobs = {key:full_obs[key][:t] for key in full_obs} + _save_file(data_dir + 'full_obs.pkl', fobs) + +def _load_file(file): + with open(file, 'rb') as fh: + data = pickle.load(fh) # Dimensions n_learners, n_queries+1, n_trials + return data + +def _load_params(file): + with open(file, 'rt') as fh: + params = yaml.safe_load(fh) + return params + +def get_data_dir(): + Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing + data_dir = askdirectory(initialdir='./data/') + if data_dir == '': # Cancel clicked + raise IOError('No directory selected') + elif data_dir[-1] is not '/': + data_dir += '/' + return data_dir + +def load_data(data_dir=None): + if data_dir is None: + data_dir = get_data_dir() + + wrms_results = _load_file(data_dir+'wrms.pkl') + true_pos_results = _load_file(data_dir+'true_pos.pkl') + selected_error = _load_file(data_dir+'selected_error.pkl') + obs_array = _load_file(data_dir+'obs.pkl') + try: + relative_error = _load_file(data_dir+'relative_error.pkl') + except IOError: + print "No relative error data found." + relative_error = None + try: + full_obs = _load_file(data_dir+'full_obs.pkl') + except IOError: + print "No full observation set found." 
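+        # full_obs.pkl (like relative_error.pkl above) is optional; if the run did not
+        # save it, fall back to None so the rest of the loading and plotting still works.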
+ full_obs = None + return data_dir, wrms_results, true_pos_results, selected_error, obs_array, relative_error, full_obs + + +def load_and_plot(save_plots=True, data_dir=None, *args, **kwargs): + try: + data_dir, wrms_results, true_pos_results, selected_error, obs_array, relative_error, full_obs = load_data(data_dir) + except IOError: + return None + + if save_plots is False: + data_dir = None + hf = plot_results(wrms_results, true_pos_results, selected_error, obs_array, data_dir=data_dir, relative_error=relative_error, *args, **kwargs) + plt.show() + return hf + +def get_selection(): + _dd, wrms, true_pos, sel_err, obs, rel_err, full_obs = load_data() + print "The following models were found:" + for i, obs_item in enumerate(obs): + print "{0}: {1}".format(i, obs_item['name']) + dex = input('Select desired models by index as an array: ') + wrms, true_pos, sel_err = wrms[dex, :, :], true_pos[dex, :, :], sel_err[dex, :, :] + if rel_err is not None: + rel_err = rel_err[dex, :, :] + obs = [obs[i] for i in dex] + # finished = input('Finished? (0/1)') + return wrms, true_pos, sel_err, obs, rel_err + + +def load_multiple(*args, **kwargs): + wrms, true_pos, sel_err, obs, rel_err = get_selection() + app = lambda a, b: np.append(a, b, axis=0) + while True: + try: + wrms2, true_pos2, sel_err2, obs2, rel_err2 = get_selection() + except IOError: + break + wrms, true_pos, sel_err = app(wrms, wrms2), app(true_pos, true_pos2), app(sel_err, sel_err2) + if rel_err is not None: rel_err = app(rel_err, rel_err2) + obs.extend(obs2) + hf = plot_results(wrms, true_pos, sel_err, obs, relative_error=rel_err, *args, **kwargs) + plt.show() + if 'data_dir' in kwargs: + save_data(kwargs['data_dir'], wrms, true_pos, sel_err, obs, rel_err) + print 'Data saved to {0}'.format(kwargs['data_dir']) + return hf, wrms, true_pos, sel_err, obs, rel_err + + +def build_ordinal_wrms(max_y = 0.8, data_dir = None, make_plots = True, *args, **kwargs): + if data_dir is None: + data_dir = get_data_dir() + run_parameters = _load_params(data_dir+'params.yaml') + wave_data = _load_file(data_dir + 'wave_data.pkl') + full_obs = _load_file(data_dir + 'full_obs.pkl') + random_wave = test_data.MultiWave(**run_parameters['wave_params']) + + if full_obs is None: + raise IOError('full_obs.pkl not found, cannot reconstruct without full observation history') + + log_hyp = np.log(run_parameters['hyperparameters']) + n_learners, n_queries = len(run_parameters['learners']), run_parameters['statrun_params']['n_queries'] + n_trials = len(full_obs[run_parameters['learners'][0]['name']]) + x_plot = np.linspace(0.0, 1.0, run_parameters['statrun_params']['n_xtest'], dtype='float') + x_test = np.atleast_2d(x_plot).T + + wkld = np.zeros((n_learners, n_queries+1, n_trials), dtype='float') + max_count = np.zeros((n_learners, n_queries+1, n_trials), dtype='float') + + learners, names = [], [] + for l in run_parameters['learners']: + if 'n_rel_samples' in l['obs_args']: + l['obs_args']['n_rel_samples'] = run_parameters['statrun_params']['n_rel_samples'] + learners.append(active_learners.Learner(**l)) + names.append(l['name']) + + for trial_number in range(n_trials): + print 'Trial {0}'.format(trial_number) + a, f, o, d = wave_data.amplitude[trial_number], wave_data.frequency[trial_number], wave_data.offset[trial_number], wave_data.damping[trial_number] + random_wave.set_values(a, f, o, d) + random_wave.print_values() + abs_obs_fun = GPpref.AbsObservationSampler(random_wave.out, run_parameters['GP_params']['abs_likelihood'], + run_parameters['abs_obs_params']) + + 
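+        # min_max_label is the lowest ordinal label still counted as a "top" point, e.g.
+        # with max_y = 0.8 and a 5-level rating scale (y_list[-1] = 5, an assumed example)
+        # floor(0.8 * 5) = 4, so labels 4 and 5 are treated as maximum-value selections.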
min_max_label = np.floor(max_y * abs_obs_fun.l.y_list[-1]) + + p_y_true = abs_obs_fun.observation_likelihood_array(x_test) + true_y = abs_obs_fun.l.y_list[p_y_true.argmax(axis=0)] # True maximum likelihood labels + max_y_true = (true_y >= min_max_label) # True best label indexes (bool array) + n_max = float(max_y_true.sum()) + + for obs_num in range(0, n_queries+1): + for nl, learner in enumerate(learners): + # Get initial observations and build models + if obs_num == 0: + obs0 = full_obs[learner.name][trial_number][0] + x_rel, uvi_rel, x_abs, y_rel, y_abs = obs0.x_rel, obs0.uvi_rel, obs0.x_abs, obs0.y_rel, obs0.y_abs + model_kwargs = {'x_rel': x_rel, 'uvi_rel': uvi_rel, 'x_abs': x_abs, 'y_rel': y_rel, 'y_abs': y_abs, + 'rel_kwargs': run_parameters['rel_obs_params'], + 'abs_kwargs': run_parameters['abs_obs_params']} + model_kwargs.update(run_parameters['GP_params']) + learner.build_model(model_kwargs) + learner.model.set_hyperparameters(log_hyp) + + else: + next_x, next_y, next_uvi = full_obs[learner.name][trial_number][obs_num] + learner.model.add_observations(next_x, next_y, next_uvi) + + learner.model.solve_laplace() + fhat, vhat = learner.model.predict_latent(x_test) + p_y_est, mu_est = learner.model.abs_likelihood.posterior_likelihood(fhat, vhat) + est_y = abs_obs_fun.l.y_list[p_y_est.argmax(axis=0)] + max_y_est = (est_y >= min_max_label) + + wkld[nl, obs_num, trial_number] = test_data.ordinal_kld(p_y_true, p_y_est, np.maximum(true_y, est_y)) + max_count[nl, obs_num, trial_number] = np.logical_and(max_y_true, max_y_est).sum() / n_max + + + _save_file(data_dir+'wkld.pkl', wkld) + _save_file(data_dir+'max_count.pkl', max_count) + if make_plots: + plot_ordinal_results(wkld, max_count, run_parameters=run_parameters, data_dir=data_dir) + +def load_ordinal_results(data_dir = None): + if data_dir is None: + data_dir = get_data_dir() + + wkld = _load_file(data_dir+'wkld.pkl') + max_count = _load_file(data_dir+'max_count.pkl') + + plot_ordinal_results(wkld, max_count, data_dir=data_dir) + +def plot_ordinal_results(wkld, max_count, run_parameters = None, data_dir=None, bars=True, exclusions=[]): + if run_parameters is None: + with open(data_dir + 'params.yaml', 'rt') as fh: + run_parameters = yaml.safe_load(fh) + + names = [] + for l in run_parameters['learners']: + names.append(l['name']) + + methods_indexes = [] + for i in range(len(names)): + if i not in exclusions: + methods_indexes.append(i) + methods_indexes = np.array(methods_indexes) + + wkld=wkld[methods_indexes,:,:] + max_count=max_count[methods_indexes,:,:] + + f0, ax0 = single_plot(wkld, names=names, ylabel='Weighted KLD', bars=bars, percentile=-1) + f1, ax1 = single_plot(max_count, names=names, ylabel='Proportion of selected max points', bars=True, precut=1, percentile=-1) + f = [f0, f1] + ax = [ax0, ax1] + + if data_dir is not None: + f0.savefig(data_dir + 'wkld.pdf', bbox_inches='tight') + f1.savefig(data_dir + 'max_count.pdf', bbox_inches='tight') + plt.show() + return f + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Statrun plots for active learning preference GP') + parser.add_argument('-np', '--no-plots', dest='make_plots', action='store_false', + help='Turn off plots (default False)') + parser.add_argument('-ow', '--ordinal-wrms', dest='owrms', action='store_true', + help='Build ordinal wrms results (default False)') + parser.add_argument('-lm', '--load-multiple', dest='load_multiple', action='store_true', + help='Load from multiple directories (default False)') + parser.add_argument('-d', 
'--data-dir', default=None, help='Data directory') + args = parser.parse_args() + + if args.owrms: + build_ordinal_wrms(data_dir=args.data_dir, make_plots=args.make_plots) + if args.load_multiple: + load_multiple(save_plots=args.make_plots) + else: + load_and_plot(save_plots=args.make_plots, bars=True) \ No newline at end of file diff --git a/plot_tools.py b/plot_tools.py new file mode 100644 index 0000000..faebfee --- /dev/null +++ b/plot_tools.py @@ -0,0 +1,301 @@ +import os +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.patches import Polygon +import matplotlib.cm as cm +import matplotlib.colors +from nice_plot_colors import * +from cycler import cycler +from mpl_toolkits.mplot3d.art3d import Line3DCollection +# plt.rc('axes', prop_cycle=(cycler('color', [greyify(c, .5, .8) for c in reversed(lines)]))) +plt.rc('axes', prop_cycle=(cycler('color', lines))) + + +def xgen(xp, n_dim): + for i in range(n_dim): + yield xp + + +def make_meshlist(x_plot, d_x): + xx = xgen(x_plot, d_x) + x_mesh = np.vstack(np.meshgrid(*xx, copy=False)).reshape(d_x, -1).T + return x_mesh + + +def make_poly_array(x,y,sigma): + nx = len(x) + sigma = np.atleast_2d(sigma) + xy = np.zeros((2*nx, 2)) + xy[:,0] = np.append(x, x[::-1]) + xy[:,1] = np.append(y-sigma, y[::-1]+sigma[::-1]) + return xy + + +def plot_with_bounds(ax, x, y, s, c=lines[0], lw=1.5, *args, **kwargs): + isort = np.argsort(x.flat) + xx, yy = x[isort], y[isort] + try: + ss = s[isort] + except: + ss = s + xy = make_poly_array(xx, yy, ss) + h_patch = Polygon(xy, ec=c, fc=lighten(c, 3), alpha=0.5) + h_fx, = ax.plot(xx, yy, lw=lw, c=c, *args, **kwargs) + ax.add_patch(h_patch) + clim = ax.get_ylim() + ax.set_ylim(bottom = min(clim[0],xy[:,1].min()), top = max(clim[1], xy[:,1].max())) + return h_fx, h_patch + + +def _set_subplot_labels(ax, xlabel='$x$', ylabel='$f(x)$', title=''): + ax.set_title(title) + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + + +def plot_setup_1drel(t_l = r'Latent function, $f(x)$', t_r = r'Relative likelihood, $P(x_0 \succ x_1 | f(x_0), f(x_1))$', ax=None, **kwargs): + if ax is None: + fig, (ax_l, ax_r) = plt.subplots(1, 2, **kwargs) + else: + (ax_l, ax_r) = ax + fig = ax_l.figure + fig.set_size_inches(8.7, 3.2) + _set_subplot_labels(ax_l, title=t_l) # Latent function + _set_subplot_labels(ax_r, xlabel='$x_0$', ylabel='$x_1$', title=t_r) # Relative likelihood + return fig, (ax_l, ax_r) + + +def plot_setup_1d(t_l = r'Latent function, $f(x)$', t_a = r'Absolute likelihood, $p(y | f(x))$', + t_r = r'Relative likelihood, $P(x_0 \succ x_1 | f(x_0), f(x_1))$', ax = None, **kwargs): + + if ax is None: + fig = plt.figure() + ax_l = fig.add_subplot(131, **kwargs) + ax_a = fig.add_subplot(132, **kwargs) + ax_r = fig.add_subplot(133, **kwargs) + fig.set_size_inches(14.7, 3.5) + else: + (ax_l, ax_a, ax_r) = ax + fig = ax_l.figure + + _set_subplot_labels(ax_l, title=t_l) # Latent function + _set_subplot_labels(ax_a, ylabel='$y$', title=t_a) # Absolute likelihood + _set_subplot_labels(ax_r, xlabel='$x_0$', ylabel='$x_1$', title=t_r) # Relative likelihood + + return fig, (ax_l, ax_a, ax_r) + + +def plot_relative_queries(ax, uvr_train, fuvr_train, yr_train, class_icons, marker_options=None): + for uv, fuv, y in zip(uvr_train, fuvr_train, yr_train): + ax.plot(uv, fuv, 'b-', color=lighten(lines[0]), lw=0.8) + ax.plot(uv[(y + 1) / 2], fuv[(y + 1) / 2], class_icons[(y[0] + 1) / 2], **marker_options) + + +def plot_absolute_likelihood(ax, p_a_y, xt, mu_t, y_samples, E_y=None, xa_train=None, ya_train=None): + dely = y_samples[1, 0] 
- y_samples[0, 0] + abs_extent = [xt[0, 0], xt[-1, 0], y_samples[0, 0] - 0.5*dely, y_samples[-1, 0] + 0.5*dely] + vmax = max(1.0, p_a_y.max()) + h_pat = ax.imshow(p_a_y, origin='lower', extent=abs_extent, aspect='auto', vmin=0.0, vmax=vmax) + if xa_train is not None and xa_train.shape[0] > 0: + ax.plot(xa_train, ya_train, 'w+') + h_lines = ax.plot(xt, mu_t, c=lines[0]) + legend_entries = [r'True mean, $E[y]$'] + if E_y is not None: + h_lines.extend(ax.plot(xt, E_y, color=lines[3])) + legend_entries.append(r'Posterior mean, $E_{p(y|\mathcal{Y})}\left[y\right]$') + ax.legend(h_lines, legend_entries) + ax.set_xlim(xt[0], xt[-1]) + ax.get_figure().colorbar(h_pat, ax=ax) + return (h_pat, h_lines) + +def plot_relative_likelihood(ax, p_y, extent, uvr_train=None, yr_train=None, class_icons=None, marker_options=None): + h_p = ax.imshow(p_y, origin='lower', extent=extent, vmin=0.0, vmax=1.0) + h_pc = ax.contour(p_y, levels=[0.5], origin='lower', linewidths=2, extent=extent) + plt.clabel(h_pc, inline=1, fontsize=10) + ax.get_figure().colorbar(h_p, ax=ax) + + if uvr_train is not None: # and xt.shape[0] > 0: + for uv, y in zip(uvr_train, yr_train): + ax.plot(uv[0], uv[1], class_icons[(y[0] + 1) / 2], **marker_options) + return (h_p, h_pc) + +def true_plots(xt, ft, mu_t, rel_sigma, y_samples, p_a_y, p_r_y=None, xa_train=None, ya_train=None, uvr_train=None, fuvr_train=None, yr_train=None, + class_icons=['ko', 'wo'], marker_options={'mec':'k', 'mew':0.5}, *args, **kwargs): + if p_r_y is None: + return true_plots2D(xt, ft, mu_t, rel_sigma, y_samples, p_a_y, xa_train, ya_train, uvr_train, fuvr_train, yr_train, + class_icons, marker_options, *args, **kwargs) + + # Plot true function, likelihoods and observations + fig, (ax_l, ax_a, ax_r) = plot_setup_1d(**kwargs) + + # True latent + plot_with_bounds(ax_l, xt, ft, rel_sigma, c=lines[0]) + if uvr_train is not None: + plot_relative_queries(ax_l, uvr_train, fuvr_train, yr_train, class_icons, marker_options) + + # True absolute likelihood + h_pat = plot_absolute_likelihood(ax_a, p_a_y, xt, mu_t, y_samples, xa_train=xa_train, ya_train=ya_train) + + # True relative likelihood + rel_y_extent = [xt[0, 0], xt[-1, 0], xt[0, 0], xt[-1, 0]] + h_prt = plot_relative_likelihood(ax_r, p_r_y, rel_y_extent, uvr_train, yr_train, class_icons, marker_options) + return fig, (ax_l, ax_a, ax_r) + + +def estimate_plots(xt, ft, mu_t, fhat, vhat, E_y, rel_sigma, + y_samples, p_a_y, p_r_y, xa_train, ya_train, uvr_train, fuvr_train, yr_train, + class_icons = ['ko', 'wo'], marker_options = {'mec':'k', 'mew':0.5}, n_posterior_samples=0, + posterior_plot_kwargs={}, **kwargs): + if p_r_y is None: + return estimate_plots2D(xt, ft, mu_t, fhat, vhat, E_y, rel_sigma, + y_samples, p_a_y, xa_train, ya_train, uvr_train, fuvr_train, yr_train, + class_icons, marker_options, **kwargs) + + fig, (ax_l, ax_a, ax_r) = plot_setup_1d(**kwargs) + + # Posterior samples + if n_posterior_samples > 0: + f_post = np.random.multivariate_normal(fhat.flatten(), vhat, n_posterior_samples) + h_pp = ax_l.plot(xt, f_post.T, lw=0.8, **posterior_plot_kwargs) + else: + h_pp = None + + # Latent function + hf, hpf = plot_with_bounds(ax_l, xt, ft, rel_sigma, c=lines[0]) + hf_hat, hpf_hat = plot_with_bounds(ax_l, xt, fhat, np.sqrt(np.atleast_2d(vhat.diagonal()).T), c=lines[1]) + if uvr_train is not None: + plot_relative_queries(ax_l, uvr_train, fuvr_train, yr_train, class_icons, marker_options) + + ax_l.legend([hf, hf_hat], [r'True latent function, $f(x)$', r'$\mathcal{GP}$ estimate $\hat{f}(x)$']) + + # Absolute posterior 
likelihood + h_abs = plot_absolute_likelihood(ax_a, p_a_y, xt, mu_t, y_samples, E_y=E_y, xa_train=xa_train, ya_train=ya_train) + + # Relative posterior likelihood + rel_y_extent = [xt[0, 0], xt[-1, 0], xt[0, 0], xt[-1, 0]] + h_rel = plot_relative_likelihood(ax_r, p_r_y, rel_y_extent, uvr_train, yr_train, class_icons, marker_options) + + return fig, (ax_l, ax_a, ax_r) + + +# 2D Plot tools + +def plot_setup_2d(t_l = r'Latent function, $f(x)$', t_a = r'Absolute likelihood, $p(y | f(x))$', t_r = None, ax = None, **kwargs): + + if ax is None: + fig = plt.figure() + ax_l = fig.add_subplot(121, projection='3d', **kwargs) + ax_a = fig.add_subplot(122, projection='3d', **kwargs) + fig.set_size_inches(10.0, 3.5) + else: + (ax_l, ax_a) = ax + fig = ax_l.figure + + # Latent function + ax_l.set_title(t_l) + ax_l.set_xlabel('$x_0$') + ax_l.set_ylabel('$x_1$') + ax_l.set_zlabel('$f(x)$') + + # Absolute likelihood + ax_a.set_title(t_a) + ax_a.set_xlabel('$x_0$') + ax_a.set_ylabel('$x_1$') + ax_a.set_zlabel('$y$') + return fig, (ax_l, ax_a) + +def true_plots2D(xt, ft, mu_t, rel_sigma, y_samples, p_a_y, xa_train=None, ya_train=None, uvr_train=None, fuvr_train=None, yr_train=None, + class_icons=['ko', 'wo'], marker_options={'mec':'k', 'mew':0.5}, *args, **kwargs): + + # Plot true function, likelihoods and observations + nx = int(np.sqrt(xt.shape[0])) + x0 = xt[0:nx, 0] # Assuming generated using make_meshlist + x1 = xt[0:-1:nx, 1] + xx, yy = np.meshgrid(x0, x1) + + fig, (ax_l, ax_a) = plot_setup_2d(**kwargs) + + # True latent + ax_l.plot_surface(xx, yy, np.reshape(ft, (nx, nx)), color=lines[0]) + # ax_l.imshow(np.reshape(ft, (nx, nx)), extent=[x0[0], x1[-1], x1[0], x1[1]], origin='lower', aspect='auto') + + # True absolute likelihood + h_yt = ax_a.plot_wireframe(xx, yy, np.reshape(mu_t, (nx, nx)), color=lines[1]) + + norm_py = p_a_y/p_a_y.max() + cc = cm.get_cmap('Blues') + h_py = [] + for y, py in zip(y_samples, norm_py): + h_py.append(ax_a.scatter(xt[:, 0], xt[:, 1], y, s=py*15.0, marker='o', c=cc(py))) + + if xa_train is not None and xa_train.shape[0] > 0: + ax_a.scatter(xa_train[:, 0], xa_train[:, 1], ya_train, c='w', marker='+') + ax_a.legend([h_yt], [r'True mean $E[y]$']) + # ax_a.set_xlim(xt[0], xt[-1]) + + return fig, (ax_l, ax_a) + + +def estimate_plots2D(xt, ft, mu_t, fhat, vhat, E_y, rel_sigma, + y_samples, p_a_y, xa_train, ya_train, uvr_train, fuvr_train, yr_train, + class_icons = ['ko', 'wo'], marker_options = {'mec':'k', 'mew':0.5}, *args, **kwargs): + + # Plot estimated function, likelihoods and observations + nx = int(np.sqrt(xt.shape[0])) + x0 = xt[0:nx, 0] # Assuming generated using make_meshlist + x1 = xt[0:-1:nx, 1] + xx, yy = np.meshgrid(x0, x1) + + fig, (ax_l, ax_a) = plot_setup_2d(**kwargs) + cc = cm.get_cmap('inferno') + + # Latent function estimate + # hf =ax_l.plot_wireframe(xx, yy, np.reshape(ft, (nx, nx)), color=lines[0]) + tfc = np.reshape(vhat.diagonal(), (nx, nx)) + norm = matplotlib.colors.Normalize(vmin=0, vmax=tfc.max()) + hf_hat = ax_l.plot_surface(xx, yy, np.reshape(fhat, (nx, nx)), facecolors=cc(norm(tfc))) + m = cm.ScalarMappable(cmap=cc, norm=norm) + m.set_array([]) + # ax_cb = fig.colorbar(m, ax=ax_l) + + # ax_l.imshow(np.reshape(fhat, (nx, nx)), extent=[x0[0], x1[-1], x1[0], x1[1]], origin='lower', aspect='auto') + # ax_l.legend([hf], [r'True latent function, $f(x)$']) #, r'$\mathcal{GP}$ estimate $\hat{f}(x)$']) + + if uvr_train.shape[0] > 0: + rel_segments = np.zeros((uvr_train.shape[0], 2, 3)) + rel_segments[:, :, 0:2] = uvr_train + rel_segments[:, :, 2] = 
fuvr_train + rel_lines = Line3DCollection(rel_segments, color=lines[3]) + ax_l.add_collection(rel_lines) + + rel_hipoints = np.array([uv[(i+1)/2] for uv, i in zip(rel_segments, yr_train.flat)]) + ax_l.plot(rel_hipoints[:, 0], rel_hipoints[:, 1], rel_hipoints[:, 2], 'ko', **marker_options) + + # Absolute posterior likelihood + # h_yt = ax_a.plot_wireframe(xx, yy, np.reshape(mu_t, (nx, nx)), color=lines[1]) + hEy = ax_a.plot_wireframe(xx, yy, np.reshape(E_y, (nx, nx)), color=lines[3]) + + cc = cm.get_cmap('Blues') + norm_py = p_a_y/p_a_y.max() + h_points = [] + for y, py in zip(y_samples, norm_py): + h_points.append(ax_a.scatter(xt[:, 0], xt[:, 1], y, s=py*15.0, marker='o', c=cc(py))) + + if xa_train.shape[0] > 0: + ax_a.plot(xa_train[:,0], xa_train[:,1], ya_train.flat, 'r^', color=lines[1]) + ax_a.legend([hEy], [r'Posterior mean, $E_{p(y|\mathcal{Y})}\left[y\right]$']) + # fig.colorbar(h_pap, ax=ax_a) + + return fig, (ax_l, ax_a) + + +def reset_axes2d(est_ax): + for ax in est_ax: + ax.cla() + ax.set_xlim([0.0, 1.0]) + ax.set_ylim([0.0, 1.0]) + +def ensure_dir(file_path): + directory = os.path.dirname(file_path) + if not os.path.exists(directory): + os.makedirs(directory) \ No newline at end of file diff --git a/predictive_entropy_search.py b/predictive_entropy_search.py new file mode 100644 index 0000000..e69de29 diff --git a/pref_active_learning.py b/pref_active_learning.py new file mode 100644 index 0000000..441373e --- /dev/null +++ b/pref_active_learning.py @@ -0,0 +1,204 @@ +# Simple 1D GP classification example +import time +import numpy as np +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt +import GPpref +import plot_tools as ptt +import active_learners +import test_data +import yaml +from matplotlib.backends.backend_pdf import PdfPages + +nowstr = time.strftime("%Y_%m_%d-%H_%M") +plt.rc('font',**{'family':'serif','sans-serif':['Computer Modern Roman']}) +plt.rc('text', usetex=True) + +save_plots = True +plot_type = 'video_frames' # 'pdf' +inter_frames = 30 + +with open('./data/shortrun_2D.yaml', 'rt') as fh: + wave = yaml.safe_load(fh) + +try: + np.random.seed(wave['statrun_params']['randseed']) + n_rel_train, n_abs_train = wave['statrun_params']['n_rel_train'], wave['statrun_params']['n_abs_train'] + n_queries = wave['statrun_params']['n_queries'] +except KeyError: + np.random.seed(0) + n_rel_train = 1 + n_abs_train = 0 + n_queries = 20 + +n_xplot = 31 +keep_f = True +learner_index = -2 + +log_hyp = np.log(wave['hyperparameters']) + +# Define polynomial function to be modelled +d_x = wave['GP_params']['hyper_counts'][0]-1 +random_wave = test_data.MultiWave(n_dimensions=d_x, **wave['wave_params']) +true_function = random_wave.out +random_wave.print_values() + +if save_plots: + nowstr = time.strftime("%Y_%m_%d-%H_%M") + fig_dir = 'fig/' + nowstr + '/' + ptt.ensure_dir(fig_dir) + print "Figures will be saved to: {0}".format(fig_dir) + if plot_type == 'pdf': + pdf_pages = PdfPages(fig_dir+'posterior_all.pdf') + + +rel_obs_fun = GPpref.RelObservationSampler(true_function, wave['GP_params']['rel_likelihood'], wave['rel_obs_params']) +abs_obs_fun = GPpref.AbsObservationSampler(true_function, wave['GP_params']['abs_likelihood'], wave['abs_obs_params']) +rel_sigma = wave['rel_obs_params']['sigma'] + +# True function +x_plot = np.linspace(0.0,1.0,n_xplot,dtype='float') +x_test = ptt.make_meshlist(x_plot, d_x) +f_true = abs_obs_fun.f(x_test) +mu_true = abs_obs_fun.mean_link(x_test) +abs_y_samples = abs_obs_fun.l.y_list +p_abs_y_true = 
abs_obs_fun.observation_likelihood_array(x_test, abs_y_samples) +if d_x == 1: + p_rel_y_true = rel_obs_fun.observation_likelihood_array(x_test) +else: + p_rel_y_true = None + +# Training data - note the shifted domain to get the sample out of the way +far_domain = np.tile(np.array([[-3.0], [-2.0]]), d_x) +x_rel, uvi_rel, y_rel, fuv_rel = rel_obs_fun.generate_n_observations(n_rel_train, n_xdim=d_x, domain=far_domain) +x_abs, y_abs, mu_abs = abs_obs_fun.generate_n_observations(n_abs_train, n_xdim=d_x, domain=far_domain) + + +# Plot true functions +plot_kwargs = {'xlim':[0.0, 1.0], 'ylim':[0.0, 1.0]} +if plot_type != 'pdf': + if p_rel_y_true is None: + ax_t = [[], []] + fig_t = plt.figure() + fig_t.set_size_inches([11.44, 8.02]) + ax_t[0].append(fig_t.add_subplot(221, projection='3d', **plot_kwargs)) + ax_t[0].append(fig_t.add_subplot(222, projection='3d', **plot_kwargs)) + ax_t[1].append(fig_t.add_subplot(223, projection='3d', **plot_kwargs)) + ax_t[1].append(fig_t.add_subplot(224, projection='3d', **plot_kwargs)) + else: + fig_t, ax_t = plt.subplots(2,3) + true_ax = ax_t[0] + est_ax = ax_t[1] +else: + true_ax = None + est_ax = None + +fig_t, ax_t = ptt.true_plots(x_test, f_true, mu_true, rel_sigma, + abs_y_samples, p_abs_y_true, p_rel_y_true, + x_abs, y_abs, x_rel[uvi_rel], fuv_rel, y_rel, + t_l=r'True latent function, $f(x)$', ax=true_ax, **plot_kwargs) +if save_plots and (plot_type == 'pdf'): + fig_t.savefig(fig_dir+'true.pdf', bbox_inches='tight') + +# Construct active learner object + +# Construct GP object +prefGP = GPpref.PreferenceGaussianProcess(x_rel, uvi_rel, x_abs, y_rel, y_abs, **wave['GP_params']) + +model_kwargs = {'x_rel':x_rel, 'uvi_rel':uvi_rel, 'x_abs':x_abs, 'y_rel':y_rel, 'y_abs':y_abs, + 'rel_kwargs': wave['rel_obs_params'], 'abs_kwargs': wave['abs_obs_params']} +model_kwargs.update(wave['GP_params']) +learner_kwargs = wave['learners'][learner_index] +learner = active_learners.Learner(**learner_kwargs) +learner.build_model(model_kwargs) + +# Get initial solution +learner.model.set_hyperparameters(log_hyp) +f = learner.model.solve_laplace() +learner.model.print_hyperparameters() + +fig_p, ax_p, pre_data = learner.model.create_posterior_plot(x_test, f_true, mu_true, rel_sigma, fuv_rel, abs_y_samples, ax=est_ax, **plot_kwargs) +if save_plots: + wave['learners'] = [learner_kwargs] + with open(fig_dir+'params.yaml', 'wt') as fh: + yaml.safe_dump(wave, fh) + if plot_type == 'pdf': + pdf_pages.savefig(fig_p, bbox_inches='tight') + else: + fig_p.savefig(fig_dir+'posterior000.png', bbox_inches='tight') +print learner_kwargs + +for obs_num in range(n_queries): + if learner.update_p_rel: + linear_p_rel = max(0.0, (n_queries - obs_num) / float(n_queries)) + learner.obs_arguments['p_rel'] = linear_p_rel + # if 'p_rel' in obs_arguments: + # obs_arguments['p_rel'] = max(0.0, (n_queries - obs_num) / float(n_queries)) + t0 = time.time() + next_x = learner.select_observation() + if next_x.shape[0] == 1: + next_y, next_f = abs_obs_fun.generate_observations(next_x) + learner.model.add_observations(next_x, next_y, keep_f=keep_f) + print '{n:02} - Abs: x:{0}, y:{1}'.format(next_x[0], next_y[0], n=obs_num+1), + else: + next_y, next_uvi, next_fx = rel_obs_fun.gaussian_multi_pairwise_sampler(next_x) + next_fuv = next_fx[next_uvi][:, :, 0] + fuv_rel = np.concatenate((fuv_rel, next_fuv), 0) + learner.model.add_observations(next_x, next_y, next_uvi, keep_f=keep_f) + print '{n:02} - Rel: x:{0}, best_index:{1}'.format(next_x.flatten(), next_uvi[0, 1], n=obs_num+1), + print 't = 
{0:0.3f}s'.format(time.time()-t0) + f = learner.model.solve_laplace() + + if save_plots: + ptt.reset_axes2d(est_ax) + fig_p, ax_p, post_data = learner.model.create_posterior_plot(x_test, f_true, mu_true, rel_sigma, fuv_rel, + abs_y_samples, ax=est_ax, **plot_kwargs) + if plot_type == 'pdf': + pdf_pages.savefig(fig_p, bbox_inches='tight') + else: + fig_p.savefig(fig_dir + 'posterior{0:03d}.png'.format((obs_num+1)*inter_frames), bbox_inches='tight') + for iframe in range(1, inter_frames): + scale = float(iframe)/inter_frames + if pre_data[-1] is None: + pre_data = pre_data[:-1] + inter_data = [(1-scale)*pre + scale*post for pre, post in zip(pre_data, post_data)] + if len(inter_data) < len(post_data): + inter_data.append(None) + fig_p, ax_p = learner.model.update_posterior_plot(x_test, f_true, mu_true, rel_sigma, fuv_rel, + abs_y_samples, *inter_data, ax_p=est_ax) + fig_p.savefig(fig_dir + 'posterior{0:03d}.png'.format(obs_num*inter_frames + iframe), bbox_inches='tight') + pre_data = post_data + plt.close(fig_p) + # fig_p.savefig(fig_dir+'posterior{0:02d}.pdf'.format(obs_num+1), bbox_inches='tight') + + +learner.model.print_hyperparameters() + +if save_plots and plot_type == 'pdf': + pdf_pages.close() +else: + fig_post, ax_p, h_plotobj = learner.model.create_posterior_plot(x_test, f_true, mu_true, rel_sigma, fuv_rel, abs_y_samples, **plot_kwargs) + +plt.show(block=False) +print "Finished!" + +## SCRAP +# learner = active_learners.UCBAbsRelD(**active_args) +# # obs_arguments = {'req_improvement': 0.60, 'n_test': 50, 'gamma': 2.0, 'n_rel_samples': 5, 'p_thresh': 0.7} +# obs_arguments = {'n_test': 100, 'p_rel': 0.5, 'n_rel_samples': 5, 'gamma': 2.0} + +# learner = active_learners.ExpectedImprovementRel(**GP_kwargs) +# obs_arguments = {'n_test': 100, 'zeta': 0.1, 'p_rel':1.0} + +# learner = active_learners.SampledClassification(verbose=verbose, **GP_kwargs) +# obs_arguments = {'n_test':50, 'n_samples':20, 'y_threshold':0.7, 'p_pref_tol':1e-3, 'n_mc_abs':50} + +# learner = active_learners.DetSelect(**GP_kwargs) +# obs_arguments = {'n_test': 100, 'n_rel_samples': 5, 'gamma': 2.0, 'tau': 0.5} + +# learner = active_learners.ActiveLearner(**GP_kwargs) +# obs_arguments = {'p_rel':0.0, 'n_rel_samples': 5} + +# learner = active_learners.MaxVar(**GP_kwargs) +# obs_arguments = {'n_rel_samples': 5, 'p_rel': -10.0, 'rel_tau': 0.1, 'abs_tau': 0.1} diff --git a/read_wine_data.py b/read_wine_data.py new file mode 100644 index 0000000..f79d409 --- /dev/null +++ b/read_wine_data.py @@ -0,0 +1,76 @@ +import numpy as np +import matplotlib.pyplot as plt +import yaml +import GPpref +import scipy.optimize as op +from utils.wine_data import WineQualityData + + +optimise_hyper = False +use_normalised = True +R_limit = 0.1 +wine_type = 'white' + +# Get wine data: +input_data = WineQualityData(wine_type=wine_type) + +fh, ah = plt.subplots(1, 1) +ah.hist(input_data.data.quality, np.arange(-0.5, 11, 1.0)) +ah.set_xticks(np.arange(11)) +ah.set_xlabel('Score') +ah.set_ylabel('Count') +ah.set_title('{0} wine'.format(wine_type)) + +with open('./data/statruns_wine.yaml', 'rt') as fh: + config = yaml.safe_load(fh) +d_x = config['GP_params']['hyper_counts'][0]-1 +log_hyp = np.log(config['hyperparameters']) + +wine_norm = (input_data.data.values - input_data.data.values.min(axis=0)) / (input_data.data.values.max(axis=0) - input_data.data.values.min(axis=0)) + +if use_normalised: + wine_data = wine_norm +else: + wine_data = input_data.data.values +fh2, ah2 = plt.subplots(3, 4) +for i, aa in enumerate(ah2.flat): + aa.cla() + 
aa.scatter(wine_data[:,i], wine_data[:,-1], 2, wine_data[:,0]) + aa.set_title('{0}, {1}'.format(input_data.data.keys()[i], config['hyperparameters'][i])) + +fh3, ah3 = plt.subplots() +C = np.corrcoef(wine_norm, rowvar=False) +hmat = ah3.matshow(np.abs(C)) + +plt.show() +for R, var_name in zip(C[0:-1,-1], input_data.data.keys()): + print('{0}: R = {1:0.2f}'.format(var_name, R)) +corr_variables = (abs(C[-1]) >= R_limit) + +# Subsample wine data down to only correlated variables +# wine_data = wine_data[:, corr_variables] + +if optimise_hyper: + n_rel = 1 + n_abs = 300 + x_rel = wine_data[0:2*n_rel, 0:-1] + uvi_rel = np.array([[0, 1]]) + fuv_rel = wine_data[0:2*n_rel, -1] + y_rel = np.array([1]) + + x_abs = wine_data[2*n_rel:(2*n_rel+n_abs), 0:-1] + y_abs = np.expand_dims(input_data.data.values[2 * n_rel:(2 * n_rel + n_abs), -1] - 2, axis=1).astype(dtype='int') + model_kwargs = {'x_rel': x_rel, 'uvi_rel': uvi_rel, 'x_abs': x_abs, 'y_rel': y_rel, 'y_abs': y_abs, + 'rel_kwargs': config['rel_obs_params'], 'abs_kwargs': config['abs_obs_params']} + model_kwargs.update(config['GP_params']) + model_kwargs['verbose'] = 2 + prefGP = GPpref.PreferenceGaussianProcess(**model_kwargs) + + prefGP.set_hyperparameters(log_hyp) + f = prefGP.calc_laplace(log_hyp) + print np.exp(log_hyp) + log_hyp_opt = op.fmin(prefGP.calc_nlml,log_hyp) + print np.exp(log_hyp_opt) + config['hyperparameters'] = np.exp(log_hyp_opt).tolist() + with open('data/wine_quality/learned_{0}_params.yaml'.format(wine_type), 'w') as fh: + yaml.safe_dump(config, fh) diff --git a/test_data.py b/test_data.py new file mode 100644 index 0000000..205ab70 --- /dev/null +++ b/test_data.py @@ -0,0 +1,216 @@ +import numpy as np +import pickle + + +def wrms(y_true, y_est, weight=True): + # RMS weighted by the true value of y (high value high importance) + if weight: + w = y_true + else: + w = 1.0 + return np.sqrt(np.mean(((y_true - y_est)*w)**2)) + + +def wrms_misclass(y_true, y_est, w_power=2): + # This is the misclassification error, where the weight is the max of the true or predicted value (penalise + # predicting high values if the true value is low) + w = np.power(np.maximum(y_true, y_est), w_power) + return np.sqrt(np.mean(((y_true - y_est)*w)**2)) + +def rel_error(y_true, prel_true, y_est, prel_est, weight=False): + if weight: + y_max = np.maximum(y_true.flatten(), y_est.flatten()) + else: + y_max = np.ones(y_true.shape[0], dtype='float') + nx = prel_true.shape[0] + mean_p_err = 0.0 + w_sum = 0.0 + for i in range(nx): + for j in range(i, nx): + w = max(y_max[i], y_max[j]) + w_sum += w + mean_p_err += w*np.abs(prel_true[i,j] - prel_est[i,j]) + return mean_p_err/w_sum + +def ordinal_kld(p_y_true, p_y_est, w = np.array([1.0])): + kld = -(p_y_true * np.log(p_y_est/p_y_true)).sum(axis=0) + wkld = w*kld/w.mean() + return wkld.mean() + +class ObsObject(object): + def __init__(self, x_rel, uvi_rel, x_abs, y_rel, y_abs): + self.x_rel, self.uvi_rel, self.x_abs, self.y_rel, self.y_abs = x_rel, uvi_rel, x_abs, y_rel, y_abs + + def get_obs(self): + return self.x_rel, self.uvi_rel, self.x_abs, self.y_rel, self.y_abs + +def obs_stats(obs_array, n_rel_samples): + for method in obs_array: + n_rel = 0.0 + n_abs = 0.0 + for ob in method['obs']: + n_rel += ob.x_rel.shape[0]/n_rel_samples + n_abs += ob.x_abs.shape[0] + print "{0}: p_rel = {1}, p_abs = {2}".format(method['name'], n_rel/(n_rel+n_abs), n_abs/(n_rel+n_abs)) + + +class VariableWave(object): + def __init__(self, amp_range, f_range, off_range, damp_range, n_components=1, n_dimensions=1): + self.amp_range 
= amp_range + self.f_range = f_range + self.off_range = off_range + self.damp_range = damp_range + self.n_components = n_components + self.n_dimensions = n_dimensions + self.randomize() + + def out(self, x): + y = self.amplitude*np.cos(self.frequency * np.pi * (x-self.offset)) * np.exp(-self.damping*(x-self.offset)**2) + return y + + def set_values(self, a, f, o, d): + self.amplitude = a + self.frequency = f + self.offset = o + self.damping = d + + def randomize(self, print_vals=False): + self.amplitude = np.random.uniform(low=self.amp_range[0], high=self.amp_range[1]/self.n_components) + self.frequency = np.random.uniform(low=self.f_range[0], high=self.f_range[1]) + self.offset = np.random.uniform(low=self.off_range[0], high=self.off_range[1]) + self.damping = np.random.uniform(low=self.damp_range[0], high=self.damp_range[1]) + if print_vals: + self.print_values() + + def print_values(self): + print "a: {0:.2f}, f: {1:.2f}, o: {2:.2f}, d: {3:.2f}".format(self.amplitude, self.frequency, self.offset, self.damping) + + def export_state(self): + return dict(amplitude=self.amplitude, frequency=self.frequency, offset=self.offset, damping=self.damping) + + +class MultiWave(VariableWave): + def out(self, x): + y = np.zeros(np.array(x).shape) + for a, f, o, d in zip(self.amplitude, self.frequency, self.offset, self.damping): + y += a*np.cos(f*np.pi*(x-o)) * np.exp(-d*(x-o)**2) + return y.sum(axis=1, keepdims=True) + + def randomize(self, print_vals=False): + self.amplitude = np.random.uniform(low=self.amp_range[0], high=self.amp_range[1], size=(self.n_components, self.n_dimensions)) + self.frequency = np.random.uniform(low=self.f_range[0], high=self.f_range[1], size=(self.n_components, self.n_dimensions)) + self.offset = np.random.uniform(low=self.off_range[0], high=self.off_range[1], size=(self.n_components, self.n_dimensions)) + self.damping = np.random.uniform(low=self.damp_range[0], high=self.damp_range[1], size=(self.n_components, self.n_dimensions)) + if print_vals: + self.print_values() + + def print_values(self): + print "a: {0}, f: {1}, o: {2}, d: {3}".format(self.amplitude, self.frequency, self.offset, self.damping) + + def get_values(self): + return self.amplitude, self.frequency, self.offset, self.damping + + +class DoubleMultiWave(object): + def __init__(self, amp_range, f_range, off_range, damp_range, n_components=2): + self.f_low = MultiWave(amp_range[0:2], f_range[0:2], off_range[0:2], damp_range[0:2], n_components=1) + self.f_high = MultiWave(amp_range[2:4], f_range[2:4], off_range[2:4], damp_range[2:4], n_components=n_components-1) + self.randomize() + + def out(self, x): + return self.f_low.out(x)+self.f_high.out(x) + + def randomize(self, print_vals=False): + self.f_low.randomize() + self.f_high.randomize() + if print_vals: + self.print_values() + + def print_values(self): + self.f_low.print_values() + self.f_high.print_values() + + +class WaveSaver(object): + def __init__(self, n_trials, n_components, n_dim=1): + self.amplitude = np.zeros((n_trials, n_components, n_dim), dtype='float') + self.frequency = np.zeros((n_trials, n_components, n_dim), dtype='float') + self.offset = np.zeros((n_trials, n_components, n_dim), dtype='float') + self.damping = np.zeros((n_trials, n_components, n_dim), dtype='float') + self.n = 0 + + def set_vals(self, n, a, f, o, d): + self.n = n + self.amplitude[n] = a + self.frequency[n] = f + self.offset[n] = o + self.damping[n] = d + + def save(self, filename): + with open(filename, 'wb') as fh: + pickle.dump(self, fh) + + def get_vals(self, n): + 
return self.amplitude[n], self.frequency[n], self.offset[n], self.damping[n] + +def damped_wave(x): + y = np.cos(6 * np.pi * (x - 0.5)) * np.exp(-10 * (x - 0.5) ** 2) + return y + +def multi_peak(x): + y = np.cos(6 * np.pi * (x - 0.5)) + return y + +def basic_sine(x): + y = (np.sin(x*2*np.pi + np.pi/4))/1.2 + return y + +def zero_fun(x): + return 0*x + +def data1(): + x_rel = np.array([[0.6], [0.7]]) + uvi_rel = np.array([[0, 1], [1, 0]], dtype='int') + # uv_rel = x_rel[uvi_rel][:,:,0] + y_rel = np.array([[1], [1]], dtype='int') + fuv_rel = np.array([[-0.1, 0.1], [-0.1, 0.1]]) + + x_abs = np.array([[0.2]]) + y_abs = np.array([[0.5]]) + mu_abs = np.array([[0.0]]) + + return x_rel, uvi_rel, y_rel, fuv_rel, x_abs, y_abs, mu_abs + + +def data2(): + x_rel = np.array([[ 0.8517731 ], [ 0.66358258], + [ 0.06054717], [ 0.45331369], + [ 0.8461625 ], [ 0.58854979]]) + uvi_rel = np.array([[0, 1], [2, 3], [4, 5]], dtype='int') + # uv_rel = x_rel[uvi_rel][:,:,0] + y_rel = np.array([[-1], [1], [1]], dtype='int') + fuv_rel = np.array([[0.0043639, -0.10653237], [0.01463141, 0.05046293], + [0.01773679, 0.45730181]]) + + x_abs = np.array([[0.43432351]]) + y_abs = np.array([[0.38966307]]) + mu_abs = np.array([[0.0]]) + + return x_rel, uvi_rel, y_rel, fuv_rel, x_abs, y_abs, mu_abs + + +def data3(): + x_rel = np.array([[0.8517731 ], [0.66358258], + [0.06054717], [0.45331369], + [0.8461625 ], [0.58854979]]) + uvi_rel = np.array([[0, 1], [2, 3], [4, 5]], dtype='int') + # uv_rel = x_rel[uvi_rel][:,:,0] + y_rel = np.array([[-1], [1], [1]], dtype='int') + fuv_rel = np.array([[0.0043639, -0.10653237], [0.01463141, 0.05046293], + [0.01773679, 0.45730181]]) + + x_abs = np.array([[0.43432351], [0.03362113]]) + y_abs = np.array([[0.38966307], [0.999]]) + mu_abs = np.array([[0.0], [0.0]]) + + return x_rel, uvi_rel, y_rel, fuv_rel, x_abs, y_abs, mu_abs diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utils/data_downloader.py b/utils/data_downloader.py new file mode 100644 index 0000000..0afadfa --- /dev/null +++ b/utils/data_downloader.py @@ -0,0 +1,42 @@ +from __future__ import print_function +import os +import urllib3 +import zipfile + +class MrDataGrabber(object): # Does not work with bananas + + def __init__(self, url, target_path): + self.url = url + self.path = target_path + self.target_basename = os.path.basename(self.url) + self.target_file = os.path.join(self.path, self.target_basename) + + def download(self): + if os.path.exists(self.target_file): + print('Target file already downloaded, please remove if you wish to update.') + else: + http = urllib3.PoolManager() + r = http.request('GET', self.url, preload_content=False) + + with open(self.target_file, 'wb') as out: + print('Downloading {0}...'.format(self.url), end='') + while True: + data = r.read(2**16) + if not data: + break + out.write(data) + r.release_conn() + print(' done.') + + if os.path.splitext(self.target_basename)[1].lower() == '.zip': + self.unzip_data() + return self.target_file + + def unzip_data(self, target_dir=None, force=False): + if target_dir is None: + target_dir = os.path.join(self.path, os.path.splitext(self.target_basename)[0]) + if (not force) and os.path.isdir(target_dir): + print('Target directory already exists, not unzipping.') + else: + zip_ref = zipfile.ZipFile(self.target_file, 'r') + zip_ref.extractall(target_dir) diff --git a/utils/wine_data.py b/utils/wine_data.py new file mode 100644 index 0000000..fae5562 --- /dev/null +++ b/utils/wine_data.py @@ -0,0 +1,21 @@ 
+import pandas +import os +from utils.data_downloader import MrDataGrabber + + +class WineQualityData(object): + def __init__(self, wine_type='red', file_loc='data/wine_quality/'): + url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-{0}.csv'.format( + wine_type) + + try: + # Create target directory (including parents) if it does not exist yet + os.makedirs(file_loc) + print('Directory {0} created'.format(file_loc)) + except OSError: + print('Directory {0} already exists'.format(file_loc)) + + self.download_data = MrDataGrabber(url, file_loc) + self.download_data.download() + self.file = self.download_data.target_file + self.data = pandas.read_csv(self.file, delimiter=';')
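
A minimal usage sketch of the wine-quality loader above (not part of the diff): it assumes the repository root is on the Python path and that the UCI CSV is reachable, and it mirrors the min-max normalisation used in read_wine_data.py; the 'red' wine type and the variable names are illustrative only.

import numpy as np
from utils.wine_data import WineQualityData

# Downloads winequality-red.csv on first use, then reads it with pandas.
wine = WineQualityData(wine_type='red')
raw = wine.data.values                      # 11 physicochemical columns + quality (last column)
wine_norm = (raw - raw.min(axis=0)) / (raw.max(axis=0) - raw.min(axis=0))
x_features = wine_norm[:, :-1]              # normalised inputs, as in read_wine_data.py
y_quality = raw[:, -1].astype('int')        # raw 0-10 quality scores
print('{0} samples, quality range {1}-{2}'.format(x_features.shape[0], y_quality.min(), y_quality.max()))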