Update main branch with Nick's code #1

Open · wants to merge 39 commits into base: master

Commits (39)
b5eb148  Add Absolute Ratings (thane, Feb 28, 2017)
fce5cf6  Estimate Derivatives (thane, Feb 28, 2017)
c0f7a17  Corrected Derivatives (thane, Feb 28, 2017)
a679c97  Fixed beta stuff (Mar 1, 2017)
862ac83  Added sampling to recover posterior distribution for visualisation, n… (Mar 15, 2017)
9689379  WIP, active learning doesn't do anything, tried to add beta in input … (Mar 17, 2017)
2280a14  Messing with abs probit sigma and v for no particular reason (Mar 17, 2017)
a805799  Added active learning and fixed the nlml calculation (I should have m… (Mar 28, 2017)
7c0635c  Added bunch of plotting, sorted out likelihoods, training hypers work… (Mar 29, 2017)
5a29593  :poop: :fire: Fixed error in PrefProbit derivatives (Apr 4, 2017)
495ca6a  :runner: Added statruns, lots of changes to active learners. (Apr 6, 2017)
2d5a66b  Rarer events in statruns, added error bars to plots (May 3, 2017)
b90ae9d  Changed some active learner stuff, updated plots to match paper notation (May 12, 2017)
db9ea3c  Messed around with active learners, sampling from posterior (Aug 8, 2017)
37e71df  Added YAML stuff for saving trial data, trying to make statruns work … (Oct 6, 2017)
2b27140  Added more learners, currently switching to have learners as a list i… (Oct 27, 2017)
b7cf27f  Added multi-directory plotting, messing with MaxVar (Nov 2, 2017)
3d41512  Added OrdinalProbit likelihood class to GPpref and added a demonstrat… (Nov 10, 2017)
231ecfa  Ordinals likelihood working, changed the y_list in OrdinalProbit to a… (Nov 18, 2017)
43862fb  Lots of updates - unspecified. (Dec 2, 2017)
ac1abbb  Fixed GP_preference_demo (nrjl, Feb 2, 2018)
56c9abc  Remote statrun fixes (nrjl, Feb 5, 2018)
a174917  Added methods for 2D plots, still pretty hard to interpret but working (nrjl, Feb 7, 2018)
eee3fcd  Some 2D changes (nrjl, Mar 23, 2018)
5413fef  Trying to fix regression covariance because broken (nrjl, Mar 24, 2018)
9415866  Added full cov to GPr, modified plot code slightly (nrjl, Mar 26, 2018)
334e74a  Plot kwargs for posterior samples (nrjl, Mar 26, 2018)
8716024  Added 2D support for preference demo and statruns (nrjl, Jun 12, 2018)
d5c68e5  Tidy plotting routines (nrjl, Mar 23, 2019)
33ce404  Merge branch 'combined-nick' of github.com:osurdml/GPtest into combin… (nrjl, Mar 23, 2019)
7f3d49e  Add wine data (nrjl, Mar 24, 2019)
e4a58de  Add wine data wget commands (nrjl, Mar 24, 2019)
66f2788  Add wine data basic tests (nrjl, Mar 27, 2019)
a0515bd  Add video capability to pref_active_learning (nrjl, May 25, 2019)
7903779  Add wine data tools (nrjl, Dec 23, 2019)
85e08ac  Add option for random test point sampling (nrjl, Dec 23, 2019)
24777e2  Fix OrdinalSampler abs sample (nrjl, Dec 23, 2019)
9f4ff52  Wine data downloader (nrjl, Jan 10, 2020)
bc11f5a  Move wine data reader to utils (nrjl, Jan 10, 2020)

GP_preference_demo.py (161 changes: 110 additions & 51 deletions)
@@ -3,68 +3,127 @@
import matplotlib.pyplot as plt
import GPpref
import scipy.optimize as op
import plot_tools as ptt
import test_data
import yaml
# from scipy.stats import beta
plt.rc('font',**{'family':'serif','sans-serif':['Computer Modern Roman']})
plt.rc('text', usetex=True)

log_hyp = np.log([0.1,1.0,0.1]) # length_scale, sigma_f, sigma_probit
np.random.seed(1)

n_train = 20
true_sigma = 0.05
delta_f = 1e-5

# Define polynomial function to be modelled
def true_function(x):
y = np.sin(x*2*np.pi + np.pi/4) + 0.2
return y

# Define noisy observation function
def obs_function(x, sigma):
fx = true_function(x)
noise = np.random.normal(scale=sigma, size=x.shape)
return fx + noise

def noisy_preference_rank(uv, sigma):
fuv = obs_function(uv,sigma)
y = -1*np.ones((fuv.shape[0],1),dtype='int')
y[fuv[:,1] > fuv[:,0]] = 1
return y, fuv

# Main program
# Plot true function
x_plot = np.linspace(0.0,1.0,101,dtype='float')
y_plot = true_function(x_plot)
train_hyper = False
use_test_data = False # test_data.data3 #
verbose = 2

with open('./data/statruns_2D.yaml', 'rt') as fh:
wave = yaml.safe_load(fh)
try:
np.random.seed(wave['statrun_params']['randseed'])
except KeyError:
np.random.seed(0)
d_x = wave['GP_params']['hyper_counts'][0]-1
random_wave = test_data.MultiWave(n_dimensions=d_x, **wave['wave_params'])
log_hyp = np.log(wave['hyperparameters'])

n_rel_train = 5
n_abs_train = 5

n_xplot = 31
n_posterior_samples = 3

random_wave.print_values()
true_function = random_wave.out

rel_obs_fun = GPpref.RelObservationSampler(true_function, wave['GP_params']['rel_likelihood'], wave['rel_obs_params'])
abs_obs_fun = GPpref.AbsObservationSampler(true_function, wave['GP_params']['abs_likelihood'], wave['abs_obs_params'])

# True function
# This is a complicated (but possibly memory-efficient) way to generate the list of all grid
# points; there is probably a better (itertools-based) way.
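# One possible itertools-based alternative (illustrative sketch only, assuming x_plot and d_x
# as defined above):
#   import itertools
#   x_test = np.array(list(itertools.product(x_plot, repeat=d_x)))
# which enumerates every point of the d_x-dimensional grid spanned by x_plot.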
x_plot = np.linspace(0.0,1.0,n_xplot,dtype='float')
x_test = ptt.make_meshlist(x_plot, d_x)
f_true = abs_obs_fun.f(x_test)
mu_true = abs_obs_fun.mean_link(x_test)
abs_y_samples = abs_obs_fun.l.y_list
p_abs_y_true = abs_obs_fun.observation_likelihood_array(x_test, abs_y_samples)
if d_x == 1:
p_rel_y_true = rel_obs_fun.observation_likelihood_array(x_test)

# Training data - this is a bit unusual: we sample x values, and the uv pairs are actually
# indices into x, because it is computationally simpler. The actual u,v values can be
# recovered as x[ui], x[vi].
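# (e.g. if one row of the index array is [3, 7], the compared pair is (x[3], x[7]))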
x_train = np.random.random((2*n_train,1))
uvi_train = np.random.choice(range(2*n_train), (n_train,2), replace=False)
uv_train = x_train[uvi_train][:,:,0]
if use_test_data:
x_rel, uvi_rel, y_rel, fuv_rel, x_abs, y_abs, mu_abs = use_test_data()
else:
x_rel, uvi_rel, y_rel, fuv_rel = rel_obs_fun.generate_n_observations(n_rel_train, n_xdim=d_x)
x_abs, y_abs, mu_abs = abs_obs_fun.generate_n_observations(n_abs_train, n_xdim=d_x)

# Construct GP object
wave['GP_params']['verbose'] = verbose
model_kwargs = {'x_rel':x_rel, 'uvi_rel':uvi_rel, 'x_abs':x_abs, 'y_rel':y_rel, 'y_abs':y_abs,
'rel_kwargs': wave['rel_obs_params'], 'abs_kwargs': wave['abs_obs_params']}
model_kwargs.update(wave['GP_params'])
prefGP = GPpref.PreferenceGaussianProcess(**model_kwargs)

prefGP.set_hyperparameters(log_hyp)
# If training hyperparameters, use external optimiser
if train_hyper:
log_hyp = op.fmin(prefGP.calc_nlml,log_hyp)

f = prefGP.calc_laplace(log_hyp)
prefGP.print_hyperparameters()

# Get noisy observations f(uv) and corresponding ranks y_train
y_train, fuv_train = noisy_preference_rank(uv_train,true_sigma)
# Latent predictions
fhat, vhat = prefGP.predict_latent(x_test)

hf,ha = plt.subplots(1,1)
ha.plot(x_plot,y_plot,'r-')
for uv,fuv,y in zip(uv_train, fuv_train, y_train):
ha.plot(uv, fuv, 'b-')
ha.plot(uv[(y+1)/2],fuv[(y+1)/2],'k+')
# Posterior likelihoods
p_abs_y_post, E_y = prefGP.abs_posterior_likelihood(abs_y_samples, fhat=fhat, varhat=vhat)

ha.set_title('Training data')
ha.set_ylabel('f(x)')
ha.set_xlabel('x')
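# Weighted RMS errors between the true mean rating (mu_true) and the posterior expectation (E_y)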
wrms = test_data.wrms(mu_true, E_y)
wrms2 = test_data.wrms_misclass(mu_true, E_y)

if d_x == 1:
p_rel_y_true = rel_obs_fun.observation_likelihood_array(x_test)
p_rel_y_post = prefGP.rel_posterior_likelihood_array(fhat=fhat, varhat=vhat)
else:
p_rel_y_true, p_rel_y_post = None, None

# GP hyperparameters
# note: using log scaling to aid learning hyperparameters with varied magnitudes
if d_x <= 2:
# Plot true functions
fig_t, ax_t = ptt.true_plots(x_test, f_true, mu_true, wave['rel_obs_params']['sigma'],
abs_y_samples, p_abs_y_true, p_rel_y_true,
t_l=r'True latent function, $f(x)$')

prefGP = GPpref.PreferenceGaussianProcess(x_train, uvi_train, y_train, delta_f=delta_f)
# Posterior estimates
fig_p, ax_p, h_plotobj = \
ptt.estimate_plots(x_test, f_true, mu_true, fhat, vhat, E_y, wave['rel_obs_params']['sigma'],
abs_y_samples, p_abs_y_post, p_rel_y_post,
x_abs, y_abs, x_rel[uvi_rel], fuv_rel, y_rel, n_posterior_samples=n_posterior_samples,
posterior_plot_kwargs={'color':'grey', 'ls':'--'},
t_l=r'$\mathcal{GP}$ latent function estimate $\hat{f}(x)$',
t_a=r'Posterior absolute likelihood, $p(u | \mathcal{Y}, \theta)$',
t_r=r'Posterior relative likelihood $P(x_0 \succ x_1 | \mathcal{Y}, \theta)$')
if d_x == 1:
p_err = test_data.rel_error(mu_true, p_rel_y_true, E_y, p_rel_y_post, weight=False)
print "WRMS: {0:0.3f}, WRMS_MC: {1:0.3f}, p_err: {2:0.3f}".format(wrms, wrms2, p_err)
else:
print "WRMS: {0:0.3f}, WRMS_MC: {1:0.3f}".format(wrms, wrms2)
plt.show(block=False)

# Pseudocode:
# For a set of hyperparameters, return a log likelihood that can be used by an optimiser
theta0 = log_hyp
else:
print "Input state space dimension: {0} is too high to plot".format(d_x)
print "WRMS: {0:0.3f}, WRMS_MC: {1:0.3f}".format(wrms, wrms2)

# log_hyp = op.fmin(prefGP.calc_nlml,theta0)
f,lml = prefGP.calc_laplace(log_hyp)

ha.plot(x_train, f, 'g^')
plt.show()
## SCRAP
# p_y = np.zeros((n_ysamples, n_xplot))
# y_samples = np.linspace(0.01, 0.99, n_ysamples)
# iny = 1.0/n_ysamples
# E_y2 = np.zeros(n_xplot)
#
# normal_samples = np.random.normal(size=n_mcsamples)
# for i,(fstar,vstar) in enumerate(zip(fhat, vhat.diagonal())):
# f_samples = normal_samples*vstar+fstar
# aa, bb = prefGP.abs_likelihood.get_alpha_beta(f_samples)
# p_y[:, i] = [iny*np.sum(beta.pdf(yj, aa, bb)) for yj in y_samples]
# p_y[:, i] /= np.sum(p_y[:, i])
# E_y2[i] = np.sum(np.dot(y_samples, p_y[:, i]))

GP_regression_demo.py (77 changes: 56 additions & 21 deletions)
@@ -3,52 +3,87 @@
import scipy.optimize as op
import matplotlib.pyplot as plt
import GPr
np.random.seed(0)
import plot_tools as ptt
plt.rc('font',**{'family':'serif','sans-serif':['Computer Modern Roman']})
plt.rc('text', usetex=True)

f_sigma = 0.15
n_samples = 10
r_seed = 1
n_posterior_samples = 3
optimise_hp = False

# Define polynomial function to be modelled
def true_function(x):
c = np.array([3,5,-9,-3,2],float)
y = np.polyval(c,x)
# c = np.array([3,5,-9,-3,2],float)
# y = np.polyval(c,x)
y = np.sin(x*2*np.pi)
return y


# Define noisy observation function
def obs_function(x):
y = true_function(x) + np.random.normal(0,0.1,len(x))
def obs_function(x, sigma):
y = true_function(x) + np.random.normal(0, sigma, size=x.shape)
return y


# Main program
# Plot true function
x_plot = np.arange(0,1,0.01,float)
y_plot = true_function(x_plot)
x_plot = np.atleast_2d(np.arange(-0.5, 1.5, 0.01, dtype='float')).T
fx_plot = true_function(x_plot)
plt.figure()
plt.plot(x_plot,y_plot,'k-')
plt.plot(x_plot, fx_plot, 'k-')

# Training data
x_train = np.random.random(20)
y_train = obs_function(x_train)
plt.plot(x_train,y_train,'rx')
np.random.seed(r_seed)
x_train = np.atleast_2d(0.6*np.random.random(n_samples)+0.2).T
# x_train = np.array([-10.0])
y_train = obs_function(x_train, f_sigma)
plt.plot(x_train, y_train, 'rx')

# Test data
x_test = np.arange(0,1,0.01,float)
x_test = x_plot

# GP hyperparameters
# note: using log scaling to aid learning hyperparameters with varied magnitudes
log_hyp = np.log([1,1,0.1])
log_hyp = np.log([0.3, 1.0, 0.2])
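# e.g. np.exp(log_hyp) recovers the raw values (presumably length_scale, sigma_f, sigma_n as
# used below); optimising in log space keeps them positive and on comparable scales.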
mean_hyp = 0
like_hyp = 0

# Initialise GP for hyperparameter training
initGP = GPr.GaussianProcess(log_hyp,mean_hyp,like_hyp,"SE","zero","zero",x_train,y_train)
initGP = GPr.GaussianProcess(log_hyp, mean_hyp, like_hyp, "SE", "zero", "zero", x_train, y_train)

# Run optimisation routine to learn hyperparameters
opt_log_hyp = op.fmin(initGP.compute_likelihood,log_hyp)
if optimise_hp:
opt_log_hyp = op.fmin(initGP.compute_likelihood, log_hyp)
else:
opt_log_hyp = log_hyp

# Learnt GP with optimised hyperparameters
# Learned GP with optimised hyperparameters
optGP = GPr.GaussianProcess(opt_log_hyp,mean_hyp,like_hyp,"SE","zero","zero",x_train,y_train)
y_test,cov_y = optGP.compute_prediction(x_test)
fhat_test, fhat_var = optGP.compute_prediction(x_test)

h_fig,h_ax = plt.subplots()
h_fx, patch_fx = ptt.plot_with_bounds(h_ax, x_plot, fx_plot, f_sigma, c=ptt.lines[0])
h_y, = h_ax.plot(x_train, y_train, 'rx', mew=1.0, ms=8)
h_fhat, patch_fhat = ptt.plot_with_bounds(h_ax, x_plot, fhat_test, np.sqrt(fhat_var.diagonal())[np.newaxis].T, c=ptt.lines[1], ls='--')
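# Draw sample functions from the GP posterior N(fhat_test, fhat_var) for visualisation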
f_post = np.random.multivariate_normal(fhat_test.flatten(), fhat_var, n_posterior_samples)
h_pp = h_ax.plot(x_plot, f_post.T, '--', color='grey', lw=0.8)
h_ax.autoscale()

# h_pp = h_ax.plot(x_plot, y_post.T, c='grey', ls='--', lw=0.8)

# h_ax.set_ylim([-3, 3])
gp_str = '$\hat{{f}}(x)|\mathcal{{Y}} \sim \mathcal{{GP}}(\mathbf{{0}}, K_{{SE}}:l={0:0.2f}, \sigma_f={1:0.2f}, \sigma_n={2:0.2f})$'
gp_l, gp_sigf, gp_sign = optGP.covFun.hyp
gp_str = gp_str.format(gp_l, gp_sigf, gp_sign)
h_ax.legend((h_fx, h_y, h_fhat),('$f(x)$', '$y \sim \mathcal{{N}}(f(x), {0:0.1f}^2)$'.format(f_sigma), gp_str), loc='best')
h_ax.set_xlabel('$x$')
#h_fig.savefig('fig/regression_example.pdf', bbox_inches='tight', transparent='true')


# Plot true and modelled functions
plt.plot(x_test,y_test,'b-')
plt.plot(x_test,y_test+np.sqrt(cov_y),'g--')
plt.plot(x_test,y_test-np.sqrt(cov_y),'g--')
plt.show()
# plt.plot(x_test,y_test,'b-')
# plt.plot(x_test,y_test+np.sqrt(cov_y),'g--')
# plt.plot(x_test,y_test-np.sqrt(cov_y),'g--')
plt.show(block=False)