From 403d1c7d34190b96971b157415f393c59b713cee Mon Sep 17 00:00:00 2001 From: Angela Zhou Date: Tue, 29 Oct 2019 20:10:31 -0400 Subject: [PATCH] initial commit --- data_scenarios.py | 251 ++++++++++ methods.py | 359 ++++++++++++++ methods_test.py | 121 +++++ subgrad.py | 1035 +++++++++++++++++++++++++++++++++++++++ unconfoundedness_fns.py | 676 +++++++++++++++++++++++++ 5 files changed, 2442 insertions(+) create mode 100644 data_scenarios.py create mode 100644 methods.py create mode 100644 methods_test.py create mode 100644 subgrad.py create mode 100644 unconfoundedness_fns.py diff --git a/data_scenarios.py b/data_scenarios.py new file mode 100644 index 0000000..5924054 --- /dev/null +++ b/data_scenarios.py @@ -0,0 +1,251 @@ + +import matplotlib.pyplot as plt +import numpy as np +import cvxpy as cvx +from gurobipy import * +from scipy.optimize import minimize +import datetime as datetime +import pickle +from matplotlib.backends.backend_pdf import PdfPages +from matplotlib import collections as matcoll +from sklearn import svm +from methods import * + +import os +import sys +module_path = os.path.abspath(os.path.join('/Users/az/Box Sync/unconfoundedness')) +if module_path not in sys.path: + sys.path.append(module_path) +from scripts_running import * +from unconfoundedness_fns import * +# from evals import * +from greedy_partitioning import * +from subgrad import * +from scipy.stats import norm +from scipy.stats import multivariate_normal +from copy import deepcopy + +d = 5 # dimension of x +n = 800; +# parameters +rho = np.asarray([1 / np.sqrt(2), -1 / np.sqrt(2), 0, 0, 1 / np.sqrt(3)]) # normalize to unit 0.5 +rho = rho / (np.dot(rho, rho) * 2) + +beta_cons = 2.5 +beta_x = np.asarray([0, .5, -0.5, 0, 0, 0]) +beta_x_T = np.asarray([-1.5, 1, -1.5, 1., 0.5, 0]) +beta_T = np.asarray([0, .75, -.5, 0, -1, 0, 0]) +beta_T_conf = np.asarray([0, .75, -.5, 0, -1, 0]) +# beta_T = np.asarray([-1, 0,0, -.5, 1,0.5,1.5]) +# beta_T_conf = np.asarray([-1, 0,0, -.5, 1,0.5 ]) +mu_x = np.asarray([-1, .5, -1, 0, -1]); + +alpha = -2 +w = 1.5 + +def return_CATE_optimal_assignment(x, u): + n = x.shape[0] + risk_T_1 = real_risk_(np.ones(n), x, u) + risk_T_0 = real_risk_(np.zeros(n), x, u) + opt_T = [1 if risk_T_1[k] < risk_T_0[k] else 0 for k in range(n)] + return opt_T + + +# generate propensity model +def REAL_PROP_LOG(x, u): + Gamma = 1.5 + print x.shape + nominal_ = logistic_pol_asgn(beta_T_conf, x) + # return nominal_ + # set u = I[ Y(T)\mid x > Y(-T) \mid x ] + a_bnd, b_bnd = get_bnds(nominal_, Gamma) + q_lo = 1 / b_bnd; + q_hi = 1 / a_bnd + opt_T = return_CATE_optimal_assignment(x, u) + q_real = np.asarray([q_hi[i] if opt_T[i] == 1 else q_lo[i] for i in range(len(u))]) + + return q_real + +def real_risk(T, beta_cons, beta_x, beta_x_T, x, u): + ''' + takes as input integral T + ''' + n = len(T); + risk = np.zeros(n) + for i in range(len(T)): + risk[i] = T[i] * beta_cons + np.dot(beta_x.T, x[i, :]) + np.dot(beta_x_T.T, x[i, :] * T[i]) + alpha * (u[i]) * ( + (2 * T[i] - 1)) + w * (u[i]) + return risk + + +def real_risk_(T, x, u): + return real_risk(T, beta_cons, beta_x, beta_x_T, x, u) + +def real_risk_T_integer(T, x, u): + T = get_sgn_0_1(T) + return real_risk(T, beta_cons, beta_x, beta_x_T, x, u) + + +''' +takes in prob_1, n x m array of treatment assignment probabilities +compute +\sum_t 1/n \sum_i Y(t)\pi_i(t) +''' +def real_risk_prob_oracle(prob_t, x, u): + n_ts = prob_t.shape[1] + risk = 0 + for t in range(n_ts): + risk += np.mean( np.multiply(prob_t[:,t], real_risk_(t*np.ones(x.shape[0]), x, u) ) ) + return risk +# return prob_1 * real_risk_T_integer(np.ones(n), x, u) + (1 - prob_1) * real_risk_(np.zeros(n), x, u) + + + + +def real_risk_prob(prob_1, x, u): + n = len(u) + prob_1 = np.asarray(prob_1) + return prob_1 * real_risk_(np.ones(n), x, u) + (1 - prob_1) * real_risk_(np.zeros(n), x, u) + +def generate_log_data(mu_x, n, beta_cons, beta_x, beta_x_T): + # generate n datapoints from the same multivariate normal distribution + d = len(mu_x) + u = (np.random.rand(n) > 0.5) # needs to be Rademacher + # u = np.asarray([u[i] if u[i] == 1 else -1 for i in range(len(u))]) + # u = np.ones(n) + x = np.zeros([n, d]) + for i in range(n): + x[i, :] = np.random.multivariate_normal(mean=mu_x * (2 * u[i] - 1), cov=np.eye(d)) + x_ = np.hstack([x, np.ones([n, 1])]) + # generate propensities + true_Q = REAL_PROP_LOG(x_, u) + T = np.array(np.random.uniform(size=n) < true_Q).astype(int).flatten() + T = T.reshape([n, 1]); + T_sgned = np.asarray([1 if T[i] == 1 else -1 for i in range(n)]).flatten() + clf = LogisticRegression(); + clf.fit(x, T) + propensities = clf.predict_proba(x) + nominal_propensities_pos = propensities[:, 1] + + nominal_propensities_pos = logistic_pol_asgn(beta_T_conf, x_) + + q0 = np.asarray([nominal_propensities_pos[i] if T[i] == 1 else 1 - nominal_propensities_pos[i] for i in range(n)]) + true_Q_obs = np.asarray([true_Q[i] if T[i] == 1 else 1 - true_Q[i] for i in range(n)]) + Y = np.zeros(n) + for i in range(n): + Y[i] = T[i] * beta_cons + np.dot(beta_x.T, x_[i, :]) + np.dot(beta_x_T.T, x_[i, :] * T[i]) + alpha * (u[i]) * ( + 2 * T[i] - 1) + w * u[i] # + np.dot(beta_x_T.T, x_[i,:] - [-1,1] ) #+ np.dot(beta_x_high_freq.T, np.sin(x[i,0:HIGH_FREQ_N]*FREQ)*T[i]) + # add random noise + T = T.flatten() + Y += 2 * np.random.randn(n) # + np.random.randn()*np.asarray(T)*2 ; + return [x_, u, T, Y, true_Q_obs, q0] + + +# generate propensity model +def REAL_PROP_LOG(x, u): + Gamma = 1.5 + print x.shape + nominal_ = logistic_pol_asgn(beta_T_conf, x) + # return nominal_ + # set u = I[ Y(T)\mid x > Y(-T) \mid x ] + a_bnd, b_bnd = get_bnds(nominal_, Gamma) + q_lo = 1 / b_bnd; + q_hi = 1 / a_bnd + opt_T = return_CATE_optimal_assignment(x, u) + q_real = np.asarray([q_hi[i] if opt_T[i] == 1 else q_lo[i] for i in range(len(u))]) + + return q_real + + +def real_risk_mt(x_, u, T, alpha, beta_tilde, beta_x_T, eta_tilde, eta): + n = len(T); risk = np.zeros(n) + for i in range(len(T)): + risk[i] = alpha[T[i]] + np.dot(beta_tilde.T, x_[i, :]) + np.dot(beta_x_T[:,T[i]].T, x_[i, :]) + eta[T[i]]*(u[i]) + eta_tilde*u[i] + return risk + +def real_risk_mt_(x_, u, T): + return real_risk_mt(x_, u, T,*outcome_dgp_params) + +''' +takes in prob_1, n x m array of treatment assignment probabilities +compute +\sum_t 1/n \sum_i Y(t)\pi_i(t) +''' +def real_risk_prob_oracle_mt(prob_t, x, u, *outcome_dgp_params): + n_ts = prob_t.shape[1] + risk = 0 + for t in range(n_ts): + risk += np.mean( np.multiply(prob_t[:,t], real_risk_mt_(x, u, t*np.ones(n).astype(int)) ) ) + return risk +# return prob_1 * real_risk_T_integer(np.ones(n), x, u) + (1 - prob_1) * real_risk_(np.zeros(n), x, u) + + +def return_CATE_optimal_assignment_mt(x, u, n_ts, *outcome_dgp_params): + n = x.shape[0] + risk = np.zeros([n,n_ts]) + for k in range(n_ts): + risk[:,k] = real_risk_mt(x, u, k*np.ones(n).astype(int), *outcome_dgp_params) + opt_T = [np.argmin(risk[i,:]) for i in range(n)] + return opt_T + +''' real propensity that generates treatment assignments for multiple treatments +''' +def real_propensity_mt(x, u, beta_T_conf_prop, log_gamma, *outcome_dgp_params ): + nominal_ = logistic_pol_asgn_mt(beta_T_conf_prop, x) + # set u = I[ Y(T)\mid x > Y(-T) \mid x ] + opt_T = return_CATE_optimal_assignment_mt(x, u, beta_T_conf_prop.shape[1], *outcome_dgp_params) + n_ts = beta_T_conf_prop.shape[1] + q_lo = np.zeros([x.shape[0], n_ts]); q_hi = np.zeros([x.shape[0], n_ts]) + for k in range(n_ts): + a_bnd, b_bnd = get_bnds(nominal_[:,k], log_gamma) + q_lo[:,k] = 1 / b_bnd; + q_hi[:,k] = 1 / a_bnd + q_real = deepcopy(nominal_) + for i in range(x.shape[0]): + if opt_T[i] == 1: + q_real[i,1] = q_hi[i, 1 ] + q_real[i,2] = nominal_[i,2] + q_real[i,0] = 1 - q_real[i,1] - q_real[i,2] + # elif opt_T[i] == 0: + # q_real[i,0] = q_lo[i, 0 ] + # q_real[i,2] = nominal_[i,2] + # q_real[i,1] = 1 - q_real[i,0] - q_real[i,2] + else: + q_real[i,:] = nominal_[i,:] + # clip and renormalize + q_real = np.clip(q_real, 0.01, 0.99) + for i in range(x.shape[0]): + q_real[i,:] = q_real[i,:] / sum(q_real[i,:]) + return q_real + + +''' +# take in a vector of +beta_cons: +beta_tilde: \tilde{\beta} (confounding interaction with x) +beta_x_T: [d x k] array of coefficient vectors, one for each treatment + +''' +def generate_log_data_mt(mu_x, n, beta_T_conf_prop, n_ts, log_gamma, alpha, beta_tilde, beta_x_T, eta_tilde, eta): + # generate n datapoints from the same multivariate normal distribution + outcome_dgp_params = [alpha, beta_tilde, beta_x_T, eta_tilde, eta ] + d = len(mu_x); + u = (np.random.rand(n) > 0.5) # Bernoulli noise realization; + x = np.zeros([n, d]) + for i in range(n): + x[i, :] = np.random.uniform(low=np.ones(d)*-3,high = np.ones(d)*3) + + # x[i, :] = np.random.multivariate_normal(mean=mu_x * (2 * u[i] - 1), cov=np.eye(d)) + x_ = np.hstack([x, np.ones([n, 1])]) + # generate propensities + true_Q = real_propensity_mt(x_, u, beta_T_conf_prop, log_gamma, *outcome_dgp_params ) # in n x k for multiple treatments + T = np.asarray([np.random.choice( range(n_ts), p = true_Q[i,:] ) for i in range(n) ]).astype(int).flatten() + q0 = logistic_pol_asgn_mt_obsA(beta_T_conf_prop, x_, T) + # q0 = np.asarray([nominal_propensities_pos[i,T[i]] for i in range(n)]) + true_Q_obs = np.asarray([true_Q[i,T[i]] for i in range(n)]) + Y = np.zeros(n) + for i in range(n): + Y[i] = alpha[T[i]] + np.dot(beta_tilde.T, x_[i, :]) + np.dot(beta_x_T[:,T[i]].T, x_[i, :]) + eta[T[i]]*(u[i]) + eta_tilde*u[i] + # add random noise + Y += np.random.randn(n) # + np.random.randn()*np.asarray(T)*2 ; + return [x_, u, T, Y, true_Q_obs, q0] diff --git a/methods.py b/methods.py new file mode 100644 index 0000000..acf4402 --- /dev/null +++ b/methods.py @@ -0,0 +1,359 @@ +import numpy as np +import gurobipy as gp +from unconfoundedness_fns import * +from subgrad import * +from opt_tree import * +from greedy_partitioning_serv import * +import pickle +from datetime import datetime + +''' +policy_prob_1: function returning probability of policy assignment = 1 +weight_function: function returning adversarial weights (given lower, upper bound and gamma or effective gamma budget bound (L1 budget) +baseline_pol: pi_0(x) function +policy_optimizer: ogd, or optimal_tree +- verbose +- save + - stump + - experiment name +''' +class ConfoundingRobustPolicy: + def __init__(self, baseline_pol, verbose = False, ind_rep = -1, save_params={}, save = True, treatment_n='binary', params={}): + self.baseline_pol = baseline_pol + # self.weight_function = weight_function + self.verbose = verbose + self.save_params = save_params #stump, exp_name + self.save = save + #self.save_params != {} # check if it's not the default + self.ind_rep = ind_rep + self.params = params + self.treatment_n = treatment_n # some implementations leverage binary encoding + + # store training data when passed in by .fit + self.x = None + self.t = None + self.t_levels = None + self.y = None + self.fq = None + self.RBARS_ = None + self.RISKS = None + self.POLS_dict = None + self.eval_data = None + self.opt_params_og = None + self.get_risk = None #overwritten with a frozen test set: call get_risk with a vector of policy prob assignments + ''' + fit based on X, T, Y; q0 nominal propensities + Assume x doesn't have an intercept + GAMMAS series of parameters + method_types: list of strings 'ogd', 'fdiv-02', + method_params object takes in + - optimizer + - opt_params + the params_dict that gets passed to the optimizer + DEFAULT_POL: the policy to default to if loss is nonnegative + - pol_opt + 'ogd', 'tree', 'ipw': descriptor of method (gradient based or otherwise) + 'unc_set_type': 'interval', 'L1-budget' + # Make GAMS rounded precision + ''' + def fit(self, x, t, y, q0, GAMS, method_params, eval_conf={'eval':False} ): + if self.treatment_n == 'binary': + t = get_sgn_0_1(t) # make sure treatment is signed, not 0-1 + else: + n_treatments = len(np.unique(t)) + random.seed(1) + + # we input data with intercept + data_dict = { 'x':x, 't':t, 'y':y, 'fq': q0 } + self.x = x; self.t = t; self.y = y; self.fq = q0; self.t_levels = np.sort(np.unique(t)) + opt_params = method_params['opt_params'] # get parameters (e.g. optimization parameters from dict) + self.opt_params_og = opt_params + opt_params.update({'x': x, 't': t, 'y': y, 'fq': q0}) + if self.treatment_n == 'binary': + prev_sol = np.random.randn(x.shape[1]); + else: + print n_treatments + print x.shape[1] + prev_sol = np.random.randn(x.shape[1],n_treatments); + POLS = [ None ] * len(GAMS); losses = np.zeros(len(GAMS)) + RISKS = np.zeros(len(GAMS)) + + if eval_conf['eval']: # if evaluate policy error online + self.init_evaluation(eval_conf) # set risk_test + + # iterate over values of GAMMAS and train + for ind_g, gam in enumerate(GAMS): + if self.verbose: + print 'gamma, ', gam + a_bnd, b_bnd = get_bnds(q0, gam) + data_dict.update({'a_bnd':a_bnd, 'b_bnd':b_bnd}); opt_params.update( {'a_bnd':a_bnd,'b_bnd':b_bnd} ) + # update parameters based on gamma, a_bnd, b_bnd + optimizer = method_params['optimizer'] + [opt_params, gammas] = self.update_opt_params(gam, a_bnd, b_bnd, opt_params, method_params) + now = datetime.now() + [robust_th, robust_loss] = optimizer(th=prev_sol, **opt_params) + print datetime.now() - now, 'time optimizing' + # Optimization refinements since gamma uncertainty sets are nested: revert, check loss is negative + now = datetime.now() + if method_params['pol_opt'] != 'IPW': + if ind_g > 1: + [robust_th, robust_loss] = self.reversion_check_optimality(ind_g, robust_th, robust_loss, + a_bnd, b_bnd, gammas, POLS, POL_PROB_1=opt_params['POL_PROB_1'], WGHTS_ = opt_params['WGHTS_']) + [robust_th, robust_loss] = self.check_loss_nonnegative(robust_th, robust_loss, opt_params['DEFAULT_POL']) + prev_sol = robust_th; POLS[ind_g] = robust_th; losses[ind_g] = robust_loss + print datetime.now() - now, 'time verifying' + + # if evaluate policy error online: report results + if eval_conf['eval']: + if self.treatment_n == 'binary': + test_rec = opt_params['POL_PROB_1'](robust_th, self.eval_data['x_test']) + else: # we need treatment information for returning policy probability vector for multiple treatments + if eval_conf['eval_type'] == "true_dgp": + test_rec = opt_params['POL_PROB_all'](robust_th, self.eval_data['x_test'], self.eval_data['t_test']) + else: #condition evaluation on observational treatment assignment in t_test + test_rec = opt_params['POL_PROB_1'](robust_th, self.eval_data['x_test'], self.eval_data['t_test']) + + RISKS[ind_g] = self.get_risk(test_rec) + self.RISKS = RISKS + if self.verbose: + print 'eval risk', RISKS[ind_g] + if self.verbose: + pickle.dump({'recs': test_rec, 'risk':RISKS[ind_g]}, open(self.save_params['stump'] + self.save_params['exp_name'] + '--' + method_params[ + 'type'] + '--gamma--'+ str(np.round(gam,2)) + '--rep' + str(self.ind_rep) + '--' + datetime.now().strftime( + '%Y-%m-%d-%H-%M-%S') + '.p', 'wb')) + + # After fitting: class contains a list of policies + # Post processing of optimization results; calibrate, + if method_params['pol_opt'] != 'IPW': + self.RBARS_ = self.calibrate_risk_for_policies(GAMS, POLS, unc_set_type=method_params['unc_set_type'], POL_PROB_1=opt_params['POL_PROB_1'], WGHTS_=opt_params['WGHTS_']) + self.POLS_dict = dict(zip(GAMS, POLS)) + if self.save: + pickle.dump( {'RBARS_':self.RBARS_, 'POLS_dict': self.POLS_dict, 'GAMS':GAMS, 'RISKS':RISKS}, open(self.save_params['stump']+self.save_params['exp_name']+'--'+method_params['type']+'--rep'+str(self.ind_rep)+'--'+datetime.now().strftime('%Y-%m-%d-%H-%M-%S')+'.p', 'wb') ) + + def predict(self, x, gamma): + pol_fn = POLS_dict[str(gamma)] + return opt_params['POL_PROB_1'](pol_fn, x) + # self.POLS_dict[str(gamma)](x) + + + ''' update opt params for changes in gamma, a_bnd, b_bnd computed + ''' + def update_opt_params(self, gamma, a_bnd, b_bnd, opt_params, method_params): + gammas = gamma + if method_params['pol_opt'] == 'ogd': + [N_RNDS, eta, N_RNDS_tv] = self.get_ogd_params(a_bnd, b_bnd) + if method_params['unc_set_type'] == 'interval': + opt_params.update({'N_RNDS': N_RNDS, 'gamma':gammas, 'step_schedule': eta, 'eta_0': 1}) + elif method_params['unc_set_type'] == 'L1-budget': + fq_sums = [ (1.0 / self.fq[self.t == t_]).sum() for t_ in self.t_levels ] + fq_nm_wghts = [(1 / self.fq[self.t == self.t_levels[ind]]) / fq_sums[ind] for ind in range(len(self.t_levels)) ] + upper_gamma_bnds = np.asarray([np.sum(np.maximum(a_bnd[self.t == t_] / fq_sums[ind] - fq_nm_wghts[ind], b_bnd[self.t == t_] / fq_sums[ind] - fq_nm_wghts[ind])) for ind,t_ in enumerate(self.t_levels)]) + gammas = opt_params['rho']*upper_gamma_bnds + opt_params.update({'N_RNDS': N_RNDS_tv, 'gamma': gamma, 'step_schedule': eta, 'eta_0': 1}) + elif method_params['pol_opt'] == 'tree': + None + elif method_params['pol_opt'] == 'IPW': + opt_params.update({'eta_0': 1}) + [psub_0_th_ipw, ls_th_ipw] = opt_w_restarts_vanilla_ipw(th = 0, **opt_params) + # N_RNDS, PI_1, x_[:, 0:d], t_, nominal_Q_, y_, 1,logging=False, step_schedule=IPW_STEP) + return [opt_params, gammas] + + def get_upper_gamma_bnds(self, a_bnd, b_bnd): + fq_sums = [(1.0 / self.fq[self.t == t_]).sum() for t_ in self.t_levels] + fq_nm_wghts = [(1 / self.fq[self.t == self.t_levels[ind]]) / fq_sums[ind] for ind in range(len(self.t_levels))] + upper_gamma_bnds = np.asarray([np.sum(np.maximum(a_bnd[self.t == t_] / fq_sums[ind] - fq_nm_wghts[ind], + b_bnd[self.t == t_] / fq_sums[ind] - fq_nm_wghts[ind])) for + ind, t_ in enumerate(self.t_levels)]) + gammas = self.opt_params_og['rho'] * upper_gamma_bnds + return gammas + + ''' + 'eval_type': assume known dgp or RCT evaluation + ''' + def init_evaluation(self, eval_conf): + eval_type = eval_conf['eval_type'] + self.eval_data = eval_conf['eval_data'] + if eval_type == "true_dgp": + oracle_risk = eval_conf['oracle_risk'] + def get_risk(pi_test, baseline_pol = self.baseline_pol): + x_test = self.eval_data['x_test']; u_test = self.eval_data['u_test']; t_test = self.eval_data['t_test'] + if self.treatment_n == 'binary': + return np.mean(oracle_risk(pi_test, x_test, u_test) - oracle_risk( baseline_pol(x_test) , x_test, u_test)) + else: + # For evaluating multiple treatments with oracle outcomes, compute + # \sum_i \sum_t Y_i(t) (\pi(t, X_i) - \pi_0(t, X_i) ) + baseline_assignment = np.zeros(pi_test.shape) + for t in np.unique(t_test): + baseline_assignment[:, t] = baseline_pol(x_test, t*np.ones(x_test.shape[0])) + return oracle_risk(pi_test, x_test, u_test) - oracle_risk(baseline_assignment, x_test, u_test) + self.get_risk = get_risk + + elif eval_type == "rct": # if evaluate on RCT data + def get_risk(pi_test, baseline_pol = self.baseline_pol): + x_test = self.eval_data['x_test'] + t_test = self.eval_data['t_test'] + y_test = self.eval_data['y_test'] + pi_test = np.asarray(pi_test) + RCT_prop_test = self.eval_data['RCT_q0'] # propensities from RCT + + if sum(pi_test) > 0: # if treat at all + if self.treatment_n == 'binary': + t_test_sgn = get_sgn_0_1(t_test) # if evaluating on an rct: need to put this in fulldata dict + return np.mean( y_test * (pi_test - baseline_pol(x_test)) * t_test_sgn / RCT_prop_test ) + else: + # \sum_i Y_i (\pi(T[i]) - \pi_0(T[i]) ) / Q[i,T[i]] + return np.mean(y_test * (pi_test - baseline_pol(x_test, t_test)) / RCT_prop_test) + else: + return 0 # assuming baseline is all-control + self.get_risk = get_risk + + def calibrate_risk_for_policies(self, GAMS, POLS, unc_set_type, POL_PROB_1, WGHTS_ = opt_wrapper): + ''' Compute a calibration plot under "GAMS" gamma parameters for "POLS" policies + + calibration assesses the worst case regret of a policy learned under an assumption of Gamma = Gamma' + which would be incurred were the true state of the world Gamma_calib + ''' + RBARS_ = np.zeros([len(GAMS),len(GAMS)]) + # Post processing, Rbar # compute policy unconfounded # data_dict: x_, t_sgned_, y_, PI_1, PREF, fq, WGHTS_ + for ind_gam_k in range(len(GAMS)): + for ind_g_calib, gam_calib in enumerate(GAMS): + a_calib, b_calib = get_bnds(self.fq, gam_calib) # previously was used to calib for IST data # a_calib, b_calib = get_bnds(q0[train_ind]*nominal_selection_[train_ind], gam_calib) + if unc_set_type == 'L1-budget': + gammas = self.get_upper_gamma_bnds(a_calib, b_calib) + gamma = gammas + else: + gamma = 0 + if self.treatment_n == 'binary': + RBARS_[ind_gam_k, ind_g_calib] = Rbar(th = POLS[ind_gam_k], + x=self.x, t=self.t, y=self.y, POL_PROB_1 = POL_PROB_1,BASELINE_POL= self.baseline_pol, + a_bnd = a_calib, b_bnd = b_calib, fq=self.fq, gamma = gamma,WGHTS_ = WGHTS_) + else: + RBARS_[ind_gam_k, ind_g_calib] = Rbar_mt(th = POLS[ind_gam_k], + x=self.x, t=self.t, y=self.y, POL_PROB_1 = POL_PROB_1,BASELINE_POL= self.baseline_pol, + a_bnd = a_calib, b_bnd = b_calib, fq=self.fq, gamma = gamma,WGHTS_ = WGHTS_) + + self.RBARS_ = RBARS_ + return RBARS_ + ''' + Check losses on previous policies + # TODO: fix + ''' + def reversion_check_optimality(self, ind_g, th, ls_th, a_bnd, b_bnd, gammas, POLS, POL_PROB_1, WGHTS_ = opt_wrapper): + #### Reversion for non-fdiv policies + RBARS_opt = np.zeros(ind_g) + + for ind_gam_i in range(ind_g): # for every gamma_i < gamma_k + # Evaluate previously found policies on current data; with current Gamma parameter bounds + if self.treatment_n=='binary': + RBARS_opt[ind_gam_i] = Rbar(th = POLS[ind_gam_i], + x=self.x, t=self.t, y=self.y, POL_PROB_1 = POL_PROB_1, BASELINE_POL= self.baseline_pol, + a_bnd = a_bnd, b_bnd = b_bnd, fq = self.fq, gamma = gammas, WGHTS_ = WGHTS_) + else: + RBARS_opt[ind_gam_i] = Rbar_mt(th = POLS[ind_gam_i], + x=self.x, t=self.t, y=self.y, POL_PROB_1 = POL_PROB_1, BASELINE_POL= self.baseline_pol, + a_bnd = a_bnd, b_bnd = b_bnd, fq = self.fq, gamma = gammas, WGHTS_ = WGHTS_) + if self.verbose: + print RBARS_opt, 'others evaled' + print ls_th, 'current loss' + if np.min(RBARS_opt) < ls_th: # if there is a policy achieving better risk, evaluated on this gamma + th = POLS[np.argmin(RBARS_opt)] # set the policy to the one achieving the minimum evaluation + if self.verbose: + print 'reverting at ' + str(ind_g) + 'to policy learned at ' + str(np.argmin(RBARS_opt)) + return [th, ls_th] + + ''' revert to control policy (for linear policy, assuming intercept is last coefficient)''' + def check_loss_nonnegative(self, robust_th, ls, baseline_th, TOL = 1e-4): + if ls > TOL: + robust_th = baseline_th + if self.verbose: + print 'truncating because nonnegative loss' + ls = 0 # truncate loss + else: + robust_th = robust_th + return [robust_th, ls] + + + + ''' get parameter values for OGD (perhaps clipping) + ''' + def get_ogd_params(self, a_bnd, b_bnd): + D = np.linalg.norm((a_bnd - b_bnd) / sum(a_bnd)) + G = np.linalg.norm(0.25 * max(np.abs(self.y)) * self.x.shape[1]) # assume we bound the 2-norm of $\Theta$ by p + eps = 0.05 + N_RNDS = np.clip(int(G ** 2 * D ** 2 / eps ** 2), 50, 200) + eta = np.clip((D * G) / np.sqrt(N_RNDS), 0.5, 0.6) + N_RNDS_tv = np.clip(N_RNDS, 10, 20) # use OGD parameters + return [N_RNDS, eta, N_RNDS_tv] + + + +''' opt tree wrapper calls +X__: preprocessed training data (standardize train/test data with same ) +t (unsigned) +Y +D (depth) +nominal_Q_, +pi_0, +a_bnd_train, +b_bnd_train +Pass a signed treatment vector to the greedy warm start; but use the sharp version of optimal tree +which uses integer encoding of t +''' +def get_opt_tree_policy(BASELINE_POL, x, t, fq, y, depth, a_bnd, b_bnd, sharp = True, verbose = False,TIME_LIMIT = 180, **params): + # optimal tree globals + # remove intercept + x, eps = preprocess_data_oct_pol(x[:,:-1]) + n = len(y); n_ts = len(np.unique(t)) + pi_0 = BASELINE_POL(x, t) + K=2; N = 2**(depth+1) - 1; N_nodes = np.arange(2**(depth+1) - 1)+1; N_TB = int(np.floor(N/2)) + branch_nodes = N_nodes[0:(N_TB )]; N_L = N - N_TB + leaf_nodes = N_nodes[N_TB:] + A_L = [[] for i in range(N)]; A_R = [[] for i in range(N)] + [A_L, A_R] = get_ancestors(N_nodes, A_L, A_R) + [lowers, uppers] = getterminalindices(N) + #### train on optimal tree + T_sgned = get_sgn_0_1(t); # this makes a naive binarization + y_label = 0 + mode_y = mode(y)[0][0] + L_hat = len(np.where(y == mode_y)[0])*1.0/ len(y) + [n, dim_d] = x.shape + [tree, warm_a, warm_b, warm_d, warm_z, warm_c ] = greedy_ws(N_TB, N_L, x, + y, T_sgned, pi_0, a_bnd, b_bnd, depth, leaf_nodes) + warm_c = np.ones(N_L)*0.5 + # get leaf labels from warm start + leaf_labels = np.ones(N_L)*0.5 + [warm_leaves,leaf_cts] = causal_tree_pred(x, leaf_nodes, warm_a, warm_b, leaf_labels, leaf_lbl=False) + # fill warm_z + for i in range(n): + which_leaf = int(warm_leaves[i])-leaf_nodes[0] + warm_z[i,which_leaf] = 1 + if verbose: + print 'ws: ', warm_a, warm_b, warm_d + print warm_z.sum(axis=0), 'start leaf node counts' + [m_pol,a_pol,b_pol,d_pol, z_pol, c_pol, l_pol, P, lmbda, u, v] = policy_opt_tree_centered_on_p_0(x, + y,t,fq, pi_0, a_bnd, b_bnd, A_L, A_R, lowers, uppers, leaf_nodes, + branch_nodes, L_hat, N, K, eps, y_label, warm_a, warm_b, warm_d, warm_z, warm_c, sharp = sharp, TIME_LIMIT= TIME_LIMIT) + lmbda = lmbda.X + a_pol_ = np.zeros([dim_d, N_TB]) + for i in range(N_TB): + a_pol_[:,i] = np.asarray([ a_pol[p,i+1].X for p in range(dim_d) ]).T + b_pol_ = [ b_pol[i+1].X for i in range(N_TB) ] + leaf_labels = np.asarray([ [c_pol[t_l, t].X for t in range(n_ts) ] for t_l in leaf_nodes ]) +# leaf_labels = [ 0 if c_pol[t_l].X > 0 else 1 for t_l in leaf_nodes ] + if verbose: + print a_pol_, b_pol_ + print ' c assignment: ', [c_pol[t_l,t].X for t in range(n_ts) for t_l in leaf_nodes ] + print ' P assignment: ', [P[i,t].X for t in range(n_ts) for i in range(n) ] + print leaf_labels + print 'leaf nonempty', [l_pol[t_l].X for t_l in leaf_nodes ] + if sum(b_pol_) < 0.001: + print 'empty solution' + + # pickle.dump( {'a': a_pol_,'b': b_pol_, 'leaves': leaf_labels, 'ws-tree': tree}, open('data/out/syn-glb/CR/opt-tree-gam-'+str(gam)+'-rep-'+str(ind_rep+5) + '-'+datetime.now().strftime('%Y-%m-%d-%H-%M-%S')+'.p', 'wb')) + # [Y_pred, leaf_node_cts] = causal_tree_pred(x_test, leaf_nodes, a_pol_, b_pol_,leaf_labels) + # print leaf_node_cts, 'leaf node cts' + # tree_rec = [tree.get_assignment(x_test[i,:]) for i in range(xtest.shape[0]) ] + # print 'sum Y pred: ' + str(sum(Y_pred)) + # print 'sum treerec: ' + str(sum(tree_rec)) + tree_pol = {'a': a_pol_,'b': b_pol_, 'leaves': leaf_labels, 'leaf_nodes': leaf_nodes} + return [tree_pol, lmbda] \ No newline at end of file diff --git a/methods_test.py b/methods_test.py new file mode 100644 index 0000000..94bc36f --- /dev/null +++ b/methods_test.py @@ -0,0 +1,121 @@ +import matplotlib.pyplot as plt +import numpy as np +from gurobipy import * +from scipy.optimize import minimize +import datetime as datetime +import pickle +from matplotlib.backends.backend_pdf import PdfPages +from matplotlib import collections as matcoll +from sklearn import svm +from methods import * +import os +import sys +from unconfoundedness_fns import * +from subgrad import * +from scipy.stats import norm +from scipy.stats import multivariate_normal +from joblib import Parallel, delayed +from data_scenarios import * + +def real_risk_prob(prob_1, x, u): + n = len(u) + prob_1 = np.asarray(prob_1) + return prob_1 * real_risk_(np.ones(n), x, u) + (1 - prob_1) * real_risk_(np.zeros(n), x, u) + +N_REPS = int(sys.argv[1]) +arr = sys.argv[2].split(',') +GAMS = [float(gam) for gam in arr] +print(GAMS), 'gammas' + +ENN = 4000 +d = 5 # dimension of x +th_ctrl = np.zeros(d+1) +th_ctrl[-1] = -1000 + +beta_cons = 2.5 +beta_x = np.asarray([0, .5, -0.5, 0, 0, 0]) +beta_x_T = np.asarray([-1.5,1,-1.5,1.,0.5,0]) +beta_T = np.asarray([0, .75, -.5,0,-1,0, 0]) +beta_T_conf = np.asarray([0, .75, -.5,0,-1,0]) +mu_x = np.asarray([-1,.5,-1,0,-1]); + +dgp_params = {'mu_x':mu_x, 'n':ENN, 'beta_cons':beta_cons, 'beta_x':beta_x, 'beta_x_T':beta_x_T} +save_params = {'stump':'sharp-test-synthetic/', 'exp_name': 'testing'} + + +opt_config_robust = {'N_RST': 15, 'GRAD_': get_implicit_grad_centered, 'WGHTS_': opt_wrapper, + 'GRAD_CTR': get_implicit_grad_centered, 'POL_PROB_1': logistic_pol_asgn, + 'POL_GRAD': qk_dpi_dtheta, 'DEFAULT_POL':th_ctrl, + 'BASELINE_POL': ctrl_p_1, 'P_1': ctrl_p_1, 'averaging': True, 'give_initial': True, + 'sharp': True} + +robust_opt_params = {'optimizer': opt_w_restarts, 'pol_opt': 'ogd', + 'unc_set_type': 'interval', 'opt_params': opt_config_robust, + 'BASELINE_POL': th_ctrl, 'type': 'logistic-interval'} + +fdiv_config = {'N_RST':15,'DEFAULT_POL':th_ctrl, + 'rho':0.5, 'WGHTS_':get_general_interval_wghts_algo_centered_TV_prob, 'GRAD_':get_implicit_grad_centered, + 'GRAD_CTR':get_implicit_grad_centered, 'POL_PROB_1':logistic_pol_asgn,'POL_GRAD':qk_dpi_dtheta, + 'BASELINE_POL': ctrl_p_1,'eta_0':0.5, 'averaging':True, 'give_initial':True, 'sharp':True} + +fdiv_robust_opt_params = { 'optimizer':opt_w_restarts, 'pol_opt':'ogd', + 'unc_set_type':'L1-budget', 'opt_params':fdiv_config, + 'BASELINE_POL':th_ctrl, 'type':'logistic-L1-budget-0.5'} + +fdiv_config_025 = {'N_RST':15,'DEFAULT_POL':th_ctrl, + 'rho':0.25, 'WGHTS_':get_general_interval_wghts_algo_centered_TV_prob, 'GRAD_':get_implicit_grad_centered, + 'GRAD_CTR':get_implicit_grad_centered, 'POL_PROB_1':logistic_pol_asgn,'POL_GRAD':qk_dpi_dtheta, + 'BASELINE_POL': ctrl_p_1,'eta_0':0.5, 'averaging':True, 'give_initial':True, 'sharp':True} + +fdiv_robust_opt_params_025 = { 'optimizer':opt_w_restarts, 'pol_opt':'ogd', + 'unc_set_type':'L1-budget', 'opt_params':fdiv_config_025, + 'BASELINE_POL':th_ctrl, 'type':'logistic-L1-budget-0.25'} + +######### +## IPW and tree parameters +opt_config_ipw = {'N_RST':15, 'N_RNDS':50, 'POL_PROB_1':logistic_pol_asgn, 'eta_0':1, 'averaging':True,'DEFAULT_POL':th_ctrl} + +ipw_opt_params = { 'optimizer':opt_w_restarts_vanilla_ipw, 'pol_opt':'IPW', + 'unc_set_type':'interval', 'opt_params':opt_config_ipw, + 'BASELINE_POL':th_ctrl, 'type':'IPW'} + + +methods = ['ogd-interval', 'fdiv-0.5', 'fdiv-0.25'] +method_params = [robust_opt_params, fdiv_robust_opt_params, fdiv_robust_opt_params_025] + + +def gen_data_run_for_gamma_for_joblib_mt(dgp_params, GAMS, real_risk_prob, method_params, ind_rep, gen_data=True, + save=False, save_params=[], already_gen_data=[]): + print ind_rep + if gen_data: + [x_full, u, T_, Y_, true_Q_, q0] = generate_log_data(**dgp_params) + else: + [x_full, u, T_, Y_, true_Q_, q0] = already_gen_data + np.random.seed(ind_rep) + random.seed(ind_rep) + train_ind, test_ind = model_selection.train_test_split(range(len(Y_)), test_size=0.9) + test_data = {'x_test': x_full[test_ind, :], 't_test': T_[test_ind], 'y_test': Y_[test_ind], 'u_test': u[test_ind]} + eval_conf = {'eval': True, 'eval_type': 'true_dgp', 'eval_data': test_data, 'oracle_risk': real_risk_prob_oracle} + ConfRobPols = [ConfoundingRobustPolicy(baseline_pol=ctrl_p_1_mt, save_params=save_params, save = True, verbose=True, treatment_n = 'multiple') for method in + method_params] + for ind, method_param in enumerate(method_params): + ConfRobPols[ind].fit(x_full[train_ind, :], T_[train_ind], Y_[train_ind], q0[train_ind], GAMS, method_param, + eval_conf=eval_conf) + del ConfRobPols[ind].x + del ConfRobPols[ind].t + del ConfRobPols[ind].y + del ConfRobPols[ind].eval_data + + return ConfRobPols + + +res = Parallel(n_jobs=-1, verbose=40)( + delayed(gen_data_run_for_gamma_for_joblib_mt)(dgp_params, GAMS, real_risk_prob, method_params, i, gen_data=True, + save=True, save_params=save_params) for i in range(N_REPS)) + +# delayed(gen_data_run_for_gamma_for_joblib)(dgp_params, GAMS, real_risk_prob, method_params, i, gen_data=True, +# save=True, save_params=save_params) for i in range(N_REPS)) + +pickle.dump(res, open('res-'+save_params['exp_name'] + '.pkl', 'wb')) +RISKS = [[ res[ind][meth].RISKS for ind in range(N_REPS) ] for meth in range(len(method_params)) ] +pickle.dump(RISKS, open('risks_joblib_test_opt_tree_mt_50reps.pkl', 'wb') ) diff --git a/subgrad.py b/subgrad.py new file mode 100644 index 0000000..d6a0356 --- /dev/null +++ b/subgrad.py @@ -0,0 +1,1035 @@ +""" +@author: Angela Zhou +""" +import numpy as np +import gurobipy as gp +from unconfoundedness_fns import * +import matplotlib.pyplot as plt +from scipy.optimize import minimize +from datetime import datetime +import random +import sys + + + +def log_progress(sequence, every=None, size=None, name='Items'): + from ipywidgets import IntProgress, HTML, VBox + from IPython.display import display + + is_iterator = False + if size is None: + try: + size = len(sequence) + except TypeError: + is_iterator = True + if size is not None: + if every is None: + if size <= 200: + every = 1 + else: + every = int(size / 200) # every 0.5% + else: + assert every is not None, 'sequence is iterator, set every' + + if is_iterator: + progress = IntProgress(min=0, max=1, value=1) + progress.bar_style = 'info' + else: + progress = IntProgress(min=0, max=size, value=0) + label = HTML() + box = VBox(children=[label, progress]) + display(box) + + index = 0 + try: + for index, record in enumerate(sequence, 1): + if index == 1 or index % every == 0: + if is_iterator: + label.value = '{name}: {index} / ?'.format( + name=name, + index=index + ) + else: + progress.value = index + label.value = u'{name}: {index} / {size}'.format( + name=name, + index=index, + size=size + ) + yield record + except: + progress.bar_style = 'danger' + raise + else: + progress.bar_style = 'success' + progress.value = index + label.value = "{name}: {index}".format( + name=name, + index=str(index or '?') + ) + +''' get gradient of pi wrt theta for logistic policy +where pi = Pr[pi=1|x] +''' +def qk_dpi_dtheta(pi_1, pol_theta, x): + n = len(pi_1); dg_dz = np.multiply(pi_1,np.ones(len(pi_1))-pi_1) + return np.multiply(dg_dz[:,np.newaxis], x) + #return np.diag(dg_dz).dot( x ).reshape(x.shape) # n x p: enforce jic + + +# """ get gradient of pi wrt theta for logistic policy +# where pi = Pr[pi=1|x] +# """ +# def qk_dpi_dtheta_mt(pi_1, pol_theta, x): +# n = len(pi_1); +# K = pol_theta.shape[1] +# grad = np.zeros(pol_theta.shape) +# for a in range(K): +# dg_dz = np.multiply(pi_1[:,a],np.ones(n)-pi_1[:,a]) +# grad[:,a] = np.dot(dg_dz[:, np.newaxis].T, x) +# return grad + +""" get gradient of pi wrt theta for logistic policy +where pi = Pr[pi=1|x] +""" +def qk_dpi_dtheta_mt_scalar(pi_1, x): + return pi_1*(1-pi_1) * x + +""" +read in callbacks for derivative of pi given theta and optimal w, t +PI_1, POL_GRAD (returns (p x 1) vector) +take in ** normalized weights W ** +""" +def get_implicit_grad_centered(pol_theta, PI_1, POL_GRAD, x, Y, t01, W): + # if need to get active index set + # rescaled weights in original + n = len(W); T_sgned = get_sgn_0_1(t01) + constants = np.multiply(Y, np.multiply(T_sgned,W)) + policy_x = PI_1(pol_theta, x) # 1 x n + dpi_dtheta = POL_GRAD(policy_x, pol_theta, x) # n x p + if x.ndim > 1: + return np.multiply(constants[:,np.newaxis], dpi_dtheta).sum(axis=0) + else: + return np.sum(np.multiply(constants,dpi_dtheta)) + +""" +returns gradient for multiple treatments case +read in callbacks for derivative of pi given theta and optimal w, t +theta is a d x K array +PI_1 returns probability pi = Pr[ A_i | x ] under policy +POL_GRAD returns gradient for observed A_i (returns (p x 1) vector) +take in *** normalized weights W *** +""" +def get_implicit_grad_centered_mt(pol_theta, PI_1, POL_GRAD_scalar, x, Y, t01, W): + n = len(W); #t_levels=np.unique(t01) + constants = np.multiply(Y, W) + policy_x = PI_1(pol_theta, x, t01) + # np.asarray([ PI_1(pol_theta[:,k], x) for k in t01 ]).flatten() # k x n + dpi_dtheta = np.asarray([ POL_GRAD_scalar( policy_x[i], x[i,:] ) for i in range(n) ] ) + # call qk_dpi_dtheta for pol_theta[a_i] for each datapoint + K = pol_theta.shape[1] + grad = np.zeros(pol_theta.shape) + for a in range(K): + if x.ndim > 1: + grad[:,a] = np.multiply(constants[(t01 == a), np.newaxis], dpi_dtheta[t01 == a,:]).sum(axis=0) + else: + grad[:,a] = np.sum(np.multiply(constants[t01 == a], dpi_dtheta[t01 == a])) + # print grad.shape + return grad + +""" +find value of centered estimator, evaluated against a benchmark policy which assigns +Pi(x) = 1 w.p p_1 for all x +""" +def centered_around_p1(a_bnd, b_bnd, Y_T, pi_1, p_1): + return find_opt_robust_ipw_val(np.multiply(Y_T, (pi_1 - p_1)), a_bnd, b_bnd, shorter=True) + + +def plot_W_GDS(p_ths, W_GDs): + plot(p_ths, W_GDs[:,0]) + for i in range(len(p_ths)): + plot([p_ths[i]-0.5, p_ths[i]+0.5], [W_GDs[i,0]-W_GDs[i,1]*0.5, W_GDs[i,0]+W_GDs[i,1]*0.5], c='b',alpha=0.1) + +""" test gradient fn for th, given vector of assignments p_1 +""" +def test_subgrad_for_th(p_th, p_1, PI_1, POL_GRAD, x, y, t01): + n = x.shape[0]; pi_1 = PI_1(np.asarray([p_th]), x).flatten(); t=get_sgn_0_1(t01); + [lda_opt, wghts, wghts_sum] = find_opt_weights_shorter(np.multiply(y*t, pi_1 - p_1), a_bnd, b_bnd) + grad = get_implicit_grad_centered(p_th, PI_1, POL_GRAD, x, y, t01, wghts/wghts.sum()) + return [lda_opt,grad] + +""" test gradient fn for th, regret against the anti-policy -Pi +""" +def test_subgrad_for_anti(p_th, p_1, PI_1, POL_GRAD, x, y, t01): + n = x.shape[0]; pi_1 = PI_1(np.asarray([p_th]), x).flatten(); t=get_sgn_0_1(t01); + [lda_opt, wghts, wghts_sum] = find_opt_weights_shorter(np.multiply(y*t, pi_1 - p_1), a_bnd, b_bnd) + grad = get_implicit_grad_centered_anti_pi(p_th, PI_1, POL_GRAD, x, y, t01, wghts/wghts.sum()) + return [lda_opt,grad] + + + +""" centered problem with weights +assume given pi_1 vector +max (pi_1 - p_1)YT W +""" +def get_general_interval_wghts_algo_centered_TV_prob(gamma, Y, a_, b_, fq, quiet=True): + wm = 1/fq; wm_sum=wm.sum(); n = len(Y) + wm = wm/wm_sum # normalize propensities + # assume estimated propensities are probs of observing T_i + y = Y; weights = np.zeros(n); + m = gp.Model() + if quiet: m.setParam("OutputFlag", 0) + t = m.addVar(lb = 0., ub = gp.GRB.INFINITY, vtype=gp.GRB.CONTINUOUS) + w = [m.addVar(obj = -yy, lb = 0., ub = gp.GRB.INFINITY, vtype=gp.GRB.CONTINUOUS) for yy in y] + d = [m.addVar(lb = 0., ub = gp.GRB.INFINITY, vtype=gp.GRB.CONTINUOUS) for yy in y] + m.update() + m.addConstr(gp.quicksum(w)==1) + m.addConstr(gp.quicksum(d)<=gamma*t) + for i in range(len(y)): + m.addConstr(w[i] <= b_[i] * t/wm_sum) + m.addConstr(w[i] >= a_[i] * t/wm_sum) + m.addConstr(d[i] >= w[i] - t*wm[i]) + m.addConstr(d[i] >= - w[i] + t*wm[i]) + m.optimize() + wghts = np.asarray([ ww.X for ww in w ]) # would like to have failsafe for not being able to optimize + return [-m.ObjVal,wghts,t.X/wm_sum] + +""" +read in callbacks for derivative of pi given theta and optimal w, t +Y(Pi) - Y(-Pi) +PI_1, POL_GRAD (returns (p x 1) vector) +take in ** normalized weights W ** +#### +DEPRECATED beceause slow/memory using +Use get_implicit_grad_centered instead +""" +# def get_implicit_grad_centered_anti_pi(pol_theta, PI_1, POL_GRAD, x, Y, t01, W): +# n = len(W); T_sgned = get_sgn_0_1(t01) +# dc_dpi = np.diag(2*Y*T_sgned) +# policy_x = PI_1(pol_theta, x) # 1 x n +# dpi_dtheta = POL_GRAD(policy_x, pol_theta, x) # n x p +# return dc_dpi.dot(dpi_dtheta).T.dot(W) #! double check + +def logistic_pol_asgn(theta, x): + ''' Requires an intercept term + ''' + n = x.shape[0] + theta = theta.flatten() + if len(theta) == 1: + logit = np.multiply(x, theta).flatten() + else: + logit = np.dot(x, theta).flatten() + LOGIT_TERM_POS = np.ones(n)*1.0 / ( np.ones(n) + np.exp( -logit )) + return LOGIT_TERM_POS +''' wrapper to align arguments with the budgeted case +''' +def opt_wrapper(gamma, y, a_bnd, b_bnd, fq): + return find_opt_weights_shorter(y, a_bnd, b_bnd) + +def anti_p_1(pi_1): + return np.ones(len(pi_1))-pi_1 # anti policy +def ctrl_p_1(pi_1): + return np.zeros(len(pi_1)) +def tmnt_p_1(pi_1): + return np.ones(len(pi_1)) + +''' +input: arbitrary policy vector (to get shape) +return 1 where treatment pattern is 0 (control +''' +def ctrl_p_1_mt(x,t01): + # treatment probs 0 + pi_0 = np.zeros(x.shape[0]) + pi_0[t01==0] = 1 # control treatment with probablity 1 + return pi_0 + + + + +""" subgrad descent template algo +automatically augments data ! +take in theta_0, # rounds +WGHTS_: fn obtaining optimal weights +GRAD_: fn to obtain parametric subgradient +POL_GRAD: gradient of parametrized policy wrt parameters +PI_1: return prob of pi(x) = 1 +p_1: pi_0 probability of t = 1 +""" +def grad_descent(th, N_RNDS, WGHTS_, GRAD_, POL_GRAD, PI_1, P_1, x, t01, fq, y, + a_, b_, gamma,eta_0,logging=False,step_schedule=0.5,linesearch=True): + n = x.shape[0]; t = get_sgn_0_1(t01) + assert all(len(arr) == n for arr in [x,t01,fq,y,a_,b_]) + if (x[:,-1] == np.ones(n)).all(): + x_aug = x + else: # otherwise augment data + x_aug = np.hstack([x, np.ones([n,1])]); + # x_aug = np.hstack([x, np.ones([n,1])]); + risks = np.zeros(N_RNDS); THTS = [None]*N_RNDS; PSTARS = np.zeros([N_RNDS, n]); losses = [None]*N_RNDS; + + def opt_wrapper_here(th):#linesearch wrapper + pi_1 = PI_1(th, x_aug).flatten(); + p_1 = P_1(pi_1); + [lda_opt, wghts, wghts_sum] = WGHTS_(gamma, np.multiply(y*t, pi_1 - p_1), a_, b_, fq) + return lda_opt + + for k in range(N_RNDS) : + # print k; sys.stdout.flush() + eta_t = eta_0 * 1.0/np.power((k+1)*1.0, step_schedule); eta_t_og = eta_t + pi_1 = PI_1(th, x_aug).flatten(); + p_1 = P_1(pi_1); + [lda_opt, wghts, wghts_sum] = WGHTS_(gamma, np.multiply(y*t, pi_1 - p_1), a_, b_, fq) + subgrad = GRAD_(th, PI_1, POL_GRAD, x_aug, y, t, wghts*1.0 / wghts_sum ) + if (linesearch==True): # Armijo LineSearch + # print 'ls'; sys.stdout.flush() + ALS = ArmijoLineSearch(tfactor = 0.2) + d = -subgrad + slope = np.dot(subgrad, d) + eta_t = ALS.search(opt_wrapper_here, th, d, slope, f = lda_opt) + # print eta_t, 'eta_t'; sys.stdout.flush() + if eta_t is not None: + th = th - eta_t * subgrad + else: + th = th - eta_t_og * subgrad +# oosrisks[k] = np.mean( PI_1(th, x_test_aug)*y_test[t01==1]/true_Q_test[t01==1]+(1-PI_1(th, x_test_aug))*y_test[t01==0]/true_Q_test[t01==0] ); + THTS[k] = th; PSTARS[k,:] = wghts.flatten(); losses[k] = lda_opt + return [losses, THTS, PSTARS] + +""" subgrad descent template algo +This is the sharp version +which computes weights separately in each T=1, T=-1 component +automatically augments data ! +take in theta_0, # rounds +WGHTS_: fn obtaining optimal weights +This weight function is overloaded to accommodate weight functions that take in extra parameters +about the uncertainty set +GRAD_: fn to obtain parametric subgradient +POL_GRAD: gradient of parametrized policy wrt parameters +PI_1: return prob of pi(x) = 1 +p_1: pi_0 probability of t = 1 +""" +def grad_descent_sharp(th, N_RNDS, WGHTS_, GRAD_, POL_GRAD, POL_PROB_1, BASELINE_POL, x, t01, fq, y, + a_, b_, gamma,eta_0,logging=False,step_schedule=0.5,linesearch=True): + n = x.shape[0]; t = get_sgn_0_1(t01) + t_levels = np.unique(t01) + assert all(len(arr) == n for arr in [x,t01,fq,y,a_,b_]) + # Check if x data contains an intercept: only retain for backwards compatibility: + # If last column is all ones, don't augment + if (x[:,-1] == np.ones(n)).all(): + x_aug = x + else: # otherwise augment data + x_aug = np.hstack([x, np.ones([n,1])]); + risks = np.zeros(N_RNDS); THTS = [None]*N_RNDS; PSTARS = np.zeros([N_RNDS, n]); losses = [None]*N_RNDS; + if not hasattr(gamma, "__len__"): + gammas = gamma * np.ones(len(t_levels)) + else: + gammas = gamma + def opt_wrapper_here(th):#linesearch wrapper + pi_1 = POL_PROB_1(th, x_aug).flatten(); + p_1 = BASELINE_POL(pi_1); + [lda_opt_neg1, wghts_neg1, wghts_sum_neg1] = WGHTS_(gammas[0], np.multiply(-1*y[t == -1], pi_1[t == -1] - p_1[t == -1]), a_[t == -1], b_[t == -1], fq[t == -1]) + [lda_opt_1, wghts_1, wghts_sum_1] = WGHTS_(gammas[1], np.multiply(y[t == 1], pi_1[t == 1] - p_1[t == 1]), a_[t == 1], b_[t == 1], fq[t == 1]) + + wghts_total[t==1] = wghts_1*1.0 / wghts_sum_1; wghts_total[t==-1] = wghts_neg1*1.0 / wghts_sum_neg1; + + return lda_opt_1 + lda_opt_neg1 + + for k in range(N_RNDS) : + eta_t = eta_0 * 1.0/np.power((k+1)*1.0, step_schedule); eta_t_og = eta_t + pi_1 = POL_PROB_1(th, x_aug).flatten(); + p_1 = BASELINE_POL(pi_1); + # Modification for sharpness: Call the weight subroutine within treated and untreated groups separately + wghts_total = np.zeros(len(t)); subgrad_total = np.zeros(len(t)) + # compute for T=1 + #TODO: modify for general case, mult T + [lda_opt_1, wghts_1, wghts_sum_1] = WGHTS_(gammas[1], np.multiply(y[t == 1], pi_1[t == 1] - p_1[t == 1]), a_[t == 1], b_[t == 1], fq[t == 1]) + # compute for T=-1 + [lda_opt_neg1, wghts_neg1, wghts_sum_neg1] = WGHTS_(gammas[0], np.multiply(-1*y[t == -1], pi_1[t == -1] - p_1[t == -1]), a_[t == -1], b_[t == -1], fq[t == -1]) + wghts_total[t==1] = wghts_1*1.0 / wghts_sum_1; wghts_total[t==-1] = wghts_neg1*1.0 / wghts_sum_neg1; + subgrad = GRAD_(th, POL_PROB_1, POL_GRAD, x_aug, y, t, wghts_total*1.0 / np.sum(wghts_total) ) + lda_opt = lda_opt_1 + lda_opt_neg1 + if (linesearch==True): # Armijo LineSearch + ALS = ArmijoLineSearch(tfactor = 0.2, default = eta_t) + d = -subgrad + slope = np.dot(subgrad, d) + eta_t = ALS.search(opt_wrapper_here, th, d, slope, f = lda_opt) + # print eta_t, 'eta_t'; sys.stdout.flush() + if eta_t is not None: + th = th - eta_t * subgrad + else: + th = th - eta_t_og * subgrad +# oosrisks[k] = np.mean( POL_PROB_1(th, x_test_aug)*y_test[t01==1]/true_Q_test[t01==1]+(1-PI_1(th, x_test_aug))*y_test[t01==0]/true_Q_test[t01==0] ); + THTS[k] = th; PSTARS[k,:] = wghts_total.flatten(); losses[k] = lda_opt + return [losses, THTS, PSTARS] + + + +# random restarts +''' refactored signature +anti_p_1 -> pi_0 +''' +def opt_w_restarts_rf(N_RST, th, N_RNDS, WGHTS_, GRAD_, POL_GRAD, PI_1, pi_0, +X, T, Y, q0, a_bnd, b_bnd, gamma, +eta_0,logging=False,step_schedule=0.5, averaging = False, give_initial=False): + ls = np.zeros(N_RST); ths = [None] *N_RST + iterator = log_progress(range(N_RST),every=1) if logging else range(N_RST) + for j in iterator: + random.seed(j) + if not give_initial: + th_0 = np.random.randn(x.shape[1]+1); + else: + th_0 = th + [oosrisks, losses, THTS, PSTARS] = grad_descent(th_0, N_RNDS, WGHTS_, + GRAD_, POL_GRAD, PI_1, pi_0, X, T, Y, q0, a_bnd, b_bnd, gamma, + eta_0,step_schedule) + if averaging: #average losses: OGD rule + ls[j] = np.mean(losses); ths[j] = sum(THTS)/len(THTS) + else: + # return the best so far, not last + best_so_far = np.argmin(losses) + ls[j]=losses[best_so_far]; ths[j] = THTS[best_so_far] + if logging: + plt.plot(range(N_RNDS), losses) + plt.pause(0.05) + return [ths[np.argmin(ls)], min(ls)] #return tht achieving min loss + +# random restarts +def opt_w_restarts(N_RST, th, + N_RNDS, WGHTS_, GRAD_, POL_GRAD, POL_PROB_1, BASELINE_POL, # specify policy functions, gradients, weight function s + x, t, fq, y, a_bnd, b_bnd, gamma, eta_0, # give data + logging=False, step_schedule=0.5, averaging = False, give_initial=False, sharp = False, **kwargs): # other opt settings + ls = np.zeros(N_RST); ths = [None] *N_RST + iterator = log_progress(range(N_RST),every=1) if logging else range(N_RST) + for j in iterator: + random.seed(j) + if (give_initial) and (j==0): + th_0 = th + else: + th_0 = np.random.randn(x.shape[1])*0.25; + if sharp: + # print 'using sharp estimator' + [losses, THTS, PSTARS] = grad_descent_sharp(th_0, N_RNDS, WGHTS_, GRAD_, POL_GRAD, POL_PROB_1, BASELINE_POL, x, t, fq, y, a_bnd, b_bnd, gamma,eta_0,step_schedule) + else: + [losses, THTS, PSTARS] = grad_descent(th_0, N_RNDS, WGHTS_, GRAD_, POL_GRAD, POL_PROB_1, BASELINE_POL, x, t, fq, y, a_bnd, b_bnd, gamma,eta_0,step_schedule) + if averaging: #average losses: OGD rule + ls[j] = np.mean(losses); ths[j] = sum(THTS)/len(THTS) + else: + best_so_far = np.argmin(losses) # return the best so far, not last + ls[j]=losses[best_so_far]; ths[j] = THTS[best_so_far] + if logging: + plt.plot(range(N_RNDS), losses) + plt.pause(0.05) + if logging: + print ls, 'opt losses' + return [ths[np.argmin(ls)], min(ls)] #return tht achieving min loss + + +""" Multiple treatments, optimize with restarts +""" +def opt_w_restarts_mt(N_RST, th, + N_RNDS, WGHTS_, GRAD_, POL_GRAD, POL_PROB_1, BASELINE_POL, # specify policy functions, gradients, weight function s + x, t, fq, y, a_bnd, b_bnd, gamma, eta_0, # give data + logging=False, step_schedule=0.5, averaging = False, give_initial=False, sharp = False, **kwargs): # other opt settings + ls = np.zeros(N_RST); ths = [None] *N_RST + n_ts = len(np.unique(t)) + iterator = log_progress(range(N_RST),every=1) if logging else range(N_RST) + for j in iterator: + random.seed(j) + + if (give_initial) and (j==0): + th_0 = th + else: + th_0 = np.random.randn(x.shape[1],n_ts)*0.25; + + [losses, THTS, PSTARS] = grad_descent_sharp_mt(th_0, N_RNDS, WGHTS_, GRAD_, POL_GRAD, POL_PROB_1, BASELINE_POL, x, t, fq, y, a_bnd, b_bnd, gamma,eta_0,step_schedule) + + if averaging: #average losses: OGD rule + ls[j] = np.mean(losses); ths[j] = sum(THTS)/len(THTS) + else: + best_so_far = np.argmin(losses) # return the best so far, not last + ls[j]=losses[best_so_far]; ths[j] = THTS[best_so_far] + if logging: + plt.plot(range(N_RNDS), losses) + plt.pause(0.05) + if logging: + print ls, 'opt losses' + return [ths[np.argmin(ls)], min(ls)] #return tht achieving min loss + +""" logistic pol assign observed treatment A_i +""" + + +""" +subgrad descent template algo +This is the sharp version for multiple treatments +which computes weights separately in each T=1, T=-1 component +automatically augments data ! +take in theta_0, # rounds +WGHTS_: fn obtaining optimal weights +This weight function is overloaded to accommodate weight functions that take in extra parameters +about the uncertainty set +GRAD_: fn to obtain parametric subgradient +POL_GRAD: gradient of parametrized policy wrt parameters +POL_PROB_A: return prob of pi(x) = A_i (for observed A_i) +BASELINE_POL: pi_0 probability of pi_0 = A_i (for observed A_i) +""" +def grad_descent_sharp_mt(th, N_RNDS, WGHTS_, GRAD_, POL_GRAD, POL_PROB_A, BASELINE_POL, x, t01, fq, y, + a_, b_, gamma,eta_0,logging=False,step_schedule=0.5,linesearch=False): + n = x.shape[0]; #t = get_sgn_0_1(t01) + t_levels = np.unique(t01) + assert all(len(arr) == n for arr in [x,t01,fq,y,a_,b_]) + # Check if x data contains an intercept: only retain for backwards compatibility: + # If last column is all ones, don't augment + if (x[:,-1] == np.ones(n)).all(): + x_aug = x + else: # otherwise augment data + x_aug = np.hstack([x, np.ones([n,1])]); + risks = np.zeros(N_RNDS); THTS = [None]*N_RNDS; PSTARS = np.zeros([N_RNDS, n]); losses = [None]*N_RNDS; + if not hasattr(gamma, "__len__"): # Default to repeating gamma if not specified + gammas = gamma * np.ones(len(t_levels)) + else: + gammas = gamma + def opt_wrapper_here(th):#linesearch wrapper + pi_1 = POL_PROB_A(th, x_aug, t01).flatten(); + p_1 = BASELINE_POL(pi_1, t01); + wghts_total = np.zeros(len(t01)); lda_opt_total = 0 + for ind,t_l in range(t_levels): + [lda_opt, wghts, wghts_sum] = WGHTS_(gammas[ind], np.multiply(y[t01 == t_l], pi_1[t01 == t_l] - p_1[t01 == t_l]), a_[t01 == t_l], b_[t01 == t_l], fq[t01 == t_l]) + wghts_total[t01==t_l] = wghts*1.0 / wghts_sum; + lda_opt_total += lda_opt + return lda_opt_total + + for k in range(N_RNDS) : + eta_t = eta_0 * 1.0/np.power((k+1)*1.0, step_schedule); eta_t_og = eta_t + pi_1 = POL_PROB_A(th, x_aug, t01).flatten(); + p_1 = BASELINE_POL(pi_1, t01); + # Modification for sharpness: + # Call the weight subroutine within each treatment partition separately + wghts_total_norm = np.zeros(len(t01)); + lda_opt_total = 0 + #TODO: modify for general case, mult T + for ind,t_l in enumerate(t_levels): + # print 'pi_1', pi_1 + # print pi_1[t01 == t_l] + # print 'th', th + # print p_1[t01 == t_l] + [lda_opt, wghts, wghts_sum] = WGHTS_(gammas[ind], np.multiply(y[t01 == t_l], pi_1[t01 == t_l] - p_1[t01 == t_l]), a_[t01 == t_l], b_[t01 == t_l], fq[t01 == t_l]) + wghts_total_norm[t01==t_l] = wghts*1.0 / wghts_sum; + lda_opt_total += lda_opt + + subgrad = GRAD_(th, POL_PROB_A, POL_GRAD, x_aug, y, t01, wghts_total_norm ) + + if (linesearch==True): # Armijo LineSearch + ALS = ArmijoLineSearch(tfactor = 0.2, default = eta_t) + d = -subgrad + slope = np.dot(subgrad, d) + eta_t = ALS.search(opt_wrapper_here, th, d, slope, f = lda_opt_total) + if eta_t is not None: + th = th - eta_t * subgrad + else: + th = th - eta_t_og * subgrad + THTS[k] = th; PSTARS[k,:] = wghts_total_norm.flatten(); losses[k] = lda_opt_total + return [losses, THTS, PSTARS] + +''' objective evaluation for multiple treatments +''' +def mt_obj_eval(th, *args): # linesearch wrapper + [WGHTS_, GRAD_, POL_GRAD, POL_PROB_A, BASELINE_POL, x, t01, fq, y, a_, b_, gamma] = args + t_levels = np.unique(t01) + if not hasattr(gamma, "__len__"): # Default to repeating gamma if not specified + gammas = gamma * np.ones(len(t_levels)) + else: + gammas = gamma + n = x.shape[0]; #t = get_sgn_0_1(t01) + assert all(len(arr) == n for arr in [x,t01,fq,y,a_,b_]) + # Check if x data contains an intercept: only retain for backwards compatibility: + # If last column is all ones, don't augment + if (x[:,-1] == np.ones(n)).all(): + x_aug = x + else: # otherwise augment data + x_aug = np.hstack([x, np.ones([n,1])]); + pi_1 = POL_PROB_A(th, x_aug, t01).flatten(); + p_1 = BASELINE_POL(pi_1, t01); + wghts_total = np.zeros(len(t01)); + lda_opt_total = 0 + for ind, t_l in enumerate(t_levels): + [lda_opt, wghts, wghts_sum] = WGHTS_(gammas[ind], + np.multiply(y[t01 == t_l], pi_1[t01 == t_l] - p_1[t01 == t_l]), + a_[t01 == t_l], b_[t01 == t_l], fq[t01 == t_l]) + wghts_total[t01 == t_l] = wghts * 1.0 / wghts_sum; + lda_opt_total += lda_opt + return lda_opt_total + +''' gradient evaluation for multiple treatments +''' +def mt_grad_eval(th, *args): # linesearch wrapper + [WGHTS_, GRAD_, POL_GRAD, POL_PROB_A, BASELINE_POL, x, t01, fq, y, a_, b_, gamma] = args + n = x.shape[0]; #t = get_sgn_0_1(t01) + t_levels = np.unique(t01) + if not hasattr(gamma, "__len__"): # Default to repeating gamma if not specified + gammas = gamma * np.ones(len(t_levels)) + else: + gammas = gamma + assert all(len(arr) == n for arr in [x,t01,fq,y,a_,b_]) + # Check if x data contains an intercept: only retain for backwards compatibility: + # If last column is all ones, don't augment + if (x[:,-1] == np.ones(n)).all(): + x_aug = x + else: # otherwise augment data + x_aug = np.hstack([x, np.ones([n,1])]); + wghts_total_norm = np.zeros(len(t01)); + lda_opt_total = 0 + # TODO: modify for general case, mult T + pi_1 = POL_PROB_A(th, x_aug, t01).flatten(); + p_1 = BASELINE_POL(pi_1, t01); + for ind, t_l in enumerate(t_levels): + [lda_opt, wghts, wghts_sum] = WGHTS_(gammas[ind], + np.multiply(y[t01 == t_l], pi_1[t01 == t_l] - p_1[t01 == t_l]), + a_[t01 == t_l], b_[t01 == t_l], fq[t01 == t_l]) + wghts_total_norm[t01 == t_l] = wghts * 1.0 / wghts_sum; + lda_opt_total += lda_opt + subgrad = GRAD_(th, POL_PROB_A, POL_GRAD, x_aug, y, t01, wghts_total_norm) + return subgrad + +# def grad_descent_vanilla_ipw(th, N_RNDS, WGHTS_, GRAD_, POL_GRAD, PI_1, P_1, x, t, fq, y, a_, b_, gamma,eta_0,logging=False,step_schedule=0.5): +def grad_descent_vanilla_ipw(th, N_RNDS, POL_PROB_1, x, t, fq, y, eta_0,logging=False,step_schedule=0.5): + n = x.shape[0]; + assert all(len(arr) == n for arr in [x,t,fq,y]) + # If last column is all ones, don't augment + if (x[:,-1] == np.ones(n)).all(): + x_aug = x + else: # otherwise augment data + x_aug = np.hstack([x, np.ones([n,1])]); + risks = np.zeros(N_RNDS); THTS = [None]*N_RNDS; PSTARS = np.zeros([N_RNDS, n]); losses = [None]*N_RNDS; oosrisks = np.zeros(N_RNDS) + fq_norm = fq/np.sum(fq) + for k in range(N_RNDS) : + eta_t = eta_0 * 1.0/np.power((k+1)*1.0, 0.3); + pi_1 = POL_PROB_1(th, x_aug).flatten(); + pi_t = np.asarray( [pi_1[i] if t[i] == 1 else 1- pi_1[i] for i in range(n)] ) + loss = np.sum( y*pi_t/fq_norm ) + pi_1_grad = qk_dpi_dtheta(pi_1, th, x_aug) + pi_t_grad = np.asarray( [pi_1_grad[i] if t[i] == 1 else -pi_1_grad[i] for i in range(n)] ) + subgrad = (y/fq_norm).dot(pi_t_grad).T + th = th - eta_t * subgrad + THTS[k] = th; losses[k] = loss + return [oosrisks, losses, THTS, PSTARS] + +# random restarts +def opt_w_restarts_vanilla_ipw(N_RST, th, N_RNDS, POL_PROB_1, x, t, fq, y, eta_0,logging=False,step_schedule=0.5,**params): + ls = np.zeros(N_RST); ths = [None] *N_RST + iterator = log_progress(range(N_RST),every=1) if logging else range(N_RST) + for j in iterator: + # assume data has intercept + th_0 = np.random.randn(x.shape[1]); + [oosrisks, losses, THTS, PSTARS] = grad_descent_vanilla_ipw(th_0, N_RNDS, POL_PROB_1, x, t, fq, y, eta_0,logging=False,step_schedule=step_schedule) + ls[j]=losses[-1]; ths[j] = THTS[-1] + if logging: + plt.plot(range(N_RNDS), losses) + plt.pause(0.05) + return [ths[np.argmin(ls)], min(ls)] #return tht achieving min loss + +''' linear CATE projection +''' +def grad_descent_linearproj_CATE(th, N_RNDS, POL_PROB_1, x, t, fq, y, eta_0,logging=False,step_schedule=0.5): + n = x.shape[0]; + assert all(len(arr) == n for arr in [x,t,fq,y]) + # If last column is all ones, don't augment + if (x[:,-1] == np.ones(n)).all(): + x_aug = x + else: # otherwise augment data + x_aug = np.hstack([x, np.ones([n,1])]); + risks = np.zeros(N_RNDS); THTS = [None]*N_RNDS; PSTARS = np.zeros([N_RNDS, n]); losses = [None]*N_RNDS; oosrisks = np.zeros(N_RNDS) + fq_norm = fq/np.sum(fq) + for k in range(N_RNDS) : + eta_t = eta_0 * 1.0/np.power((k+1)*1.0, 0.3); + pi_1 = POL_PROB_1(th, x_aug).flatten(); + # pi_t = np.asarray( [pi_1[i] if t[i] == 1 else 1- pi_1[i] for i in range(n)] ) + loss = np.sum( pi_1 * y ) # assume y is CATE estimate + pi_1_grad = qk_dpi_dtheta(pi_1, th, x_aug) + # pi_t_grad = np.asarray( [pi_1_grad[i] if t[i] == 1 else -pi_1_grad[i] for i in range(n)] ) + subgrad = (y).dot(pi_1_grad).T + th = th - eta_t * subgrad + THTS[k] = th; losses[k] = loss + return [oosrisks, losses, THTS, PSTARS] + + +def opt_w_restarts_generic(GRAD_DESCENT, N_RST, N_RNDS, POL_PROB_1, x, t, fq, y, eta_0,logging=False,step_schedule=0.5,**params): + ls = np.zeros(N_RST); ths = [None] *N_RST + iterator = log_progress(range(N_RST),every=1) if logging else range(N_RST) + for j in iterator: + # assume data has intercept + th_0 = np.random.randn(x.shape[1]); + [oosrisks, losses, THTS, PSTARS] = GRAD_DESCENT(th_0, N_RNDS, POL_PROB_1, x, t, fq, y, eta_0,logging=False,step_schedule=step_schedule) + ls[j]=losses[-1]; ths[j] = THTS[-1] + if logging: + plt.plot(range(N_RNDS), losses) + plt.pause(0.05) + return [ths[np.argmin(ls)], min(ls)] #return tht achieving min loss + + +# def grad_descent_vanilla_ipw(th, N_RNDS, WGHTS_, GRAD_, POL_GRAD, PI_1, P_1, x, t, fq, y, a_, b_, gamma,eta_0,logging=False,step_schedule=0.5): +def grad_descent_vanilla_ipw_centered(th, N_RNDS, PI_1, P_1, x, t, fq, y, eta_0,logging=False,step_schedule=0.5): + n = x.shape[0]; + assert all(len(arr) == n for arr in [x,t,fq,y]) + x_aug = np.hstack([x, np.ones([n,1])]); + risks = np.zeros(N_RNDS); THTS = [None]*N_RNDS; PSTARS = np.zeros([N_RNDS, n]); losses = [None]*N_RNDS; oosrisks = np.zeros(N_RNDS) + fq_norm = fq #/np.sum(fq) + for k in range(N_RNDS) : + eta_t = eta_0 * 1.0/np.power((k+1)*1.0, 0.3); + pi_1 = PI_1(th, x_aug).flatten(); + p_1 = P_1(pi_1); + pi_t = np.asarray( [pi_1[i] if t[i] == 1 else 1- pi_1[i] for i in range(n)] ) + loss = np.sum( y*(pi_t-p_1)/fq_norm ) + pi_1_grad = qk_dpi_dtheta(pi_1, th, x_aug) + pi_t_grad = np.asarray( [pi_1_grad[i] if t[i] == 1 else -pi_1_grad[i] for i in range(n)] ) + subgrad = (y/fq_norm).dot(pi_t_grad).T + th = th - eta_t * subgrad + THTS[k] = th; losses[k] = loss + return [oosrisks, losses, THTS, PSTARS] + +def grad_descent_policynorm_ipw(th, N_RNDS, PI_1, P_1, x, t, fq, y, eta_0,logging=False,step_schedule=0.5): + n = x.shape[0]; + assert all(len(arr) == n for arr in [x,t,fq,y]) + x_aug = np.hstack([x, np.ones([n,1])]); + risks = np.zeros(N_RNDS); THTS = [None]*N_RNDS; PSTARS = np.zeros([N_RNDS, n]); losses = [None]*N_RNDS; oosrisks = np.zeros(N_RNDS) + fq_norm = fq#/np.sum(fq) + for k in range(N_RNDS) : + eta_t = eta_0 * 1.0/np.power((k+1)*1.0, 0.3); + pi_1 = PI_1(th, x_aug).flatten(); + pi_t = np.asarray( [pi_1[i] if t[i] == 1 else 1- pi_1[i] for i in range(n)] ) + policy_norm_denom = np.sum( pi_t / fq_norm ) + loss = np.sum( y*pi_t/fq_norm ) / policy_norm_denom # self normalized version + pi_t_grad = np.asarray( [pi_1_grad[i] if t[i] == 1 else -pi_1_grad[i] for i in range(n)] ) + subgrad = (y/fq_norm).dot(pi_t_grad).T / policy_norm_denom - (loss/policy_norm_denom)* (1/fq_norm).dot(pi_t_grad).T + th = th - eta_t * subgrad + THTS[k] = th; losses[k] = loss + return [oosrisks, losses, THTS, PSTARS] +# random restarts +def opt_w_restarts_vanilla_ipw_centered(N_RST, th, N_RNDS, PI_1, P_1, x, t, fq, y, eta_0, +logging=False,step_schedule=0.5,normalized_policy=False): + ls = np.zeros(N_RST); ths = [None] *N_RST + iterator = log_progress(range(N_RST),every=1) if logging else range(N_RST) + for j in iterator: + th_0 = np.random.randn(x.shape[1]+1); + if normalized_policy: + [oosrisks, losses, THTS, PSTARS] = grad_descent_policynorm_ipw(th_0, N_RNDS, PI_1, P_1, x, t, fq, y, eta_0,logging=False,step_schedule=step_schedule) + else: + [oosrisks, losses, THTS, PSTARS] = grad_descent_vanilla_ipw(th_0, N_RNDS, PI_1, P_1, x, t, fq, y, eta_0,logging=False,step_schedule=step_schedule) + ls[j]=losses[-1]; ths[j] = THTS[-1] + if logging: + plt.plot(range(N_RNDS), losses) + plt.pause(0.05) + return [ths[np.argmin(ls)], min(ls)] #return tht achieving min loss + + +# random restarts +def opt_w_restarts_sc_min(N_RST, FN_, JAC_, d, args_): + ls = np.zeros(N_RST); ths = [None] * N_RST + for j in range(N_RST): + th_0 = np.random.randn(d+1); + res = minimize(FN_, th_0, jac = JAC_, method='L-BFGS-B', args = tuple(args_),options={'disp': True}) + ls[j]=FN_(res.x, *args_); ths[j] = res.x + return [ths[np.argmin(ls)], min(ls)] #return tht achieving min loss + + +""" general template algorithm +""" +def grad_descent_template(th, N_RNDS, LOSS_, GRAD_, POL_GRAD, PI_1, P_1, x, t01, fq, y, a_, b_, gamma,eta_0,logging=False,step_schedule=0.5): + n = x.shape[0]; t = get_sgn_0_1(t01) + assert all(len(arr) == n for arr in [x,t01,fq,y,a_,b_]) + x_aug = np.hstack([x, np.ones([n,1])]); + risks = np.zeros(N_RNDS); THTS = [None]*N_RNDS; PSTARS = np.zeros([N_RNDS, n]); losses = [None]*N_RNDS; oosrisks = np.zeros(N_RNDS) + for k in range(N_RNDS) : + eta_t = eta_0 * 1.0/np.power((k+1)*1.0, 0.3); + pi_1 = PI_1(th, x_aug).flatten(); + p_1 = P_1(pi_1); + loss = LOSS_(gamma, np.multiply(y*t, pi_1 - p_1), a_, b_, fq) + subgrad = GRAD_(th, PI_1, POL_GRAD, x_aug, y, t, wghts*1.0 / wghts_sum ) + th = th - eta_t * subgrad +# oosrisks[k] = np.mean( PI_1(th, x_test_aug)*y_test[t01==1]/true_Q_test[t01==1]+(1-PI_1(th, x_test_aug))*y_test[t01==0]/true_Q_test[t01==0] ); + THTS[k] = th; PSTARS[k,:] = wghts.flatten(); losses[k] = lda_opt + if k > 0: + if np.isclose(lda_opt, losses[k-1], atol = 0.0001): + return [oosrisks, losses[0:k], THTS[0:k], PSTARS] + return [oosrisks, losses, THTS, PSTARS] + + +#### Functions suitable for use with lbfgs +def opt_wrapper_sc(th, *args): + C = 0.05 + PI_1 = args[0]; x_aug=args[1]; y=args[2]; t_sgned=args[3]; fq = args[4]; P_1 = args[5]; a_bnd = args[6]; b_bnd = args[7]; n = len(y) + pi_1 = PI_1(th, x_aug).flatten(); p_1 = P_1(pi_1); + [lda_opt, wghts, wghts_sum]= find_opt_weights_shorter(np.multiply(y*t_sgned, pi_1 - p_1), a_bnd, b_bnd) + return lda_opt + C*np.linalg.norm(th,2)**2 + +def get_implicit_grad_centered_sc(th, *args): + C = 0.05 + PI_1 = args[0]; x_aug=args[1]; y=args[2]; t_sgned=args[3]; fq = args[4]; P_1 = args[5]; a_bnd = args[6]; b_bnd = args[7]; POL_GRAD = args[8]; n = len(y) + pi_1 = PI_1(th, x_aug) # 1 x n + p_1 = P_1(pi_1); + [lda_opt, wghts, wghts_sum] = find_opt_weights_shorter(np.multiply(y*t_sgned, pi_1 - p_1), a_bnd, b_bnd) + W = wghts / wghts_sum + # more efficiently compute constants: + constants = np.multiply(y, np.multiply(t_sgned,W)) + policy_x = PI_1(th, x_aug) # 1 x n + dpi_dtheta = POL_GRAD(policy_x, th, x_aug) # n x p + if x_aug.ndim > 1: + return np.multiply(constants[:,np.newaxis], dpi_dtheta).sum(axis=0) + else: + return np.sum(np.multiply(constants,dpi_dtheta)) + +def get_implicit_grad_centered_anti_pi_sc(th, *args): + C = 0.05 + PI_1 = args[0]; x_aug=args[1]; y=args[2]; t_sgned=args[3]; fq = args[4]; P_1 = args[5]; a_bnd = args[6]; b_bnd = args[7]; POL_GRAD = args[8]; n = len(y) + pi_1 = PI_1(th, x_aug) # 1 x n + p_1 = P_1(pi_1); + [lda_opt, wghts, wghts_sum] = find_opt_weights_shorter(np.multiply(y*t_sgned, pi_1 - p_1), a_bnd, b_bnd) + W = wghts / wghts_sum + dc_dpi = np.diag(2*y*t_sgned) + dpi_dtheta = POL_GRAD(pi_1, th, x_aug) # n x p + return dc_dpi.dot(dpi_dtheta).T.dot(W) + 2*C*th #! double check + +""" gamma infinity subgradient descent +""" +def grad_descent_gam_inf(th, N_RNDS, WGHTS_, GRAD_, POL_GRAD, PI_1, P_1, x, t01, fq, y, a_, b_, gamma,eta_0,logging=False,step_schedule=0.5): + n = x.shape[0]; t = get_sgn_0_1(t01) + assert all(len(arr) == n for arr in [x,t01,fq,y,a_,b_]) + x_aug = np.hstack([x, np.ones([n,1])]); + risks = np.zeros(N_RNDS); THTS = [None]*N_RNDS; PSTARS = np.zeros([N_RNDS, n]); losses = [None]*N_RNDS; oosrisks = np.zeros(N_RNDS) + for k in range(N_RNDS) : + eta_t = eta_0 * 1.0/np.power((k+1)*1.0, step_schedule); + pi_1 = PI_1(th, x_aug).flatten(); + p_1 = P_1(pi_1); + options = np.multiply(y*t, pi_1 - p_1) + lda_opt = np.max( options ); best_ind = np.argmax( options ) + # [lda_opt, wghts, wghts_sum] = WGHTS_(gamma, np.multiply(y*t, pi_1 - p_1), a_, b_, fq) + subgrad = y[best_ind]*t[best_ind] * pi_1[best_ind] * (1 - pi_1[best_ind]) * x_aug[best_ind, :] + th = th - eta_t * subgrad +# oosrisks[k] = np.mean( PI_1(th, x_test_aug)*y_test[t01==1]/true_Q_test[t01==1]+(1-PI_1(th, x_test_aug))*y_test[t01==0]/true_Q_test[t01==0] ); + THTS[k] = th; + # PSTARS[k,:] = wghts.flatten(); + losses[k] = lda_opt + return [oosrisks, losses, THTS, PSTARS] + +def opt_w_restarts_gam_inf(N_RST, th, N_RNDS, WGHTS_, GRAD_, POL_GRAD, PI_1, anti_p_1, x, t_sgned, fq, y, a_bnd, b_bnd, gamma,eta_0,logging=False,step_schedule=0.5): + ls = np.zeros(N_RST); ths = [None] *N_RST + iterator = log_progress(range(N_RST),every=1) if logging else range(N_RST) + for j in iterator: + th_0 = np.random.randn(x.shape[1]+1); + [oosrisks, losses, THTS, PSTARS] = grad_descent_gam_inf(th_0, N_RNDS, WGHTS_, GRAD_, POL_GRAD, PI_1, anti_p_1, x, t_sgned, fq, y, a_bnd, b_bnd, gamma,eta_0,step_schedule) + # return the best so far, not last + best_so_far = np.argmin(losses) + ls[j]=losses[best_so_far]; ths[j] = THTS[best_so_far] + if logging: + plt.plot(range(N_RNDS), losses) + plt.pause(0.05) + return [ths[np.argmin(ls)], min(ls)] #return tht achieving min loss + +''' +Evaluate loss of policy th under different bounds a_, b_ (corresponding to a different value of Gamma) +PI_1 policy prob: return probability pi(x) = 1 +p_1: baseline probability +a_, b_: bounds corresponding to Gamma being assessed (e.g. Un) +gamma: effective gamma bound (no-op for unbudgeted uncertainty set) +''' +def Rbar(th, x, t, y, POL_PROB_1, BASELINE_POL, a_bnd, b_bnd, fq, gamma = 0, WGHTS_ = opt_wrapper, **kwargs ): + pi_1 = np.asarray(POL_PROB_1(th, x)).flatten() + t_levels = np.unique(t) + p_1 = BASELINE_POL(pi_1) + # old # [lda_opt, wghts, wghts_sum] = find_opt_weights_shorter( np.multiply(y*t, pi_1 - p_1), a_, b_) + if not hasattr(gamma, "__len__"): + gammas = gamma * np.ones(len(t_levels)) + else: + gammas = gamma + [lda_opt_neg1, wghts_neg1, wghts_sum_neg1] = WGHTS_(gammas[0],np.multiply(-1*y[t == -1], pi_1[t == -1] - p_1[t == -1]), a_bnd[t == -1], b_bnd[t == -1], fq[t == -1]) + [lda_opt_1, wghts_1, wghts_sum_1] = WGHTS_(gammas[1], np.multiply(y[t == 1], pi_1[t == 1] - p_1[t == 1]), a_bnd[t == 1], b_bnd[t == 1], fq[t == 1]) + return lda_opt_1 + lda_opt_neg1 + +''' +Evaluate loss of policy th under different bounds a_, b_ (corresponding to a different value of Gamma) +PI_1 policy prob: return probability pi(x) = 1 +p_1: baseline probability (check in case handed a n x m array) +a_, b_: bounds corresponding to Gamma being assessed (e.g. Un) +gamma: effective gamma bound (no-op for unbudgeted uncertainty set) +''' +def Rbar_mt(th, x, t, y, POL_PROB_1, BASELINE_POL, a_bnd, b_bnd, fq, gamma = 0, WGHTS_ = opt_wrapper, **kwargs ): + pi_1 = np.asarray(POL_PROB_1(th, x, t)).flatten() + t_levels = np.unique(t) + p_1 = BASELINE_POL(pi_1, t) + if len(np.asarray(pi_1).shape) > 1: + p_1 = [ p_1[i,t[i]] for i in range(x.shape[0]) ] # project n x m array to a vector based on observed teratment assignment + # old # [lda_opt, wghts, wghts_sum] = find_opt_weights_shorter( np.multiply(y*t, pi_1 - p_1), a_, b_) + if not hasattr(gamma, "__len__"): + gammas = gamma * np.ones(len(t_levels)) + else: + gammas = gamma + lda_opt_total = 0 + for ind,t_l in enumerate(t_levels): + [lda_opt, wghts, wghts_sum] = WGHTS_(gammas[ind], np.multiply(y[t == t_l], pi_1[t == t_l] - p_1[t == t_l]), a_bnd[t == t_l], b_bnd[t == t_l], fq[t == t_l]) + lda_opt_total += lda_opt + return lda_opt_total + + + +''' vanilla uncentered ipw +extra args: PI_1, x (with intercept), y, t, fq, +''' +def vanilla_ipw(th,*args): + C = 0.05 + PI_1 = args[0]; x_aug=args[1]; y=args[2]; t=args[3]; fq = args[4]; n = len(y) + fq_norm = fq/np.sum(fq) + pi_1 = PI_1(th, x_aug).flatten(); + pi_t = np.asarray( [pi_1[i] if t[i] == 1 else 1- pi_1[i] for i in range(n)] ) + return np.sum( y*pi_t/fq_norm ) + C*np.linalg.norm(th,2)**2 +''' vanilla uncentered ipw +extra args: PI_1, x (with intercept), y, t, fq, +''' +def vanilla_ipw_subgrad(th,*args): + C = 0.05 + PI_1 = args[0]; x_aug=args[1]; y=args[2]; t=args[3]; fq = args[4]; n = len(y) + fq_norm = fq/np.sum(fq) + pi_1 = PI_1(th, x_aug).flatten(); + pi_1_grad = qk_dpi_dtheta(pi_1, th, x_aug) + pi_t_grad = np.asarray( [pi_1_grad[i] if t[i] == 1 else -pi_1_grad[i] for i in range(n)] ) + subgrad = (y/fq_norm).dot(pi_t_grad).T + 2*C*th + return subgrad + +def grad_descent_vanilla_ipw_mt(th, N_RNDS, POL_PROB_1, x, t, fq, y, eta_0,logging=False,step_schedule=0.5): + n = x.shape[0]; + assert all(len(arr) == n for arr in [x,t,fq,y]) + # If last column is all ones, don't augment + if (x[:,-1] == np.ones(n)).all(): + x_aug = x + else: # otherwise augment data + x_aug = np.hstack([x, np.ones([n,1])]); + risks = np.zeros(N_RNDS); THTS = [None]*N_RNDS; PSTARS = np.zeros([N_RNDS, n]); losses = [None]*N_RNDS; oosrisks = np.zeros(N_RNDS) + fq_norm = fq/np.sum(fq) + for k in range(N_RNDS) : + eta_t = eta_0 * 1.0/np.power((k+1)*1.0, 0.3); + pi_1 = POL_PROB_1(th, x_aug).flatten(); + pi_t = np.asarray( [pi_1[i] if t[i] == 1 else 1- pi_1[i] for i in range(n)] ) + loss = np.sum( y*pi_t/fq_norm ) + pi_1_grad = qk_dpi_dtheta(pi_1, th, x_aug) + pi_t_grad = np.asarray( [pi_1_grad[i] if t[i] == 1 else -pi_1_grad[i] for i in range(n)] ) + subgrad = (y/fq_norm).dot(pi_t_grad).T + th = th - eta_t * subgrad + THTS[k] = th; losses[k] = loss + return [oosrisks, losses, THTS, PSTARS] + +# random restarts +def opt_w_restarts_vanilla_ipw_mt(N_RST, th, N_RNDS, POL_PROB_1, x, t, fq, y, eta_0,logging=False,step_schedule=0.5,**params): + ls = np.zeros(N_RST); ths = [None] *N_RST + iterator = log_progress(range(N_RST),every=1) if logging else range(N_RST) + for j in iterator: + # assume data has intercept + th_0 = np.random.randn(x.shape[1]); + [oosrisks, losses, THTS, PSTARS] = grad_descent_vanilla_ipw(th_0, N_RNDS, POL_PROB_1, x, t, fq, y, eta_0,logging=False,step_schedule=step_schedule) + ls[j]=losses[-1]; ths[j] = THTS[-1] + if logging: + plt.plot(range(N_RNDS), losses) + plt.pause(0.05) + return [ths[np.argmin(ls)], min(ls)] #return tht achieving min loss + + +# https://github.com/funsim/moola/blob/master/moola/linesearch/dcsrch_fortran/linesearch.py +# armijo linesearch +class LineSearch: + """ + A generic linesearch class. Most methods of this + class should be overridden by subclassing. + """ + + def __init__(self, **kwargs): + self._id = 'Generic Linesearch' + return + + def _test(self, func, x, d, slope, f = None, t = 1.0, **kwargs): + """ + Given a descent direction d for function func at the + current iterate x, see if the steplength t satisfies + a specific linesearch condition. + Must be overridden. + """ + return True # Must override + + def search(self, func, x, d, slope, f = None, **kwargs): + """ + Given a descent direction d for function func at the + current iterate x, compute a steplength t such that + func(x + t * d) satisfies a linesearch condition + when compared to func(x). The value of the argument + slope should be the directional derivative of func in + the direction d: slope = f'(x;d) < 0. If given, f should + be the value of func(x). If not given, it will be evaluated. + func can point to a defined function or be a lambda function. + For example, in the univariate case:: + test(lambda x: x**2, 2.0, -1, 4.0) + """ + # return to default stepsize if not a descent dir (due to finite differences and nonconvex opt) + t = 1.0 + if slope >= 0.0: + return t + while not self._test(func, x, d, f = f, t = t, **kwargs): + pass + return t + + +class ArmijoLineSearch(LineSearch): + """ + An Armijo linesearch with backtracking. This class implements the simple + Armijo test + f(x + t * d) <= f(x) + t * beta * f'(x;d) + where 0 < beta < 1/2 and f'(x;d) is the directional derivative of f in the + direction d. Note that f'(x;d) < 0 must be true. + :keywords: + :beta: Value of beta (default 0.001) + :tfactor: Amount by which to reduce the steplength + during the backtracking (default 0.5). + """ + + def __init__(self, **kwargs): + LineSearch.__init__(self, **kwargs) + self.beta = max(min(kwargs.get('beta', 1.0e-4), 0.5), 1.0e-10) + self.tfactor = max(min(kwargs.get('tfactor', 0.1), 0.999), 1.0e-3) + self.default = kwargs.get('default') + return + + def _test(self, func, x, d, slope, f = None, t = 1.0, **kwargs): + """ + Given a descent direction d for function func at the + current iterate x, see if the steplength t satisfies + the Armijo linesearch condition. + """ + if f is None: + f = func(x) + f_plus = func(x + t * d) + return (f_plus <= f + t * self.beta * slope) + + def search(self, func, x, d, slope, f = None, **kwargs): + """ + Given a descent direction d for function func at the + current iterate x, compute a steplength t such that + func(x + t * d) satisfies the Armijo linesearch condition + when compared to func(x). The value of the argument + slope should be the directional derivative of func in + the direction d: slope = f'(x;d) < 0. If given, f should + be the value of func(x). If not given, it will be evaluated. + func can point to a defined function or be a lambda function. + For example, in the univariate case: + `test(lambda x: x**2, 2.0, -1, 4.0)` + """ + if f is None: + f = func(x) + + t = 1.0 + if slope >= 0.0: + return t # don't linesearch if not a descent direction + iterations = 0 + while not self._test(func, x, d, slope, f = f, t = t, **kwargs): + t *= self.tfactor + # print iterations; sys.stdout.flush() + iterations += 1 + if iterations >= 20: + return self.default + # raise ValueError("line search doesn't terminate") + # return None + return t diff --git a/unconfoundedness_fns.py b/unconfoundedness_fns.py new file mode 100644 index 0000000..c37854b --- /dev/null +++ b/unconfoundedness_fns.py @@ -0,0 +1,676 @@ +""" +@author: Angela Zhou +""" +import numpy as np +from random import sample +import math +import cvxpy as cvx +import matplotlib.pyplot as plt +from scipy.optimize import minimize +# from matplotlib import collections as matcoll +from sklearn import svm +from scipy.integrate import quad +from sklearn import linear_model +from sklearn.model_selection import train_test_split +from sklearn.linear_model import LogisticRegression +from scipy.stats import norm +from scipy import integrate +# from sympy import mpmath as mp + + + + +''' +Helper functions for estimating propensities +''' +def estimate_prop(x, T, predict_x, predict_T): + clf_dropped = LogisticRegression() + clf_dropped.fit(x, T) + est_prop = clf_dropped.predict_proba(predict_x) + est_Q = np.asarray( [est_prop[k,1] if predict_T[k] == 1 else est_prop[k,0] for k in range(len(predict_T))] ) + return [est_Q, clf_dropped] + +def get_prop(clf, x, T): + est_prop = clf_dropped.predict_proba(x) + est_Q = np.asarray( [est_prop[k,1] if T[k] == 1 else est_prop[k,0] for k in range(len(T))] ) + return est_Q + + + +# get indicator vector from signed vector +def get_0_1_sgn(vec): + n = len(vec) + return np.asarray([1 if vec[i] == 1 else 0 for i in range(n) ]).flatten() +# get signed vector from indicator vector +def get_sgn_0_1(vec): + n = len(vec) + return np.asarray([1 if vec[i] == 1 else -1 for i in range(n) ]).flatten() +''' +performs policy match indicator function; returns 1 or 0 +input: signed treatments, signed policy assignment +''' +def pol_match(T_sgned, pol): + sgn_match = np.multiply(T_sgned, pol ) + return get_0_1_sgn(sgn_match) + + + + + +''' faster sorting routines +''' +''' Get general weights by sorting for the uncentered, smoothed estimator +''' +def get_general_interval_wghts_algo_uncentered_smoothed(init_policy, **params): + T_obs = params['T'].astype(int) ; Y = params['Y'].flatten(); n = params['n']; x = params['x']; + n = params['n']; weights = np.zeros(n); a = params['a']; b = params['b'] + T_sgned=get_sgn_0_1(T_obs) + probs_pi_T = params['pi_pol'](T_sgned, init_policy, x) + a_mod = np.multiply(a, probs_pi_T); b_mod = np.multiply(b, probs_pi_T) + # Sort all Y! + plus_inds = np.argsort(Y); + Y_plus_std = Y[plus_inds] # Sorted Y + n_plus = n; vals = np.zeros(n_plus+1) + prev_val = -np.inf; k=0; val = np.inf + ## !!!! You can speed up by binary search + while (val > prev_val) and (k < n_plus+1): + denom = 0; num = 0; val = prev_val +# for i in range(len(vals)): + num = sum(np.multiply(a_mod[plus_inds[0:k]], Y_plus_std[0:k])) + sum(np.multiply(b_mod[plus_inds[k:]], Y_plus_std[k:])) + denom = sum(a_mod[plus_inds[0:k]])+sum(b_mod[plus_inds[k:]]) +# for k in range(n_plus): +# if k 0, W >= 0 ] +# prim_obj = ( Y.T * W ) #+ LAMBDA*(cvx.norm(opt_theta)+cvx.norm(opt_theta_0) + cvx.norm(opt_theta_1)) +# obj = cvx.Maximize(prim_obj) # -1* minimize negative rewards = max rewards +# prob = cvx.Problem(obj, constraints) # since we add a convex regularizer +# prob.solve() +# return [ W.value, t.value, prob.value ] + +''' +solves for optimal weights for the problem with \sum W_i (pi Y_i) +returns w, unnormalized weights +''' +def get_general_interval_wghts_pol(init_policy, **params): + n = params['n']; weights = np.zeros(n) + T_sgned = np.asarray([ 1 if params['T'][i] == 1 else -1 for i in range(n)]).flatten() + policy_x = params['pi_pol'](T_sgned, init_policy, params['x']).flatten() + params['hinge'] = policy_x + [W, t, val] = get_optimal_interval_wghts_CVX(init_policy, **params) # W is normalized weights + weights = (W.flatten()/t).T + return [weights,t] + +''' +evaluate loss function from parametric problem (uncentered), under probabilistic policy assumption +#! weights are weights multiplied separately by probabilities +compute the loss as weights^T * (\pi Y) +''' +def normalized_parametric_loss_all(pol_theta, *args): + params = dict(args[0]); x = params['x']; C = params['C']; sign = params['sign']; + n = params['n']; q = params['q']; Y = params['Y']; + T = params['T']; a = params['a']; b = params['b']; pi = params['pi_pol']; + T_sgned = np.asarray([ 1 if T[i] == 1 else -1 for i in range(n)]).flatten(); + W = params['weights']; opt_t = params['opt_t']; d = len(pol_theta); + policy_x = pi(T_sgned, pol_theta, x).reshape([n,1]); #policy_x_prime = pi_prime(pol_theta, x) + # already accounted for probability + loss_val = np.dot(params['weights'].reshape([n,1]).T, Y) +# loss_val = params['weights'].reshape([n,1]).T * np.multiply(policy_x, Y) + return sign*( loss_val + C*np.linalg.norm(pol_theta,2)) + + +#! This is function returning Pr[ \pi(x) = T_sgned] +def logistic_pol(T_sgned, theta, x): + n = len(T_sgned); theta = theta.flatten() + if len(theta) == 1: + pol_match = np.multiply(T_sgned, np.multiply(x, theta).flatten()) + else: + pol_match = np.multiply(T_sgned, np.dot(x, theta).flatten()) + LOGIT_TERM_POS = np.ones(n) / ( np.ones(n) + np.exp( -pol_match )) + return LOGIT_TERM_POS + + +''' +performs policy match indicator function; returns 1 or 0 +input: signed treatments, signed policy assignment +''' +def pol_match(T_sgned, pol): + sgn_match = np.multiply(T_sgned, pol ) + return get_0_1_sgn(sgn_match) + + +''' Get general weights by sorting for the uncentered, smoothed estimator +''' +def get_general_interval_wghts_algo_uncentered_smoothed(init_policy, **params): + T_obs = params['T'].astype(int) ; Y = params['Y'].flatten(); n = params['n']; x = params['x']; + n = params['n']; weights = np.zeros(n); a = params['a']; b = params['b'] + T_sgned=get_sgn_0_1(T_obs) + probs_pi_T = params['pi_pol'](T_sgned, init_policy, x) + a_mod = np.multiply(a, probs_pi_T); b_mod = np.multiply(b, probs_pi_T) + # Sort all Y! + plus_inds = np.argsort(Y); + Y_plus_std = Y[plus_inds] # Sorted Y + n_plus = n; vals = np.zeros(n_plus+1) + prev_val = -np.inf; k=1; val = sum(np.multiply(b_mod[plus_inds], Y_plus_std)) / sum(b_mod[plus_inds]) + ## !!!! You can speed up by binary search + while (val > prev_val) and (k < n_plus+1): + denom = 0; num = 0; prev_val = val + num = sum(np.multiply(a_mod[plus_inds[0:k]], Y_plus_std[0:k])) + sum(np.multiply(b_mod[plus_inds[k:]], Y_plus_std[k:])) + denom = sum(a_mod[plus_inds[0:k]])+sum(b_mod[plus_inds[k:]]) + val = num / denom; k+=1; + lda_opt = val; k_star = k-1 + weights[plus_inds[0:k_star]] = a_mod[plus_inds[0:k_star]] + weights[plus_inds[k_star:]] = b_mod[plus_inds[k_star:]] + return [weights, sum(weights)] + + +''' return Pr[ \pi(x)=T ] +''' +def logistic_pol_match_obs(T_sgned, theta, x): + n = len(T_sgned); pol_match = np.multiply(T_sgned, np.dot(x, theta).flatten()) + LOGIT_TERM_POS = np.ones(n) / ( np.ones(n) + np.exp( -pol_match )) + return LOGIT_TERM_POS +''' return Pr[ \pi(x)=1 ] +''' +def logistic_pol_asgn(theta, x): + theta = theta.flatten() + n = x.shape[0] + if len(theta) == 1: + logit = np.multiply(x, theta).flatten() + else: + logit = np.dot(x, theta).flatten() + LOGIT_TERM_POS = np.ones(n) / ( np.ones(n) + np.exp( -logit )) + return LOGIT_TERM_POS + +''' multinomial logistic: return all probabilities +We index theta as a d x K array +x is n x d +output is n x K +''' +def logistic_pol_asgn_mt(theta, x): + n=x.shape[0]; d = x.shape[1] + k = theta.shape[1] + logit = np.zeros([n,k]); LOGIT_TERM_POS = np.zeros([n,k]) + if d == 1: + for a in range(k): + logit[:,a] = np.multiply(x, theta[:,a]) + else: + for a in range(k): + logit[:,a] = np.dot(x, theta[:,a]) + for i in range(n): + # compute probability of observed action + LOGIT_TERM_POS[i,:] = np.asarray([np.exp(logit[i,k_]) / ( np.sum( np.exp(logit[i,:]) ) ) for k_ in range(k) ]) # sum over classes + return LOGIT_TERM_POS # output is nxk + +''' multinomial logistic: return all probabilities +We index theta as a d x K array +x is n x d +output is n x K +(overloaded arguments just in case) +''' +def logistic_pol_asgn_mt(theta, x, t_test=0): + n=x.shape[0]; d = x.shape[1] + k = theta.shape[1] + logit = np.zeros([n,k]); LOGIT_TERM_POS = np.zeros([n,k]) + if d == 1: + for a in range(k): + logit[:,a] = np.multiply(x, theta[:,a]) + else: + for a in range(k): + logit[:,a] = np.dot(x, theta[:,a]) + for i in range(n): + # compute probability of observed action + LOGIT_TERM_POS[i,:] = np.asarray([np.exp(logit[i,k_]) / ( np.sum( np.exp(logit[i,:]) ) ) for k_ in range(k) ]) # sum over classes + if np.isinf(np.exp(logit[i,k_])): + LOGIT_TERM_POS[i] = 1 # overflow fix + return LOGIT_TERM_POS # output is nxk + + +''' multinomial logistic: return probability of assigning observed treatment +We index theta as a d x K array +x is n x d +Output is filtered based on the observed treatment pattern +''' +def logistic_pol_asgn_mt_obsA(theta, x, t01): + n = x.shape[0] + d = x.shape[1]; + k = theta.shape[1] # n treatments + logit = np.zeros([n,k]) + LOGIT_TERM_POS = np.zeros(n) + if d == 1: + for a in range(k): + logit[:,a] = np.multiply(x, theta[:,a]) + else: + for a in range(k): + logit[:,a] = np.dot(x, theta[:,a]) + # numerically stable version + amax = max(logit) + for i in range(n): + # compute probability of observed action + LOGIT_TERM_POS[i] = np.exp(logit[i,t01[i]]) / ( np.sum( np.exp(logit[i,:]) ) ) # sum over classes + if np.isinf(np.exp(logit[i,t01[i]])): + LOGIT_TERM_POS[i] = 1 # overflow fix + if np.isnan(LOGIT_TERM_POS[i]): + print 'num',np.exp(logit[i,t01[i]]) + print 'denom',np.sum( np.exp(logit[i,:]) ) + # if sum(np.isnan(LOGIT_TERM_POS))>0: + # print 'nan theta',theta + # return LOGIT_TERM_POS + return LOGIT_TERM_POS # output is nx1 + + +def find_opt_weights_short_val(a_,b_,Y): + [lda_opt, weights, s_wghts] = find_opt_weights_short(a_, b_, Y) + return lda_opt + +'''get odds ratio bounds on estimated propensities (est_Q) given sensitivity level Gamma +''' +def get_bnds(est_Q,LogGamma): + n = len(est_Q) + p_hi = np.multiply(np.exp(LogGamma), est_Q ) / (np.ones(n) - est_Q + np.multiply(np.exp(LogGamma), est_Q )) + p_lo = np.multiply(np.exp(-LogGamma), est_Q ) / (np.ones(n) - est_Q + np.multiply(np.exp(-LogGamma), est_Q )) + assert (p_lo < p_hi).all() + a_bnd = 1/p_hi; + b_bnd = 1/p_lo + return [ a_bnd, b_bnd ] + + + +# ''' Given the truncated list of weights, Y (unsorted) and +# return Lambda (problem value), k +# ''' +# def find_opt_weights_short(a_, b_, Y, sub_ind=[]): +# if len(sub_ind)>0: +# a_=a_[sub_ind]; b_=b_[sub_ind]; Y = Y[sub_ind] +# sort_inds = np.argsort(Y); a_=a_[sort_inds]; Y = Y[sort_inds]; b_=b_[sort_inds] +# n_plus = len(Y); weights = np.zeros(n_plus); prev_val = -np.inf; k=1; val = sum(np.multiply(b_, Y)) /sum(b_) ## !!!! You can speed up by binary search +# while (val > prev_val) and (k < n_plus+1): +# denom = 0; num = 0; prev_val = val; num = 1.0*sum(np.multiply(a_[0:k], Y[0:k])) + sum(np.multiply(b_[k:], Y[k:])) +# denom = sum(a_[0:k])+sum(b_[k:]); val = num / denom; k+=1; +# lda_opt = prev_val; k_star = k-1 +# sort_inds_a = sort_inds[0:k_star]; sort_inds_b = sort_inds[k_star:] +# weights[sort_inds_a] = a_[0:k_star]; weights[sort_inds_b] = b_[k_star:] +# +# return [lda_opt, weights, sum(weights)] + + + +''' Given Y (unsorted), lower bound 'a_', upper bound 'b_' on weights, and possible index list sub_ind, +return Lambda (min problem value), weights, sum(weights) +''' +def find_opt_weights_short(Y, a_, b_, sub_ind=[]): + if len(sub_ind)>0: + print sub_ind + a_=a_[sub_ind]; b_=b_[sub_ind]; Y = Y[sub_ind] + sort_inds = np.lexsort((a_,Y)); a_=a_[sort_inds]; Y = Y[sort_inds]; b_=b_[sort_inds] + n_plus = len(Y); weights = np.zeros(n_plus); prev_val = -np.inf; k=1; val = sum(np.multiply(b_, Y)) /sum(b_) + while (val > prev_val) and (k < n_plus+1): + denom = 0; num = 0; prev_val = val; num = 1.0*sum(np.multiply(a_[0:k], Y[0:k])) + sum(np.multiply(b_[k:], Y[k:])) + denom = sum(a_[0:k])+sum(b_[k:]); val = num / denom; k+=1; + lda_opt = prev_val; k_star = k-1 + sort_inds_a = sort_inds[0:k_star]; sort_inds_b = sort_inds[k_star:] + weights[sort_inds_a] = a_[0:k_star]; weights[sort_inds_b] = b_[k_star:] + return [lda_opt, weights, sum(weights)] + +''' explicit: include plots of all values +''' +def find_opt_weights_plot(Y,a_,b_,sub_ind=[], lexsort = False): + if len(sub_ind)>0: + print sub_ind + a_=a_[sub_ind]; b_=b_[sub_ind]; Y = Y[sub_ind] + if lexsort: + sort_inds = np.lexsort((a_,Y)) + else: + sort_inds = np.argsort(Y); + a_=a_[sort_inds]; Y = Y[sort_inds]; b_=b_[sort_inds] + n_plus = len(Y); weights = np.zeros(n_plus); prev_val = -np.inf; k=1; + val = np.sum(np.multiply(b_, Y)) /np.sum(b_) + vals = [ (1.0*np.sum(np.multiply(a_[0:k], Y[0:k])) + np.sum(np.multiply(b_[k:], Y[k:])))/(np.sum(a_[0:k])+np.sum(b_[k:])) for k in range(n_plus) ] + lda_opt = np.max(vals); k_star = np.argmax(vals)-1 + plt.figure() + plt.plot(vals) + plt.figure() + plt.plot(np.diff(vals)) + sort_inds_a = sort_inds[0:k_star]; sort_inds_b = sort_inds[k_star:] + weights[sort_inds_a] = a_[0:k_star]; weights[sort_inds_b] = b_[k_star:] + return [lda_opt, weights, sum(weights)] + + +def rnd_k_val(k_,a_,b_,Y): + k= int(np.floor(k_)) # floor or round? + return (1.0*np.sum(np.multiply(a_[0:k], Y[0:k])) + np.sum(np.multiply(b_[k:], Y[k:])))/(np.sum(a_[0:k])+np.sum(b_[k:])) + +''' Given Y (unsorted), lower bound 'a_', upper bound 'b_' on weights, and possible index list sub_ind, +use ternary search to find the optimal value. +Possibly off by one error but it shouldn't matter. +return Lambda (min problem value), weights, sum(weights) +''' +def find_opt_weights_shorter(Y, a_, b_, sub_ind=[]): + if len(sub_ind)>0: + a_=a_[sub_ind]; b_=b_[sub_ind]; Y = Y[sub_ind] + # sort_inds = np.argsort(Y); + sort_inds = np.lexsort((b_-a_,Y)); + a_=a_[sort_inds]; Y = Y[sort_inds]; b_=b_[sort_inds] + n_plus = len(Y); weights = np.zeros(n_plus); prev_val = -np.inf; k=1; val = np.sum(np.multiply(b_, Y)) /sum(b_) + left = 0; right = n_plus-1;keepgoing=True + while keepgoing: + #left and right are the current bounds; the maximum is between them +# print (left,right) +# print (rnd_k_val(leftThird,a_,b_,Y), rnd_k_val(rightThird,a_,b_,Y)) + if abs(right - left) < 2.1: # separation in index space + k = np.floor((left + right)/2) + keepgoing=False + leftThird = left + (right - left)/3 + rightThird = right - (right - left)/3 + if rnd_k_val(leftThird,a_,b_,Y) < rnd_k_val(rightThird,a_,b_,Y): + left = leftThird + else: + right = rightThird + k_star=int(k); k=int(k) + lda_opt = (1.0*np.sum(np.multiply(a_[0:k], Y[0:k])) + np.sum(np.multiply(b_[k:], Y[k:])))/(np.sum(a_[0:k])+np.sum(b_[k:])) + sort_inds_a = sort_inds[0:k_star]; sort_inds_b = sort_inds[k_star:] + weights[sort_inds_a] = a_[0:k_star]; weights[sort_inds_b] = b_[k_star:] + return [lda_opt, weights, np.sum(weights)] + +'''Given Y (unsorted), lower bound 'a_', upper bound 'b_' on weights, and possible index list sub_ind, +Lambda (min problem value), weights, sum(weights) +''' +def find_opt_robust_ipw_val(Y, a_, b_,shorter=False): + if shorter: + [lda_opt, weights, s_wghts] = find_opt_weights_shorter(Y, a_, b_) + else: + [lda_opt, weights, s_wghts] = find_opt_weights_short(Y, a_, b_) + return lda_opt + +'''Given Y (unsorted), lower bound 'a_', upper bound 'b_' on weights, and possible index list sub_ind, +Lambda (max problem value), weights, sum(weights) +''' +def find_opt_robust_ipw_val_min(Y, a_,b_,shorter=False): + if shorter: + [lda_opt, weights, s_wghts] = find_opt_weights_shorter(-Y, a_, b_) + else: + [lda_opt, weights, s_wghts] = find_opt_weights_short(-Y, a_, b_) + return -lda_opt + + +''' functions for computing TV divergence +''' + +''' get optimal weights with TV constraint +''' +import gurobipy as gp + +''' minimize wrt TV bound +''' +def get_general_interval_wghts_algo_uncentered_smoothed_f_divergence_TV(incumbent_pol, quiet=True, **params): + T_obs = params['T'].astype(int) ; Y = params['Y'].flatten(); n = params['n']; x = params['x']; + n = params['n']; a = params['a']; b = params['b'] + gamma = params['gamma']; wm = 1/params['q'] + if params['subind'] == True: + subinds = params['subinds'] + Y = Y[subinds]; n = len(subinds); T_obs = T_obs[subinds]; x = x[subinds]; a= a[subinds]; b= b[subinds]; wm = wm[subinds] + # assume estimated propensities are probs of observing T_i + y = Y; weights = np.zeros(n) + # nominal propensities + # smoothing probabilities + T_sgned=get_sgn_0_1(T_obs) + probs_pi_T = params['pi_pol'](T_sgned, incumbent_pol, x) + a_mod = np.multiply(a, probs_pi_T); b_mod = np.multiply(b, probs_pi_T) + wm = np.multiply(wm, probs_pi_T) + m = gp.Model() + if quiet: m.setParam("OutputFlag", 0) + t = m.addVar(lb = 0., ub = gp.GRB.INFINITY, vtype=gp.GRB.CONTINUOUS) + w = [m.addVar(obj = -yy, lb = 0., ub = gp.GRB.INFINITY, vtype=gp.GRB.CONTINUOUS) for yy in y] + d = [m.addVar(lb = 0., ub = gp.GRB.INFINITY, vtype=gp.GRB.CONTINUOUS) for yy in y] + m.update() + m.addConstr(gp.quicksum(w)==1) + m.addConstr(gp.quicksum(d)<=gamma*t) + for i in range(len(y)): + m.addConstr(w[i] <= b_mod[i] * t) + m.addConstr(w[i] >= a_mod[i] * t) + m.addConstr(d[i] >= w[i] - t*wm[i]) + m.addConstr(d[i] >= - w[i] + t*wm[i]) + m.optimize() + return -m.ObjVal + +''' maximize wrt TV bound +''' +def get_general_interval_wghts_algo_uncentered_smoothed_f_divergence_TV_max(incumbent_pol, quiet=True, **params): + T_obs = params['T'].astype(int) ; Y = params['Y'].flatten(); n = params['n']; x = params['x']; + n = params['n']; weights = np.zeros(n); a = params['a']; b = params['b'] + gamma = params['gamma'] + # assume estimated propensities are probs of observing T_i + y = -Y + wm = 1/params['q'] # nominal propensities + # smoothing probabilities + T_sgned=get_sgn_0_1(T_obs) + probs_pi_T = params['pi_pol'](T_sgned, incumbent_pol, x) + a_mod = np.multiply(a, probs_pi_T); b_mod = np.multiply(b, probs_pi_T) + wm = np.multiply(wm, probs_pi_T) + m = gp.Model() + if quiet: m.setParam("OutputFlag", 0) + t = m.addVar(lb = 0., ub = gp.GRB.INFINITY, vtype=gp.GRB.CONTINUOUS) + w = [m.addVar(obj = -yy, lb = 0., ub = gp.GRB.INFINITY, vtype=gp.GRB.CONTINUOUS) for yy in y] + d = [m.addVar(lb = 0., ub = gp.GRB.INFINITY, vtype=gp.GRB.CONTINUOUS) for yy in y] + m.update() + m.addConstr(gp.quicksum(w)==1) + m.addConstr(gp.quicksum(d)<=gamma*t) + for i in range(len(y)): + m.addConstr(w[i] <= b_mod[i] * t) + m.addConstr(w[i] >= a_mod[i] * t) + m.addConstr(d[i] >= w[i] - t*wm[i]) + m.addConstr(d[i] >= - w[i] + t*wm[i]) + m.optimize() + return m.ObjVal + +def logistic_pol_asgn(theta, x): + n = x.shape[0] + theta = theta.flatten() + if len(theta) == 1: + logit = np.multiply(x, theta).flatten() + else: + logit = np.dot(x, theta).flatten() + LOGIT_TERM_POS = np.ones(n) / ( np.ones(n) + np.exp( -logit )) + return LOGIT_TERM_POS + +#### +''' Getting subgradients for the robust value function +''' + +''' +read in callbacks for derivative of pi given theta and optimal w, t +PI_1, POL_GRAD (returns (p x 1) vector) +take in ** normalized weights W ** +''' +def get_implicit_grad_centered(pol_theta, PI_1, POL_GRAD, x, Y, t01, W): + # if need to get active index set + # rescaled weights in original + n = len(W); T_sgned = get_sgn_0_1(t01) + + # dc_dpi = np.diag(Y*T_sgned) + # policy_x = PI_1(pol_theta, x) # 1 x n + # dpi_dtheta = POL_GRAD(policy_x, pol_theta, x) # n x p + # return dc_dpi.dot(dpi_dtheta).T.dot(W) + constants = np.multiply(Y, np.multiply(T_sgned,W)) + policy_x = PI_1(pol_theta, x) # 1 x n + dpi_dtheta = POL_GRAD(policy_x, pol_theta, x) # n x p + if x.ndim > 1: + return np.multiply(constants[:,np.newaxis], dpi_dtheta).sum(axis=0) + else: + return np.sum(np.multiply(constants,dpi_dtheta)) +# # Minibatch version: +# CHUNKN = 20 +# id_batches = np.array_split(range(n),CHUNKN) # list of arrays of indices +# if x.ndim > 1: +# grad = np.zeros([x.shape[1], 1]) +# else: +# grad = 0 +# for batch in id_batches: +# constants = Y[batch]*T_sgned[batch]*W[batch] +# policy_x = PI_1(pol_theta, x[batch,]) # 1 x n +# dpi_dtheta = POL_GRAD(policy_x, pol_theta, x[batch,]) # n x p +# grad += np.sum(np.multiply(constants[:,np.newaxis], dpi_dtheta)) # should be in px1 +# return grad # no regularization +''' +read in callbacks for derivative of pi given theta and optimal w, t +Y(Pi) - Y(-Pi) +PI_1, POL_GRAD (returns (p x 1) vector) +take in ** normalized weights W ** + +''' +def get_implicit_grad_centered_anti_pi(pol_theta, PI_1, POL_GRAD, x, Y, t01, W): + n = len(W); T_sgned = get_sgn_0_1(t01) + dc_dpi = np.diag(2*Y*T_sgned) + policy_x = PI_1(pol_theta, x) # 1 x n + dpi_dtheta = POL_GRAD(policy_x, pol_theta, x) # n x p + return dc_dpi.dot(dpi_dtheta).dot(W) + + + + +''' +find value of centered estimator, evaluated against a benchmark policy which assigns +Pi(x) = 1 w.p p_1 for all x +''' +def centered_around_p1(a_bnd, b_bnd, Y_T, pi_1, p_1): + return find_opt_robust_ipw_val(np.multiply(Y_T, (pi_1 - p_1)), a_bnd, b_bnd, shorter=True) + +def plot_W_GDS(p_ths, W_GDs): + plot(p_ths, W_GDs[:,0]) + for i in range(len(p_ths)): + plot([p_ths[i]-0.5, p_ths[i]+0.5], [W_GDs[i,0]-W_GDs[i,1]*0.5, W_GDs[i,0]+W_GDs[i,1]*0.5], c='b',alpha=0.1) + +''' test gradient fn for th, given vector of assignments p_1 +''' +def test_subgrad_for_th(p_th, p_1, PI_1, POL_GRAD, x, y, t01): + n = x.shape[0]; pi_1 = PI_1(np.asarray([p_th]), x).flatten(); t=get_sgn_0_1(t01); + [lda_opt, wghts, wghts_sum] = find_opt_weights_shorter(np.multiply(y*t, pi_1 - p_1), a_bnd, b_bnd) + grad = get_implicit_grad_centered(p_th, PI_1, POL_GRAD, x, y, t01, wghts/wghts.sum()) + return [lda_opt,grad] + +''' test gradient fn for th, regret against the anti-policy -Pi +''' +def test_subgrad_for_anti(p_th, p_1, PI_1, POL_GRAD, x, y, t01): + n = x.shape[0]; pi_1 = PI_1(np.asarray([p_th]), x).flatten(); t=get_sgn_0_1(t01); + [lda_opt, wghts, wghts_sum] = find_opt_weights_shorter(np.multiply(y*t, pi_1 - p_1), a_bnd, b_bnd) + grad = get_implicit_grad_centered_anti_pi(p_th, PI_1, POL_GRAD, x, y, t01, wghts/wghts.sum()) + return [lda_opt,grad] + +########### Data generation tools ######################################################################## +""" +""" + + + # generate propensity model +def real_prop(x, beta_prop): + n = x.shape[0]; d = x.shape[1] + prop = np.zeros(n) + for i in range(n): + prop[i] = np.exp(np.dot(beta_prop[0:d], x[i,:]) + beta_prop[-1] )/ (1 + np.exp(np.dot(beta_prop[0:d], x[i,:]) + beta_prop[-1] )) + return prop +''' +requires specifying globals +HIGH_FREQ_N = 5 +FREQ = 20 +''' +def generate_data_nd(mu_x, sigma_x_mat, n, beta_cons, beta_x, beta_x_T, TRUE_PROP_BETA, beta_x_high_freq): +# x = np.random.normal(mu_x, sigma_x, size = n) + # generate n datapoints from the same multivariate normal distribution + x = np.random.multivariate_normal(mean = mu_x, cov= sigma_x_mat, size = n ) + true_Q = real_prop(x, TRUE_PROP_BETA) + T = np.array(np.random.uniform(size=n) < true_Q).astype(int).flatten() + T = T.reshape([n,1]) + + clf = LogisticRegression(); clf.fit(x, T) + propensities = clf.predict_proba(x) + print clf.coef_ + T_sgned = np.asarray([ 1 if T[i] == 1 else -1 for i in range(n)]).flatten() + + y_sigma = 0.5 + nominal_propensities_pos = propensities[:,1]; nominal_propensities_null = propensities[:,0] + Q = np.asarray( [nominal_propensities_pos[i] if T[i] == 1 else nominal_propensities_null[i] for i in range(n)] ) + q = Q + Y = np.zeros(n) + for i in range(n): + Y[i] = T[i]*beta_cons + np.dot(beta_x.T, x[i,:]) + np.dot(beta_x_T.T, x[i,:]*T[i]) + np.dot(beta_x_high_freq.T, np.sin(x[i,0:HIGH_FREQ_N]*FREQ)*T[i]) + + white_noise_coef = 0.2 + # add random noise + Y += np.random.multivariate_normal(mean = np.zeros(n), cov=white_noise_coef * np.eye(n)) + Y_OFFSET = np.abs(np.min(Y)) + Y = Y + Y_OFFSET + T = T.flatten() +# px = x.reshape([n,p]); x_augmented = np.hstack([x, np.ones([n,1])]) + T_colors = [ 'r' if T[i] == 0 else 'b' for i in range(n)] +# x_poly = np.hstack([x, x**2, x**3, np.ones([n,1])]) +# x_u_poly = np.hstack([x_poly[:,0:3], u.reshape([n,1])]) + return [x, T, Y, true_Q, clf, T_colors, Y_OFFSET] + +######################################################################################## + +### Scale continuous +def scale_continuous(train_dict,test_dict): + continuous = np.asarray([len(np.unique(train_dict['X'][:,j])) for j in range(train_dict['X'].shape[1])]) > 10 + print np.where(continuous)[0] + def scale_columns(x_train_col, x_test_col): + mn = np.mean(x_train_col); sd = np.std(x_train_col) + x_train_col = (x_train_col*1.0 - mn*1.0)/sd + x_test_col = (x_test_col*1.0 - mn)*1.0/sd + return [x_train_col, x_test_col] + + for ind in np.where(continuous)[0]: + [x_train_col, x_test_col] = scale_columns(train_dict['X'][:,ind], test_dict['X'][:,ind]) + train_dict['X'][:,ind] = x_train_col + test_dict['X'][:,ind] = x_test_col + return [train_dict, test_dict] + + +''' add new versions of items to data dict ''' +def subsample_traindict(train_dict,TEST_FRAC): + train_ind, test_ind = train_test_split(range(len(train_dict['Y'])), test_size=TEST_FRAC) + dictlabels = ['T', 'Y', 'Yhf', 'prop_T' ] + train_dict['X'] = train_dict['X'][train_ind,:] + for ind,key in enumerate(dictlabels): + train_dict[key] = train_dict[key][train_ind]; + # x_, x_test, t_sgned_, t_sgned_test, y_, y_test, yhf_, yhftest_, nominal_Q_, nominal_Q_test, train_ind, test_ind \ + # = train_test_split(train_dict['X'], train_dict['T'], train_dict['Y'],train_dict['Yhf'], train_dict['prop_T'], range(len(train_dict['Y'])), test_size=TEST_FRAC) + # new_list = [x_, t_sgned_, y_,yhf_, nominal_Q_, train_ind] + # dictlabels = [ 'X', 'T', 'Y', 'Yhf', 'q0' ] # this ends up permuting the estimated propensities + # for ind,key in enumerate(dictlabels): + # train_dict[key] = new_list[ind]; + return [train_dict, train_ind] + +def scale_columns(x_train_col, x_test_col): + mn = np.mean(x_train_col); sd = np.std(x_train_col) + x_train_col = (x_train_col*1.0 - mn*1.0)/sd + x_test_col = (x_test_col*1.0 - mn)*1.0/sd + return [x_train_col, x_test_col] + +def scale_dicts(train_dict, test_dict): + continuous = np.asarray([len(np.unique(train_dict['X'][:,j])) for j in range(train_dict['X'].shape[1])]) > 10 + for ind in np.where(continuous)[0]: + [x_train_col, x_test_col] = scale_columns(train_dict['X'][:,ind], test_dict['X'][:,ind]) + train_dict['X'][:,ind] = x_train_col + test_dict['X'][:,ind] = x_test_col + return [train_dict, test_dict]