From 403d1c7d34190b96971b157415f393c59b713cee Mon Sep 17 00:00:00 2001
From: Angela Zhou <angelamczhou@gmail.com>
Date: Tue, 29 Oct 2019 20:10:31 -0400
Subject: [PATCH] initial commit

---
 data_scenarios.py       |  251 ++++++++++
 methods.py              |  359 ++++++++++++++
 methods_test.py         |  121 +++++
 subgrad.py              | 1035 +++++++++++++++++++++++++++++++++++++++
 unconfoundedness_fns.py |  676 +++++++++++++++++++++++++
 5 files changed, 2442 insertions(+)
 create mode 100644 data_scenarios.py
 create mode 100644 methods.py
 create mode 100644 methods_test.py
 create mode 100644 subgrad.py
 create mode 100644 unconfoundedness_fns.py

diff --git a/data_scenarios.py b/data_scenarios.py
new file mode 100644
index 0000000..5924054
--- /dev/null
+++ b/data_scenarios.py
@@ -0,0 +1,251 @@
+
+import matplotlib.pyplot as plt
+import numpy as np
+import cvxpy as cvx
+from gurobipy import *
+from scipy.optimize import minimize
+import datetime as datetime
+import pickle
+from matplotlib.backends.backend_pdf import PdfPages
+from matplotlib import collections as matcoll
+from sklearn import svm
+from methods import *
+
+import os
+import sys
+module_path = os.path.abspath(os.path.join('/Users/az/Box Sync/unconfoundedness'))
+if module_path not in sys.path:
+    sys.path.append(module_path)
+from scripts_running import *
+from unconfoundedness_fns import *
+# from evals import *
+from greedy_partitioning import *
+from subgrad import *
+from scipy.stats import norm
+from scipy.stats import multivariate_normal
+from copy import deepcopy
+
+d = 5  # dimension of x
+n = 800;
+# parameters
+rho = np.asarray([1 / np.sqrt(2), -1 / np.sqrt(2), 0, 0, 1 / np.sqrt(3)])  # normalize to unit 0.5
+rho = rho / (np.dot(rho, rho) * 2)
+
+beta_cons = 2.5
+beta_x = np.asarray([0, .5, -0.5, 0, 0, 0])
+beta_x_T = np.asarray([-1.5, 1, -1.5, 1., 0.5, 0])
+beta_T = np.asarray([0, .75, -.5, 0, -1, 0, 0])
+beta_T_conf = np.asarray([0, .75, -.5, 0, -1, 0])
+# beta_T = np.asarray([-1, 0,0, -.5, 1,0.5,1.5])
+# beta_T_conf = np.asarray([-1, 0,0, -.5, 1,0.5 ])
+mu_x = np.asarray([-1, .5, -1, 0, -1]);
+
+alpha = -2
+w = 1.5
+
+def return_CATE_optimal_assignment(x, u):
+    n = x.shape[0]
+    risk_T_1 = real_risk_(np.ones(n), x, u)
+    risk_T_0 = real_risk_(np.zeros(n), x, u)
+    opt_T = [1 if risk_T_1[k] < risk_T_0[k] else 0 for k in range(n)]
+    return opt_T
+
+
+# generate propensity model
+def REAL_PROP_LOG(x, u):
+    Gamma = 1.5
+    print x.shape
+    nominal_ = logistic_pol_asgn(beta_T_conf, x)
+    #     return nominal_
+    # set u = I[ Y(T)\mid x > Y(-T) \mid x ]
+    a_bnd, b_bnd = get_bnds(nominal_, Gamma)
+    q_lo = 1 / b_bnd;
+    q_hi = 1 / a_bnd
+    opt_T = return_CATE_optimal_assignment(x, u)
+    q_real = np.asarray([q_hi[i] if opt_T[i] == 1 else q_lo[i] for i in range(len(u))])
+
+    return q_real
+
+def real_risk(T, beta_cons, beta_x, beta_x_T, x, u):
+    '''
+    takes as input integral T
+    '''
+    n = len(T);
+    risk = np.zeros(n)
+    for i in range(len(T)):
+        risk[i] = T[i] * beta_cons + np.dot(beta_x.T, x[i, :]) + np.dot(beta_x_T.T, x[i, :] * T[i]) + alpha * (u[i]) * (
+        (2 * T[i] - 1)) + w * (u[i])
+    return risk
+
+
+def real_risk_(T, x, u):
+    return real_risk(T, beta_cons, beta_x, beta_x_T, x, u)
+
+def real_risk_T_integer(T, x, u):
+    T = get_sgn_0_1(T)
+    return real_risk(T, beta_cons, beta_x, beta_x_T, x, u)
+
+
+'''
+takes in prob_1, n x m array of treatment assignment probabilities 
+compute 
+\sum_t 1/n \sum_i Y(t)\pi_i(t) 
+'''
+def real_risk_prob_oracle(prob_t, x, u):
+    n_ts = prob_t.shape[1]
+    risk = 0
+    for t in range(n_ts):
+        risk += np.mean( np.multiply(prob_t[:,t], real_risk_(t*np.ones(x.shape[0]), x, u) )  )
+    return risk
+#     return prob_1 * real_risk_T_integer(np.ones(n), x, u) + (1 - prob_1) * real_risk_(np.zeros(n), x, u)
+
+
+
+
+def real_risk_prob(prob_1, x, u):
+    n = len(u)
+    prob_1 = np.asarray(prob_1)
+    return prob_1 * real_risk_(np.ones(n), x, u) + (1 - prob_1) * real_risk_(np.zeros(n), x, u)
+
+def generate_log_data(mu_x, n, beta_cons, beta_x, beta_x_T):
+    # generate n datapoints from the same multivariate normal distribution
+    d = len(mu_x)
+    u = (np.random.rand(n) > 0.5)  # needs to be Rademacher
+    #     u = np.asarray([u[i] if u[i] == 1 else -1 for i in range(len(u))])
+    #     u = np.ones(n)
+    x = np.zeros([n, d])
+    for i in range(n):
+        x[i, :] = np.random.multivariate_normal(mean=mu_x * (2 * u[i] - 1), cov=np.eye(d))
+    x_ = np.hstack([x, np.ones([n, 1])])
+    # generate propensities
+    true_Q = REAL_PROP_LOG(x_, u)
+    T = np.array(np.random.uniform(size=n) < true_Q).astype(int).flatten()
+    T = T.reshape([n, 1]);
+    T_sgned = np.asarray([1 if T[i] == 1 else -1 for i in range(n)]).flatten()
+    clf = LogisticRegression();
+    clf.fit(x, T)
+    propensities = clf.predict_proba(x)
+    nominal_propensities_pos = propensities[:, 1]
+
+    nominal_propensities_pos = logistic_pol_asgn(beta_T_conf, x_)
+
+    q0 = np.asarray([nominal_propensities_pos[i] if T[i] == 1 else 1 - nominal_propensities_pos[i] for i in range(n)])
+    true_Q_obs = np.asarray([true_Q[i] if T[i] == 1 else 1 - true_Q[i] for i in range(n)])
+    Y = np.zeros(n)
+    for i in range(n):
+        Y[i] = T[i] * beta_cons + np.dot(beta_x.T, x_[i, :]) + np.dot(beta_x_T.T, x_[i, :] * T[i]) + alpha * (u[i]) * (
+                    2 * T[i] - 1) + w * u[i]  # + np.dot(beta_x_T.T, x_[i,:] - [-1,1] ) #+ np.dot(beta_x_high_freq.T, np.sin(x[i,0:HIGH_FREQ_N]*FREQ)*T[i])
+    # add random noise
+    T = T.flatten()
+    Y += 2 * np.random.randn(n)  # + np.random.randn()*np.asarray(T)*2 ;
+    return [x_, u, T, Y, true_Q_obs, q0]
+
+
+# generate propensity model
+def REAL_PROP_LOG(x, u):
+    Gamma = 1.5
+    print x.shape
+    nominal_ = logistic_pol_asgn(beta_T_conf, x)
+    #     return nominal_
+    # set u = I[ Y(T)\mid x > Y(-T) \mid x ]
+    a_bnd, b_bnd = get_bnds(nominal_, Gamma)
+    q_lo = 1 / b_bnd;
+    q_hi = 1 / a_bnd
+    opt_T = return_CATE_optimal_assignment(x, u)
+    q_real = np.asarray([q_hi[i] if opt_T[i] == 1 else q_lo[i] for i in range(len(u))])
+
+    return q_real
+
+
+def real_risk_mt(x_, u, T, alpha, beta_tilde, beta_x_T,  eta_tilde, eta):
+    n = len(T); risk = np.zeros(n)
+    for i in range(len(T)):
+        risk[i] = alpha[T[i]] + np.dot(beta_tilde.T, x_[i, :]) + np.dot(beta_x_T[:,T[i]].T, x_[i, :]) + eta[T[i]]*(u[i]) + eta_tilde*u[i]
+    return risk
+
+def real_risk_mt_(x_, u, T):
+    return real_risk_mt(x_, u, T,*outcome_dgp_params)
+
+'''
+takes in prob_1, n x m array of treatment assignment probabilities 
+compute 
+\sum_t 1/n \sum_i Y(t)\pi_i(t) 
+'''
+def real_risk_prob_oracle_mt(prob_t, x, u, *outcome_dgp_params):
+    n_ts = prob_t.shape[1]
+    risk = 0
+    for t in range(n_ts):
+        risk += np.mean( np.multiply(prob_t[:,t], real_risk_mt_(x, u, t*np.ones(n).astype(int)) )  )
+    return risk
+#     return prob_1 * real_risk_T_integer(np.ones(n), x, u) + (1 - prob_1) * real_risk_(np.zeros(n), x, u)
+
+
+def return_CATE_optimal_assignment_mt(x, u, n_ts, *outcome_dgp_params):
+    n = x.shape[0]
+    risk = np.zeros([n,n_ts])
+    for k in range(n_ts):
+        risk[:,k] = real_risk_mt(x, u, k*np.ones(n).astype(int), *outcome_dgp_params)
+    opt_T = [np.argmin(risk[i,:]) for i in range(n)]
+    return opt_T
+
+''' real propensity that generates treatment assignments for multiple treatments 
+'''
+def real_propensity_mt(x, u, beta_T_conf_prop, log_gamma, *outcome_dgp_params ):
+    nominal_ = logistic_pol_asgn_mt(beta_T_conf_prop, x)
+    # set u = I[ Y(T)\mid x > Y(-T) \mid x ]
+    opt_T = return_CATE_optimal_assignment_mt(x, u, beta_T_conf_prop.shape[1], *outcome_dgp_params)
+    n_ts = beta_T_conf_prop.shape[1]
+    q_lo = np.zeros([x.shape[0], n_ts]); q_hi = np.zeros([x.shape[0], n_ts])
+    for k in range(n_ts):
+        a_bnd, b_bnd = get_bnds(nominal_[:,k], log_gamma)
+        q_lo[:,k] = 1 / b_bnd;
+        q_hi[:,k] = 1 / a_bnd
+    q_real = deepcopy(nominal_)
+    for i in range(x.shape[0]):
+        if opt_T[i] == 1:
+            q_real[i,1] = q_hi[i, 1 ]
+            q_real[i,2] = nominal_[i,2]
+            q_real[i,0] = 1 - q_real[i,1] - q_real[i,2]
+        # elif opt_T[i] == 0:
+        #     q_real[i,0] = q_lo[i, 0 ]
+        #     q_real[i,2] = nominal_[i,2]
+        #     q_real[i,1] = 1 - q_real[i,0] - q_real[i,2]
+        else:
+            q_real[i,:] = nominal_[i,:]
+    # clip and renormalize
+    q_real = np.clip(q_real, 0.01, 0.99)
+    for i in range(x.shape[0]):
+        q_real[i,:] = q_real[i,:] / sum(q_real[i,:])
+    return q_real
+
+
+'''
+# take in a vector of
+beta_cons: 
+beta_tilde: \tilde{\beta} (confounding interaction with x) 
+beta_x_T: [d x k] array of coefficient vectors, one for each treatment
+
+'''
+def generate_log_data_mt(mu_x, n, beta_T_conf_prop, n_ts, log_gamma, alpha, beta_tilde, beta_x_T,  eta_tilde, eta):
+    # generate n datapoints from the same multivariate normal distribution
+    outcome_dgp_params = [alpha, beta_tilde, beta_x_T,  eta_tilde, eta ]
+    d = len(mu_x);
+    u = (np.random.rand(n) > 0.5)  # Bernoulli noise realization;
+    x = np.zeros([n, d])
+    for i in range(n):
+        x[i, :] = np.random.uniform(low=np.ones(d)*-3,high = np.ones(d)*3)
+
+        # x[i, :] = np.random.multivariate_normal(mean=mu_x * (2 * u[i] - 1), cov=np.eye(d))
+    x_ = np.hstack([x, np.ones([n, 1])])
+    # generate propensities
+    true_Q = real_propensity_mt(x_, u, beta_T_conf_prop, log_gamma, *outcome_dgp_params ) # in n x k for multiple treatments
+    T = np.asarray([np.random.choice( range(n_ts), p = true_Q[i,:] ) for i in range(n) ]).astype(int).flatten()
+    q0 = logistic_pol_asgn_mt_obsA(beta_T_conf_prop, x_, T)
+    # q0 = np.asarray([nominal_propensities_pos[i,T[i]] for i in range(n)])
+    true_Q_obs = np.asarray([true_Q[i,T[i]] for i in range(n)])
+    Y = np.zeros(n)
+    for i in range(n):
+        Y[i] = alpha[T[i]] + np.dot(beta_tilde.T, x_[i, :]) + np.dot(beta_x_T[:,T[i]].T, x_[i, :]) + eta[T[i]]*(u[i]) + eta_tilde*u[i]
+    # add random noise
+    Y += np.random.randn(n)  # + np.random.randn()*np.asarray(T)*2 ;
+    return [x_, u, T, Y, true_Q_obs, q0]
diff --git a/methods.py b/methods.py
new file mode 100644
index 0000000..acf4402
--- /dev/null
+++ b/methods.py
@@ -0,0 +1,359 @@
+import numpy as np
+import gurobipy as gp
+from unconfoundedness_fns import *
+from subgrad import *
+from opt_tree import *
+from greedy_partitioning_serv import *
+import pickle
+from datetime import datetime
+
+'''
+policy_prob_1: function returning probability of policy assignment = 1
+weight_function: function returning adversarial weights (given lower, upper bound and gamma or effective gamma budget bound (L1 budget) 
+baseline_pol: pi_0(x) function 
+policy_optimizer: ogd, or optimal_tree 
+- verbose 
+- save 
+    - stump 
+    - experiment name 
+'''
+class ConfoundingRobustPolicy:
+    def __init__(self, baseline_pol, verbose = False, ind_rep = -1, save_params={}, save = True, treatment_n='binary', params={}):
+        self.baseline_pol = baseline_pol
+        # self.weight_function = weight_function
+        self.verbose = verbose
+        self.save_params = save_params #stump, exp_name
+        self.save = save
+            #self.save_params != {} # check if it's not the default
+        self.ind_rep = ind_rep
+        self.params = params
+        self.treatment_n = treatment_n # some implementations leverage binary encoding
+
+        # store training data when passed in by .fit
+        self.x = None
+        self.t = None
+        self.t_levels = None
+        self.y = None
+        self.fq = None
+        self.RBARS_ = None
+        self.RISKS = None
+        self.POLS_dict = None
+        self.eval_data = None
+        self.opt_params_og = None
+        self.get_risk = None #overwritten with a frozen test set: call get_risk with a vector of policy prob assignments
+    ''' 
+    fit based on X, T, Y; q0 nominal propensities
+    Assume x doesn't have an intercept
+    GAMMAS series of parameters
+    method_types: list of strings 'ogd', 'fdiv-02', 
+    method_params object takes in 
+        - optimizer
+        - opt_params
+            the params_dict that gets passed to the optimizer
+            DEFAULT_POL: the policy to default to if loss is nonnegative
+        - pol_opt
+            'ogd', 'tree', 'ipw': descriptor of method (gradient based or otherwise) 
+                'unc_set_type': 'interval', 'L1-budget'
+    # Make GAMS rounded precision
+    '''
+    def fit(self, x, t, y, q0, GAMS, method_params, eval_conf={'eval':False} ):
+        if self.treatment_n == 'binary':
+            t = get_sgn_0_1(t) # make sure treatment is signed, not 0-1
+        else:
+            n_treatments = len(np.unique(t))
+        random.seed(1)
+
+        # we input data with intercept
+        data_dict = { 'x':x, 't':t, 'y':y, 'fq': q0 }
+        self.x = x; self.t = t; self.y = y; self.fq = q0; self.t_levels = np.sort(np.unique(t))
+        opt_params = method_params['opt_params']  # get parameters (e.g. optimization parameters from dict)
+        self.opt_params_og = opt_params
+        opt_params.update({'x': x, 't': t, 'y': y, 'fq': q0})
+        if self.treatment_n == 'binary':
+            prev_sol = np.random.randn(x.shape[1]);
+        else:
+            print n_treatments
+            print x.shape[1]
+            prev_sol = np.random.randn(x.shape[1],n_treatments);
+        POLS = [ None ] * len(GAMS);  losses = np.zeros(len(GAMS))
+        RISKS = np.zeros(len(GAMS))
+
+        if eval_conf['eval']:  # if evaluate policy error online
+            self.init_evaluation(eval_conf)  # set risk_test
+
+        # iterate over values of GAMMAS and train
+        for ind_g, gam in enumerate(GAMS):
+            if self.verbose:
+                print 'gamma, ', gam
+            a_bnd, b_bnd = get_bnds(q0, gam)
+            data_dict.update({'a_bnd':a_bnd, 'b_bnd':b_bnd}); opt_params.update( {'a_bnd':a_bnd,'b_bnd':b_bnd} )
+            # update parameters based on gamma, a_bnd, b_bnd
+            optimizer = method_params['optimizer']
+            [opt_params, gammas] = self.update_opt_params(gam, a_bnd, b_bnd, opt_params, method_params)
+            now = datetime.now()
+            [robust_th, robust_loss] = optimizer(th=prev_sol, **opt_params)
+            print datetime.now() - now, 'time optimizing'
+            # Optimization refinements since gamma uncertainty sets are nested: revert, check loss is negative
+            now = datetime.now()
+            if method_params['pol_opt'] != 'IPW':
+                if ind_g > 1:
+                    [robust_th, robust_loss] = self.reversion_check_optimality(ind_g, robust_th, robust_loss,
+                            a_bnd, b_bnd, gammas, POLS, POL_PROB_1=opt_params['POL_PROB_1'], WGHTS_ = opt_params['WGHTS_'])
+                [robust_th, robust_loss] = self.check_loss_nonnegative(robust_th, robust_loss, opt_params['DEFAULT_POL'])
+            prev_sol = robust_th; POLS[ind_g] = robust_th; losses[ind_g] = robust_loss
+            print datetime.now() - now, 'time verifying'
+
+            # if evaluate policy error online: report results
+            if eval_conf['eval']:
+                if self.treatment_n == 'binary':
+                    test_rec = opt_params['POL_PROB_1'](robust_th, self.eval_data['x_test'])
+                else: # we need treatment information for returning policy probability vector for multiple treatments
+                    if eval_conf['eval_type'] == "true_dgp":
+                        test_rec = opt_params['POL_PROB_all'](robust_th, self.eval_data['x_test'], self.eval_data['t_test'])
+                    else: #condition evaluation on observational treatment assignment in t_test
+                        test_rec = opt_params['POL_PROB_1'](robust_th, self.eval_data['x_test'], self.eval_data['t_test'])
+
+                RISKS[ind_g] = self.get_risk(test_rec)
+                self.RISKS = RISKS
+                if self.verbose:
+                    print 'eval risk', RISKS[ind_g]
+            if self.verbose:
+                pickle.dump({'recs': test_rec, 'risk':RISKS[ind_g]}, open(self.save_params['stump'] + self.save_params['exp_name'] + '--' + method_params[
+                        'type'] + '--gamma--'+ str(np.round(gam,2)) + '--rep' + str(self.ind_rep) + '--' + datetime.now().strftime(
+                        '%Y-%m-%d-%H-%M-%S') + '.p', 'wb'))
+
+        # After fitting: class contains a list of policies
+        # Post processing of optimization results; calibrate,
+        if method_params['pol_opt'] != 'IPW':
+            self.RBARS_ = self.calibrate_risk_for_policies(GAMS, POLS, unc_set_type=method_params['unc_set_type'], POL_PROB_1=opt_params['POL_PROB_1'], WGHTS_=opt_params['WGHTS_'])
+        self.POLS_dict = dict(zip(GAMS, POLS))
+        if self.save:
+            pickle.dump( {'RBARS_':self.RBARS_, 'POLS_dict': self.POLS_dict, 'GAMS':GAMS, 'RISKS':RISKS}, open(self.save_params['stump']+self.save_params['exp_name']+'--'+method_params['type']+'--rep'+str(self.ind_rep)+'--'+datetime.now().strftime('%Y-%m-%d-%H-%M-%S')+'.p', 'wb') )
+
+    def predict(self, x, gamma):
+        pol_fn = POLS_dict[str(gamma)]
+        return opt_params['POL_PROB_1'](pol_fn, x)
+            # self.POLS_dict[str(gamma)](x)
+
+
+    ''' update opt params for changes in gamma, a_bnd, b_bnd computed 
+    '''
+    def update_opt_params(self, gamma, a_bnd, b_bnd, opt_params, method_params):
+        gammas = gamma
+        if method_params['pol_opt'] == 'ogd':
+            [N_RNDS, eta, N_RNDS_tv] = self.get_ogd_params(a_bnd, b_bnd)
+            if method_params['unc_set_type'] == 'interval':
+                opt_params.update({'N_RNDS': N_RNDS, 'gamma':gammas, 'step_schedule': eta, 'eta_0': 1})
+            elif method_params['unc_set_type'] == 'L1-budget':
+                fq_sums = [ (1.0 / self.fq[self.t == t_]).sum() for t_ in self.t_levels ]
+                fq_nm_wghts = [(1 / self.fq[self.t == self.t_levels[ind]]) / fq_sums[ind] for ind in range(len(self.t_levels)) ]
+                upper_gamma_bnds = np.asarray([np.sum(np.maximum(a_bnd[self.t == t_] / fq_sums[ind] - fq_nm_wghts[ind], b_bnd[self.t == t_] / fq_sums[ind] - fq_nm_wghts[ind])) for ind,t_ in enumerate(self.t_levels)])
+                gammas = opt_params['rho']*upper_gamma_bnds
+                opt_params.update({'N_RNDS': N_RNDS_tv, 'gamma': gamma, 'step_schedule': eta, 'eta_0': 1})
+        elif method_params['pol_opt'] == 'tree':
+            None
+        elif method_params['pol_opt'] == 'IPW':
+            opt_params.update({'eta_0': 1})
+            [psub_0_th_ipw, ls_th_ipw] = opt_w_restarts_vanilla_ipw(th = 0, **opt_params)
+                                                            # N_RNDS, PI_1, x_[:, 0:d], t_, nominal_Q_, y_, 1,logging=False, step_schedule=IPW_STEP)
+        return [opt_params, gammas]
+
+    def get_upper_gamma_bnds(self, a_bnd, b_bnd):
+        fq_sums = [(1.0 / self.fq[self.t == t_]).sum() for t_ in self.t_levels]
+        fq_nm_wghts = [(1 / self.fq[self.t == self.t_levels[ind]]) / fq_sums[ind] for ind in range(len(self.t_levels))]
+        upper_gamma_bnds = np.asarray([np.sum(np.maximum(a_bnd[self.t == t_] / fq_sums[ind] - fq_nm_wghts[ind],
+                                                         b_bnd[self.t == t_] / fq_sums[ind] - fq_nm_wghts[ind])) for
+                                       ind, t_ in enumerate(self.t_levels)])
+        gammas = self.opt_params_og['rho'] * upper_gamma_bnds
+        return gammas
+
+    ''' 
+    'eval_type': assume known dgp or RCT evaluation 
+    '''
+    def init_evaluation(self, eval_conf):
+        eval_type = eval_conf['eval_type']
+        self.eval_data = eval_conf['eval_data']
+        if eval_type == "true_dgp":
+            oracle_risk = eval_conf['oracle_risk']
+            def get_risk(pi_test, baseline_pol = self.baseline_pol):
+                x_test = self.eval_data['x_test']; u_test = self.eval_data['u_test']; t_test = self.eval_data['t_test']
+                if self.treatment_n == 'binary':
+                    return np.mean(oracle_risk(pi_test, x_test, u_test) - oracle_risk( baseline_pol(x_test) , x_test, u_test))
+                else:
+                    # For evaluating multiple treatments with oracle outcomes, compute
+                    # \sum_i \sum_t   Y_i(t) (\pi(t, X_i) - \pi_0(t, X_i) )
+                    baseline_assignment = np.zeros(pi_test.shape)
+                    for t in np.unique(t_test):
+                        baseline_assignment[:, t] = baseline_pol(x_test, t*np.ones(x_test.shape[0]))
+                    return oracle_risk(pi_test, x_test, u_test) - oracle_risk(baseline_assignment, x_test, u_test)
+            self.get_risk = get_risk
+
+        elif eval_type == "rct":  # if evaluate on RCT data
+            def get_risk(pi_test, baseline_pol = self.baseline_pol):
+                x_test = self.eval_data['x_test']
+                t_test = self.eval_data['t_test']
+                y_test = self.eval_data['y_test']
+                pi_test = np.asarray(pi_test)
+                RCT_prop_test = self.eval_data['RCT_q0'] # propensities from RCT
+
+                if sum(pi_test) > 0:  # if treat at all
+                    if self.treatment_n == 'binary':
+                        t_test_sgn = get_sgn_0_1(t_test)  # if evaluating on an rct: need to put this in fulldata dict
+                        return np.mean( y_test * (pi_test - baseline_pol(x_test)) * t_test_sgn / RCT_prop_test )
+                    else:
+                        # \sum_i  Y_i (\pi(T[i]) - \pi_0(T[i]) ) / Q[i,T[i]]
+                        return np.mean(y_test * (pi_test - baseline_pol(x_test, t_test)) / RCT_prop_test)
+                else:
+                    return 0 # assuming baseline is all-control
+            self.get_risk = get_risk
+
+    def calibrate_risk_for_policies(self, GAMS, POLS, unc_set_type, POL_PROB_1, WGHTS_ = opt_wrapper):
+        ''' Compute a calibration plot under "GAMS" gamma parameters for "POLS" policies
+
+        calibration assesses the worst case regret of a policy learned under an assumption of Gamma = Gamma'
+        which would be incurred were the true state of the world Gamma_calib
+        '''
+        RBARS_ = np.zeros([len(GAMS),len(GAMS)])
+        # Post processing, Rbar # compute policy unconfounded  # data_dict: x_, t_sgned_, y_, PI_1, PREF, fq, WGHTS_
+        for ind_gam_k in range(len(GAMS)):
+            for ind_g_calib, gam_calib in enumerate(GAMS):
+                a_calib, b_calib = get_bnds(self.fq, gam_calib)                 # previously was used to calib for IST data # a_calib, b_calib = get_bnds(q0[train_ind]*nominal_selection_[train_ind], gam_calib)
+                if unc_set_type == 'L1-budget':
+                    gammas = self.get_upper_gamma_bnds(a_calib, b_calib)
+                    gamma = gammas
+                else:
+                    gamma = 0
+                if self.treatment_n == 'binary':
+                    RBARS_[ind_gam_k, ind_g_calib] = Rbar(th = POLS[ind_gam_k],
+                                                      x=self.x, t=self.t, y=self.y, POL_PROB_1 = POL_PROB_1,BASELINE_POL= self.baseline_pol,
+                                                      a_bnd = a_calib, b_bnd = b_calib, fq=self.fq, gamma = gamma,WGHTS_ = WGHTS_)
+                else:
+                    RBARS_[ind_gam_k, ind_g_calib] = Rbar_mt(th = POLS[ind_gam_k],
+                                                      x=self.x, t=self.t, y=self.y, POL_PROB_1 = POL_PROB_1,BASELINE_POL= self.baseline_pol,
+                                                      a_bnd = a_calib, b_bnd = b_calib, fq=self.fq, gamma = gamma,WGHTS_ = WGHTS_)
+
+        self.RBARS_ = RBARS_
+        return RBARS_
+    '''
+    Check losses on previous policies 
+    # TODO: fix 
+    '''
+    def reversion_check_optimality(self, ind_g, th, ls_th, a_bnd, b_bnd, gammas, POLS, POL_PROB_1, WGHTS_ = opt_wrapper):
+    #### Reversion for non-fdiv policies
+        RBARS_opt = np.zeros(ind_g)
+
+        for ind_gam_i in range(ind_g):  # for every gamma_i < gamma_k
+            # Evaluate previously found policies on current data; with current Gamma parameter bounds
+            if self.treatment_n=='binary':
+                RBARS_opt[ind_gam_i] = Rbar(th = POLS[ind_gam_i],
+                                        x=self.x, t=self.t, y=self.y, POL_PROB_1 = POL_PROB_1, BASELINE_POL= self.baseline_pol,
+                                        a_bnd = a_bnd, b_bnd = b_bnd, fq = self.fq, gamma = gammas, WGHTS_ = WGHTS_)
+            else:
+                RBARS_opt[ind_gam_i] = Rbar_mt(th = POLS[ind_gam_i],
+                                        x=self.x, t=self.t, y=self.y, POL_PROB_1 = POL_PROB_1, BASELINE_POL= self.baseline_pol,
+                                        a_bnd = a_bnd, b_bnd = b_bnd, fq = self.fq, gamma = gammas, WGHTS_ = WGHTS_)
+        if self.verbose:
+            print RBARS_opt, 'others evaled'
+            print ls_th, 'current loss'
+        if np.min(RBARS_opt) < ls_th:  # if there is a policy achieving better risk, evaluated on this gamma
+            th = POLS[np.argmin(RBARS_opt)]  # set the policy to the one achieving the minimum evaluation
+            if self.verbose:
+                print 'reverting at ' + str(ind_g) + 'to policy learned at ' + str(np.argmin(RBARS_opt))
+        return [th, ls_th]
+
+    ''' revert to control policy (for linear policy, assuming intercept is last coefficient)'''
+    def check_loss_nonnegative(self, robust_th, ls, baseline_th, TOL = 1e-4):
+        if ls > TOL:
+            robust_th = baseline_th
+            if self.verbose:
+                print 'truncating because nonnegative loss'
+            ls = 0 # truncate loss
+        else:
+            robust_th = robust_th
+        return [robust_th, ls]
+
+
+
+    ''' get parameter values for OGD (perhaps clipping) 
+    '''
+    def get_ogd_params(self, a_bnd, b_bnd):
+        D = np.linalg.norm((a_bnd - b_bnd) / sum(a_bnd))
+        G = np.linalg.norm(0.25 * max(np.abs(self.y)) * self.x.shape[1])  # assume we bound the 2-norm of $\Theta$ by p
+        eps = 0.05
+        N_RNDS = np.clip(int(G ** 2 * D ** 2 / eps ** 2), 50, 200)
+        eta = np.clip((D * G) / np.sqrt(N_RNDS), 0.5, 0.6)
+        N_RNDS_tv = np.clip(N_RNDS, 10, 20)  # use OGD parameters
+        return [N_RNDS, eta, N_RNDS_tv]
+
+
+
+''' opt tree wrapper calls
+X__: preprocessed training data (standardize train/test data with same )
+t (unsigned)
+Y
+D (depth)
+nominal_Q_,
+pi_0,
+a_bnd_train,
+b_bnd_train
+Pass a signed treatment vector to the greedy warm start; but use the sharp version of optimal tree
+which uses integer encoding of t
+'''
+def get_opt_tree_policy(BASELINE_POL, x, t, fq, y, depth, a_bnd, b_bnd, sharp = True, verbose = False,TIME_LIMIT = 180, **params):
+    # optimal tree globals
+    # remove intercept
+    x, eps = preprocess_data_oct_pol(x[:,:-1])
+    n = len(y); n_ts = len(np.unique(t))
+    pi_0 = BASELINE_POL(x, t)
+    K=2; N = 2**(depth+1) - 1; N_nodes = np.arange(2**(depth+1) - 1)+1; N_TB = int(np.floor(N/2))
+    branch_nodes = N_nodes[0:(N_TB )]; N_L = N - N_TB
+    leaf_nodes = N_nodes[N_TB:]
+    A_L = [[] for i in range(N)]; A_R = [[] for i in range(N)]
+    [A_L, A_R] = get_ancestors(N_nodes, A_L, A_R)
+    [lowers, uppers] = getterminalindices(N)
+    #### train on optimal tree
+    T_sgned = get_sgn_0_1(t); # this makes a naive binarization
+    y_label = 0
+    mode_y = mode(y)[0][0]
+    L_hat = len(np.where(y == mode_y)[0])*1.0/ len(y)
+    [n, dim_d] = x.shape
+    [tree, warm_a, warm_b, warm_d, warm_z, warm_c ] = greedy_ws(N_TB, N_L, x,
+            y, T_sgned, pi_0, a_bnd, b_bnd, depth, leaf_nodes)
+    warm_c = np.ones(N_L)*0.5
+    # get leaf labels from warm start
+    leaf_labels = np.ones(N_L)*0.5
+    [warm_leaves,leaf_cts] = causal_tree_pred(x, leaf_nodes, warm_a, warm_b, leaf_labels, leaf_lbl=False)
+    # fill warm_z
+    for i in range(n):
+        which_leaf = int(warm_leaves[i])-leaf_nodes[0]
+        warm_z[i,which_leaf] = 1
+    if verbose:
+        print 'ws: ', warm_a, warm_b, warm_d
+        print warm_z.sum(axis=0), 'start leaf node counts'
+    [m_pol,a_pol,b_pol,d_pol, z_pol, c_pol, l_pol, P, lmbda, u, v] = policy_opt_tree_centered_on_p_0(x,
+    y,t,fq, pi_0, a_bnd, b_bnd, A_L, A_R, lowers, uppers, leaf_nodes,
+    branch_nodes, L_hat, N, K, eps, y_label, warm_a, warm_b, warm_d, warm_z, warm_c, sharp = sharp, TIME_LIMIT= TIME_LIMIT)
+    lmbda = lmbda.X
+    a_pol_ = np.zeros([dim_d, N_TB])
+    for i in range(N_TB):
+        a_pol_[:,i] = np.asarray([ a_pol[p,i+1].X for p in range(dim_d) ]).T
+    b_pol_ = [ b_pol[i+1].X for i in range(N_TB) ]
+    leaf_labels = np.asarray([ [c_pol[t_l, t].X for t in range(n_ts)  ]  for t_l in leaf_nodes ])
+#         leaf_labels = [ 0 if c_pol[t_l].X > 0 else 1 for t_l in leaf_nodes ]
+    if verbose:
+        print a_pol_, b_pol_
+        print ' c assignment: ', [c_pol[t_l,t].X for t in range(n_ts)  for t_l in leaf_nodes ]
+        print ' P assignment: ', [P[i,t].X  for t in range(n_ts)  for i in range(n) ]
+        print leaf_labels
+        print 'leaf nonempty', [l_pol[t_l].X for t_l in leaf_nodes ]
+        if sum(b_pol_) < 0.001:
+            print 'empty solution'
+
+    # pickle.dump( {'a': a_pol_,'b': b_pol_, 'leaves': leaf_labels, 'ws-tree': tree}, open('data/out/syn-glb/CR/opt-tree-gam-'+str(gam)+'-rep-'+str(ind_rep+5) + '-'+datetime.now().strftime('%Y-%m-%d-%H-%M-%S')+'.p', 'wb'))
+    # [Y_pred, leaf_node_cts] = causal_tree_pred(x_test, leaf_nodes, a_pol_, b_pol_,leaf_labels)
+    # print leaf_node_cts, 'leaf node cts'
+    # tree_rec = [tree.get_assignment(x_test[i,:]) for i in range(xtest.shape[0]) ]
+    # print 'sum Y pred: ' + str(sum(Y_pred))
+    # print 'sum treerec: ' + str(sum(tree_rec))
+    tree_pol = {'a': a_pol_,'b': b_pol_, 'leaves': leaf_labels, 'leaf_nodes': leaf_nodes}
+    return [tree_pol, lmbda]
\ No newline at end of file
diff --git a/methods_test.py b/methods_test.py
new file mode 100644
index 0000000..94bc36f
--- /dev/null
+++ b/methods_test.py
@@ -0,0 +1,121 @@
+import matplotlib.pyplot as plt
+import numpy as np
+from gurobipy import *
+from scipy.optimize import minimize
+import datetime as datetime
+import pickle
+from matplotlib.backends.backend_pdf import PdfPages
+from matplotlib import collections as matcoll
+from sklearn import svm
+from methods import *
+import os
+import sys
+from unconfoundedness_fns import *
+from subgrad import *
+from scipy.stats import norm
+from scipy.stats import multivariate_normal
+from joblib import Parallel, delayed
+from data_scenarios import *
+
+def real_risk_prob(prob_1, x, u):
+    n = len(u)
+    prob_1 = np.asarray(prob_1)
+    return prob_1 * real_risk_(np.ones(n), x, u) + (1 - prob_1) * real_risk_(np.zeros(n), x, u)
+
+N_REPS = int(sys.argv[1])
+arr = sys.argv[2].split(',')
+GAMS = [float(gam) for gam in arr]
+print(GAMS), 'gammas'
+
+ENN = 4000
+d = 5  # dimension of x
+th_ctrl = np.zeros(d+1)
+th_ctrl[-1] = -1000
+
+beta_cons = 2.5
+beta_x = np.asarray([0, .5, -0.5, 0, 0, 0])
+beta_x_T = np.asarray([-1.5,1,-1.5,1.,0.5,0])
+beta_T = np.asarray([0, .75, -.5,0,-1,0, 0])
+beta_T_conf = np.asarray([0, .75, -.5,0,-1,0])
+mu_x = np.asarray([-1,.5,-1,0,-1]);
+
+dgp_params = {'mu_x':mu_x, 'n':ENN, 'beta_cons':beta_cons, 'beta_x':beta_x, 'beta_x_T':beta_x_T}
+save_params = {'stump':'sharp-test-synthetic/', 'exp_name': 'testing'}
+
+
+opt_config_robust = {'N_RST': 15, 'GRAD_': get_implicit_grad_centered, 'WGHTS_': opt_wrapper,
+                     'GRAD_CTR': get_implicit_grad_centered, 'POL_PROB_1': logistic_pol_asgn,
+                     'POL_GRAD': qk_dpi_dtheta, 'DEFAULT_POL':th_ctrl,
+                     'BASELINE_POL': ctrl_p_1, 'P_1': ctrl_p_1, 'averaging': True, 'give_initial': True,
+                     'sharp': True}
+
+robust_opt_params = {'optimizer': opt_w_restarts, 'pol_opt': 'ogd',
+                     'unc_set_type': 'interval', 'opt_params': opt_config_robust,
+                     'BASELINE_POL': th_ctrl, 'type': 'logistic-interval'}
+
+fdiv_config = {'N_RST':15,'DEFAULT_POL':th_ctrl,
+               'rho':0.5, 'WGHTS_':get_general_interval_wghts_algo_centered_TV_prob, 'GRAD_':get_implicit_grad_centered,
+               'GRAD_CTR':get_implicit_grad_centered, 'POL_PROB_1':logistic_pol_asgn,'POL_GRAD':qk_dpi_dtheta,
+        'BASELINE_POL': ctrl_p_1,'eta_0':0.5, 'averaging':True, 'give_initial':True, 'sharp':True}
+
+fdiv_robust_opt_params = { 'optimizer':opt_w_restarts, 'pol_opt':'ogd',
+                     'unc_set_type':'L1-budget', 'opt_params':fdiv_config,
+                    'BASELINE_POL':th_ctrl, 'type':'logistic-L1-budget-0.5'}
+
+fdiv_config_025 = {'N_RST':15,'DEFAULT_POL':th_ctrl,
+               'rho':0.25, 'WGHTS_':get_general_interval_wghts_algo_centered_TV_prob, 'GRAD_':get_implicit_grad_centered,
+               'GRAD_CTR':get_implicit_grad_centered, 'POL_PROB_1':logistic_pol_asgn,'POL_GRAD':qk_dpi_dtheta,
+        'BASELINE_POL': ctrl_p_1,'eta_0':0.5, 'averaging':True, 'give_initial':True, 'sharp':True}
+
+fdiv_robust_opt_params_025 = { 'optimizer':opt_w_restarts, 'pol_opt':'ogd',
+                     'unc_set_type':'L1-budget', 'opt_params':fdiv_config_025,
+                    'BASELINE_POL':th_ctrl, 'type':'logistic-L1-budget-0.25'}
+
+#########
+## IPW and tree parameters
+opt_config_ipw = {'N_RST':15, 'N_RNDS':50, 'POL_PROB_1':logistic_pol_asgn, 'eta_0':1, 'averaging':True,'DEFAULT_POL':th_ctrl}
+
+ipw_opt_params = { 'optimizer':opt_w_restarts_vanilla_ipw, 'pol_opt':'IPW',
+                     'unc_set_type':'interval', 'opt_params':opt_config_ipw,
+                    'BASELINE_POL':th_ctrl, 'type':'IPW'}
+
+
+methods = ['ogd-interval', 'fdiv-0.5', 'fdiv-0.25']
+method_params = [robust_opt_params, fdiv_robust_opt_params, fdiv_robust_opt_params_025]
+
+
+def gen_data_run_for_gamma_for_joblib_mt(dgp_params, GAMS, real_risk_prob, method_params, ind_rep, gen_data=True,
+                                      save=False, save_params=[], already_gen_data=[]):
+    print ind_rep
+    if gen_data:
+        [x_full, u, T_, Y_, true_Q_, q0] = generate_log_data(**dgp_params)
+    else:
+        [x_full, u, T_, Y_, true_Q_, q0] = already_gen_data
+    np.random.seed(ind_rep)
+    random.seed(ind_rep)
+    train_ind, test_ind = model_selection.train_test_split(range(len(Y_)), test_size=0.9)
+    test_data = {'x_test': x_full[test_ind, :], 't_test': T_[test_ind], 'y_test': Y_[test_ind], 'u_test': u[test_ind]}
+    eval_conf = {'eval': True, 'eval_type': 'true_dgp', 'eval_data': test_data, 'oracle_risk': real_risk_prob_oracle}
+    ConfRobPols = [ConfoundingRobustPolicy(baseline_pol=ctrl_p_1_mt, save_params=save_params, save = True, verbose=True, treatment_n = 'multiple') for method in
+                   method_params]
+    for ind, method_param in enumerate(method_params):
+        ConfRobPols[ind].fit(x_full[train_ind, :], T_[train_ind], Y_[train_ind], q0[train_ind], GAMS, method_param,
+                             eval_conf=eval_conf)
+        del ConfRobPols[ind].x
+        del ConfRobPols[ind].t
+        del ConfRobPols[ind].y
+        del ConfRobPols[ind].eval_data
+
+    return ConfRobPols
+
+
+res = Parallel(n_jobs=-1, verbose=40)(
+    delayed(gen_data_run_for_gamma_for_joblib_mt)(dgp_params, GAMS, real_risk_prob, method_params, i, gen_data=True,
+                                               save=True, save_params=save_params) for i in range(N_REPS))
+
+#     delayed(gen_data_run_for_gamma_for_joblib)(dgp_params, GAMS, real_risk_prob, method_params, i, gen_data=True,
+#                                                save=True, save_params=save_params) for i in range(N_REPS))
+
+pickle.dump(res, open('res-'+save_params['exp_name'] + '.pkl', 'wb'))
+RISKS = [[ res[ind][meth].RISKS for ind in range(N_REPS) ] for meth in range(len(method_params)) ]
+pickle.dump(RISKS, open('risks_joblib_test_opt_tree_mt_50reps.pkl', 'wb') )
diff --git a/subgrad.py b/subgrad.py
new file mode 100644
index 0000000..d6a0356
--- /dev/null
+++ b/subgrad.py
@@ -0,0 +1,1035 @@
+"""
+@author: Angela Zhou
+"""
+import numpy as np
+import gurobipy as gp
+from unconfoundedness_fns import *
+import matplotlib.pyplot as plt
+from scipy.optimize import minimize
+from datetime import datetime
+import random
+import sys
+
+
+
+def log_progress(sequence, every=None, size=None, name='Items'):
+    from ipywidgets import IntProgress, HTML, VBox
+    from IPython.display import display
+
+    is_iterator = False
+    if size is None:
+        try:
+            size = len(sequence)
+        except TypeError:
+            is_iterator = True
+    if size is not None:
+        if every is None:
+            if size <= 200:
+                every = 1
+            else:
+                every = int(size / 200)     # every 0.5%
+    else:
+        assert every is not None, 'sequence is iterator, set every'
+
+    if is_iterator:
+        progress = IntProgress(min=0, max=1, value=1)
+        progress.bar_style = 'info'
+    else:
+        progress = IntProgress(min=0, max=size, value=0)
+    label = HTML()
+    box = VBox(children=[label, progress])
+    display(box)
+
+    index = 0
+    try:
+        for index, record in enumerate(sequence, 1):
+            if index == 1 or index % every == 0:
+                if is_iterator:
+                    label.value = '{name}: {index} / ?'.format(
+                        name=name,
+                        index=index
+                    )
+                else:
+                    progress.value = index
+                    label.value = u'{name}: {index} / {size}'.format(
+                        name=name,
+                        index=index,
+                        size=size
+                    )
+            yield record
+    except:
+        progress.bar_style = 'danger'
+        raise
+    else:
+        progress.bar_style = 'success'
+        progress.value = index
+        label.value = "{name}: {index}".format(
+            name=name,
+            index=str(index or '?')
+        )
+
+''' get gradient of pi wrt theta for logistic policy
+where pi = Pr[pi=1|x]
+'''
+def qk_dpi_dtheta(pi_1, pol_theta, x):
+    n = len(pi_1); dg_dz = np.multiply(pi_1,np.ones(len(pi_1))-pi_1)
+    return np.multiply(dg_dz[:,np.newaxis], x)
+    #return np.diag(dg_dz).dot( x ).reshape(x.shape) # n x p: enforce jic
+
+
+# """ get gradient of pi wrt theta for logistic policy
+# where pi = Pr[pi=1|x]
+# """
+# def qk_dpi_dtheta_mt(pi_1, pol_theta, x):
+#     n = len(pi_1);
+#     K = pol_theta.shape[1]
+#     grad = np.zeros(pol_theta.shape)
+#     for a in range(K):
+#         dg_dz = np.multiply(pi_1[:,a],np.ones(n)-pi_1[:,a])
+#         grad[:,a] = np.dot(dg_dz[:, np.newaxis].T, x)
+#     return grad
+
+""" get gradient of pi wrt theta for logistic policy
+where pi = Pr[pi=1|x]
+"""
+def qk_dpi_dtheta_mt_scalar(pi_1, x):
+    return pi_1*(1-pi_1) * x
+
+"""
+read in callbacks for derivative of pi given theta and optimal w, t
+PI_1, POL_GRAD (returns (p x 1) vector)
+take in ** normalized weights W **
+"""
+def get_implicit_grad_centered(pol_theta, PI_1, POL_GRAD, x, Y, t01, W):
+    # if need to get active index set
+    # rescaled weights in original
+    n = len(W); T_sgned = get_sgn_0_1(t01)
+    constants = np.multiply(Y, np.multiply(T_sgned,W))
+    policy_x = PI_1(pol_theta, x) # 1 x n
+    dpi_dtheta = POL_GRAD(policy_x, pol_theta, x) # n x p
+    if x.ndim > 1:
+        return np.multiply(constants[:,np.newaxis], dpi_dtheta).sum(axis=0)
+    else:
+        return np.sum(np.multiply(constants,dpi_dtheta))
+
+"""
+returns gradient for multiple treatments case
+read in callbacks for derivative of pi given theta and optimal w, t
+theta is a d x K array 
+PI_1 returns probability pi = Pr[ A_i | x ] under policy   
+POL_GRAD returns gradient for observed A_i (returns (p x 1) vector)
+take in *** normalized weights W ***
+"""
+def get_implicit_grad_centered_mt(pol_theta, PI_1, POL_GRAD_scalar, x, Y, t01, W):
+    n = len(W); #t_levels=np.unique(t01)
+    constants = np.multiply(Y, W)
+    policy_x = PI_1(pol_theta, x, t01)
+        # np.asarray([ PI_1(pol_theta[:,k], x) for k in t01 ]).flatten() # k x n
+    dpi_dtheta = np.asarray([ POL_GRAD_scalar( policy_x[i], x[i,:] ) for i in range(n) ] )
+    # call qk_dpi_dtheta for pol_theta[a_i] for each datapoint
+    K = pol_theta.shape[1]
+    grad = np.zeros(pol_theta.shape)
+    for a in range(K):
+        if x.ndim > 1:
+            grad[:,a] = np.multiply(constants[(t01 == a), np.newaxis], dpi_dtheta[t01 == a,:]).sum(axis=0)
+        else:
+            grad[:,a] = np.sum(np.multiply(constants[t01 == a], dpi_dtheta[t01 == a]))
+    # print grad.shape
+    return grad
+
+"""
+find value of centered estimator, evaluated against a benchmark policy which assigns
+Pi(x) = 1 w.p p_1 for all x
+"""
+def centered_around_p1(a_bnd, b_bnd, Y_T, pi_1, p_1):
+    return find_opt_robust_ipw_val(np.multiply(Y_T, (pi_1 - p_1)), a_bnd, b_bnd, shorter=True)
+
+
+def plot_W_GDS(p_ths, W_GDs):
+    plot(p_ths, W_GDs[:,0])
+    for i in range(len(p_ths)):
+        plot([p_ths[i]-0.5, p_ths[i]+0.5], [W_GDs[i,0]-W_GDs[i,1]*0.5, W_GDs[i,0]+W_GDs[i,1]*0.5], c='b',alpha=0.1)
+
+""" test gradient fn for th, given vector of assignments p_1
+"""
+def test_subgrad_for_th(p_th, p_1, PI_1, POL_GRAD, x, y, t01):
+    n = x.shape[0]; pi_1 = PI_1(np.asarray([p_th]), x).flatten(); t=get_sgn_0_1(t01);
+    [lda_opt, wghts, wghts_sum] = find_opt_weights_shorter(np.multiply(y*t, pi_1 - p_1), a_bnd, b_bnd)
+    grad = get_implicit_grad_centered(p_th, PI_1, POL_GRAD, x, y, t01, wghts/wghts.sum())
+    return [lda_opt,grad]
+
+""" test gradient fn for th, regret against the anti-policy -Pi
+"""
+def test_subgrad_for_anti(p_th, p_1, PI_1, POL_GRAD, x, y, t01):
+    n = x.shape[0]; pi_1 = PI_1(np.asarray([p_th]), x).flatten(); t=get_sgn_0_1(t01);
+    [lda_opt, wghts, wghts_sum] = find_opt_weights_shorter(np.multiply(y*t, pi_1 - p_1), a_bnd, b_bnd)
+    grad = get_implicit_grad_centered_anti_pi(p_th, PI_1, POL_GRAD, x, y, t01, wghts/wghts.sum())
+    return [lda_opt,grad]
+
+
+
+""" centered problem with weights
+assume given pi_1 vector
+max (pi_1 - p_1)YT W
+"""
+def get_general_interval_wghts_algo_centered_TV_prob(gamma, Y, a_, b_, fq, quiet=True):
+    wm = 1/fq; wm_sum=wm.sum(); n = len(Y)
+    wm = wm/wm_sum # normalize propensities
+    # assume estimated propensities are probs of observing T_i
+    y = Y; weights = np.zeros(n);
+    m = gp.Model()
+    if quiet: m.setParam("OutputFlag", 0)
+    t = m.addVar(lb = 0., ub = gp.GRB.INFINITY, vtype=gp.GRB.CONTINUOUS)
+    w = [m.addVar(obj = -yy, lb = 0., ub = gp.GRB.INFINITY, vtype=gp.GRB.CONTINUOUS) for yy in y]
+    d = [m.addVar(lb = 0., ub = gp.GRB.INFINITY, vtype=gp.GRB.CONTINUOUS) for yy in y]
+    m.update()
+    m.addConstr(gp.quicksum(w)==1)
+    m.addConstr(gp.quicksum(d)<=gamma*t)
+    for i in range(len(y)):
+        m.addConstr(w[i] <= b_[i] * t/wm_sum)
+        m.addConstr(w[i] >= a_[i] * t/wm_sum)
+        m.addConstr(d[i] >=   w[i] - t*wm[i])
+        m.addConstr(d[i] >= - w[i] + t*wm[i])
+    m.optimize()
+    wghts = np.asarray([ ww.X for ww in w ]) # would like to have failsafe for not being able to optimize
+    return [-m.ObjVal,wghts,t.X/wm_sum]
+
+"""
+read in callbacks for derivative of pi given theta and optimal w, t
+Y(Pi) - Y(-Pi)
+PI_1, POL_GRAD (returns (p x 1) vector)
+take in ** normalized weights W **
+####
+DEPRECATED beceause slow/memory using 
+Use get_implicit_grad_centered instead
+"""
+# def get_implicit_grad_centered_anti_pi(pol_theta, PI_1, POL_GRAD, x, Y, t01, W):
+#     n = len(W); T_sgned = get_sgn_0_1(t01)
+#     dc_dpi = np.diag(2*Y*T_sgned)
+#     policy_x = PI_1(pol_theta, x) # 1 x n
+#     dpi_dtheta = POL_GRAD(policy_x, pol_theta, x) # n x p
+#     return dc_dpi.dot(dpi_dtheta).T.dot(W) #! double check
+
+def logistic_pol_asgn(theta, x):
+    ''' Requires an intercept term
+    '''
+    n = x.shape[0]
+    theta = theta.flatten()
+    if len(theta) == 1:
+        logit = np.multiply(x, theta).flatten()
+    else:
+        logit = np.dot(x, theta).flatten()
+    LOGIT_TERM_POS = np.ones(n)*1.0 / ( np.ones(n) + np.exp( -logit ))
+    return LOGIT_TERM_POS
+''' wrapper to align arguments with the budgeted case
+'''
+def opt_wrapper(gamma, y, a_bnd, b_bnd, fq):
+    return find_opt_weights_shorter(y, a_bnd, b_bnd)
+
+def anti_p_1(pi_1):
+    return np.ones(len(pi_1))-pi_1 # anti policy
+def ctrl_p_1(pi_1):
+    return np.zeros(len(pi_1))
+def tmnt_p_1(pi_1):
+    return np.ones(len(pi_1))
+
+'''
+input: arbitrary policy vector (to get shape) 
+return 1 where treatment pattern is 0 (control
+'''
+def ctrl_p_1_mt(x,t01):
+    # treatment probs 0
+    pi_0 = np.zeros(x.shape[0])
+    pi_0[t01==0] = 1 #  control treatment with probablity 1
+    return pi_0
+
+
+
+
+""" subgrad descent template algo
+automatically augments data !
+take in theta_0, # rounds
+WGHTS_: fn obtaining optimal weights
+GRAD_: fn to obtain parametric subgradient
+POL_GRAD: gradient of parametrized policy wrt parameters
+PI_1: return prob of pi(x) = 1
+p_1: pi_0 probability of t = 1
+"""
+def grad_descent(th, N_RNDS, WGHTS_, GRAD_, POL_GRAD, PI_1, P_1, x, t01, fq, y,
+ a_, b_, gamma,eta_0,logging=False,step_schedule=0.5,linesearch=True):
+    n = x.shape[0]; t = get_sgn_0_1(t01)
+    assert all(len(arr) == n for arr in [x,t01,fq,y,a_,b_])
+    if (x[:,-1] == np.ones(n)).all():
+        x_aug = x
+    else: # otherwise augment data
+        x_aug = np.hstack([x, np.ones([n,1])]);
+    # x_aug = np.hstack([x, np.ones([n,1])]);
+    risks = np.zeros(N_RNDS); THTS = [None]*N_RNDS; PSTARS = np.zeros([N_RNDS, n]); losses = [None]*N_RNDS;
+
+    def opt_wrapper_here(th):#linesearch wrapper
+        pi_1 = PI_1(th, x_aug).flatten();
+        p_1 = P_1(pi_1);
+        [lda_opt, wghts, wghts_sum] = WGHTS_(gamma, np.multiply(y*t, pi_1 - p_1), a_, b_, fq)
+        return lda_opt
+
+    for k in range(N_RNDS) :
+        # print k; sys.stdout.flush()
+        eta_t = eta_0 * 1.0/np.power((k+1)*1.0, step_schedule); eta_t_og = eta_t
+        pi_1 = PI_1(th, x_aug).flatten();
+        p_1 = P_1(pi_1);
+        [lda_opt, wghts, wghts_sum] = WGHTS_(gamma, np.multiply(y*t, pi_1 - p_1), a_, b_, fq)
+        subgrad = GRAD_(th, PI_1, POL_GRAD, x_aug, y, t, wghts*1.0 / wghts_sum )
+        if (linesearch==True): # Armijo LineSearch
+            # print 'ls'; sys.stdout.flush()
+            ALS = ArmijoLineSearch(tfactor = 0.2)
+            d = -subgrad
+            slope = np.dot(subgrad, d)
+            eta_t = ALS.search(opt_wrapper_here, th, d, slope, f = lda_opt)
+        # print eta_t, 'eta_t'; sys.stdout.flush()
+        if eta_t is not None:
+            th = th - eta_t * subgrad
+        else:
+            th = th - eta_t_og * subgrad
+#         oosrisks[k] = np.mean( PI_1(th, x_test_aug)*y_test[t01==1]/true_Q_test[t01==1]+(1-PI_1(th, x_test_aug))*y_test[t01==0]/true_Q_test[t01==0] );
+        THTS[k] = th; PSTARS[k,:] = wghts.flatten(); losses[k] = lda_opt
+    return [losses, THTS, PSTARS]
+
+""" subgrad descent template algo
+This is the sharp version
+which computes weights separately in each T=1, T=-1 component
+automatically augments data !
+take in theta_0, # rounds
+WGHTS_: fn obtaining optimal weights
+This weight function is overloaded to accommodate weight functions that take in extra parameters
+about the uncertainty set
+GRAD_: fn to obtain parametric subgradient
+POL_GRAD: gradient of parametrized policy wrt parameters
+PI_1: return prob of pi(x) = 1
+p_1: pi_0 probability of t = 1
+"""
+def grad_descent_sharp(th, N_RNDS, WGHTS_, GRAD_, POL_GRAD, POL_PROB_1, BASELINE_POL, x, t01, fq, y,
+ a_, b_, gamma,eta_0,logging=False,step_schedule=0.5,linesearch=True):
+    n = x.shape[0]; t = get_sgn_0_1(t01)
+    t_levels = np.unique(t01)
+    assert all(len(arr) == n for arr in [x,t01,fq,y,a_,b_])
+    # Check if x data contains an intercept: only retain for backwards compatibility:
+    # If last column is all ones, don't augment
+    if (x[:,-1] == np.ones(n)).all():
+        x_aug = x
+    else: # otherwise augment data
+        x_aug = np.hstack([x, np.ones([n,1])]);
+    risks = np.zeros(N_RNDS); THTS = [None]*N_RNDS; PSTARS = np.zeros([N_RNDS, n]); losses = [None]*N_RNDS;
+    if not hasattr(gamma, "__len__"):
+        gammas = gamma * np.ones(len(t_levels))
+    else:
+        gammas = gamma
+    def opt_wrapper_here(th):#linesearch wrapper
+        pi_1 = POL_PROB_1(th, x_aug).flatten();
+        p_1 = BASELINE_POL(pi_1);
+        [lda_opt_neg1, wghts_neg1, wghts_sum_neg1] = WGHTS_(gammas[0], np.multiply(-1*y[t == -1], pi_1[t == -1] - p_1[t == -1]), a_[t == -1], b_[t == -1], fq[t == -1])
+        [lda_opt_1, wghts_1, wghts_sum_1] = WGHTS_(gammas[1], np.multiply(y[t == 1], pi_1[t == 1] - p_1[t == 1]), a_[t == 1], b_[t == 1], fq[t == 1])
+
+        wghts_total[t==1] = wghts_1*1.0 / wghts_sum_1; wghts_total[t==-1] = wghts_neg1*1.0 / wghts_sum_neg1;
+
+        return lda_opt_1 + lda_opt_neg1
+
+    for k in range(N_RNDS) :
+        eta_t = eta_0 * 1.0/np.power((k+1)*1.0, step_schedule); eta_t_og = eta_t
+        pi_1 = POL_PROB_1(th, x_aug).flatten();
+        p_1 = BASELINE_POL(pi_1);
+        # Modification for sharpness: Call the weight subroutine within treated and untreated groups separately
+        wghts_total = np.zeros(len(t)); subgrad_total = np.zeros(len(t))
+        # compute for T=1
+        #TODO: modify for general case, mult T
+        [lda_opt_1, wghts_1, wghts_sum_1] = WGHTS_(gammas[1], np.multiply(y[t == 1], pi_1[t == 1] - p_1[t == 1]), a_[t == 1], b_[t == 1], fq[t == 1])
+        # compute for T=-1
+        [lda_opt_neg1, wghts_neg1, wghts_sum_neg1] = WGHTS_(gammas[0], np.multiply(-1*y[t == -1], pi_1[t == -1] - p_1[t == -1]), a_[t == -1], b_[t == -1], fq[t == -1])
+        wghts_total[t==1] = wghts_1*1.0 / wghts_sum_1; wghts_total[t==-1] = wghts_neg1*1.0 / wghts_sum_neg1;
+        subgrad = GRAD_(th, POL_PROB_1, POL_GRAD, x_aug, y, t, wghts_total*1.0 / np.sum(wghts_total) )
+        lda_opt = lda_opt_1 + lda_opt_neg1
+        if (linesearch==True): # Armijo LineSearch
+            ALS = ArmijoLineSearch(tfactor = 0.2, default = eta_t)
+            d = -subgrad
+            slope = np.dot(subgrad, d)
+            eta_t = ALS.search(opt_wrapper_here, th, d, slope, f = lda_opt)
+        # print eta_t, 'eta_t'; sys.stdout.flush()
+        if eta_t is not None:
+            th = th - eta_t * subgrad
+        else:
+            th = th - eta_t_og * subgrad
+#         oosrisks[k] = np.mean( POL_PROB_1(th, x_test_aug)*y_test[t01==1]/true_Q_test[t01==1]+(1-PI_1(th, x_test_aug))*y_test[t01==0]/true_Q_test[t01==0] );
+        THTS[k] = th; PSTARS[k,:] = wghts_total.flatten(); losses[k] = lda_opt
+    return [losses, THTS, PSTARS]
+
+
+
+# random restarts
+''' refactored signature
+anti_p_1 -> pi_0
+'''
+def opt_w_restarts_rf(N_RST, th, N_RNDS, WGHTS_, GRAD_, POL_GRAD, PI_1, pi_0,
+X, T, Y, q0, a_bnd, b_bnd, gamma,
+eta_0,logging=False,step_schedule=0.5, averaging = False, give_initial=False):
+    ls = np.zeros(N_RST); ths = [None] *N_RST
+    iterator = log_progress(range(N_RST),every=1) if logging else range(N_RST)
+    for j in iterator:
+        random.seed(j)
+        if not give_initial:
+            th_0 = np.random.randn(x.shape[1]+1);
+        else:
+            th_0 = th
+        [oosrisks, losses, THTS, PSTARS] = grad_descent(th_0, N_RNDS, WGHTS_,
+        GRAD_, POL_GRAD, PI_1, pi_0, X, T, Y, q0, a_bnd, b_bnd, gamma,
+        eta_0,step_schedule)
+        if averaging: #average losses: OGD rule
+            ls[j] = np.mean(losses); ths[j] = sum(THTS)/len(THTS)
+        else:
+        # return the best so far, not last
+            best_so_far = np.argmin(losses)
+            ls[j]=losses[best_so_far]; ths[j] = THTS[best_so_far]
+        if logging:
+            plt.plot(range(N_RNDS), losses)
+            plt.pause(0.05)
+    return [ths[np.argmin(ls)], min(ls)] #return tht achieving min loss
+
+# random restarts
+def opt_w_restarts(N_RST, th,
+                   N_RNDS, WGHTS_, GRAD_, POL_GRAD, POL_PROB_1, BASELINE_POL, # specify policy functions, gradients, weight function s
+                   x, t, fq, y, a_bnd, b_bnd, gamma, eta_0, # give data
+        logging=False, step_schedule=0.5, averaging = False, give_initial=False, sharp = False, **kwargs): # other opt settings
+    ls = np.zeros(N_RST); ths = [None] *N_RST
+    iterator = log_progress(range(N_RST),every=1) if logging else range(N_RST)
+    for j in iterator:
+        random.seed(j)
+        if (give_initial) and (j==0):
+            th_0 = th
+        else:
+            th_0 = np.random.randn(x.shape[1])*0.25;
+        if sharp:
+            # print 'using sharp estimator'
+            [losses, THTS, PSTARS] = grad_descent_sharp(th_0, N_RNDS, WGHTS_, GRAD_, POL_GRAD, POL_PROB_1, BASELINE_POL, x, t, fq, y, a_bnd, b_bnd, gamma,eta_0,step_schedule)
+        else:
+            [losses, THTS, PSTARS] = grad_descent(th_0, N_RNDS, WGHTS_, GRAD_, POL_GRAD, POL_PROB_1, BASELINE_POL, x, t, fq, y, a_bnd, b_bnd, gamma,eta_0,step_schedule)
+        if averaging: #average losses: OGD rule
+            ls[j] = np.mean(losses); ths[j] = sum(THTS)/len(THTS)
+        else:
+            best_so_far = np.argmin(losses) # return the best so far, not last
+            ls[j]=losses[best_so_far]; ths[j] = THTS[best_so_far]
+        if logging:
+            plt.plot(range(N_RNDS), losses)
+            plt.pause(0.05)
+    if logging:
+        print ls, 'opt losses'
+    return [ths[np.argmin(ls)], min(ls)] #return tht achieving min loss
+
+
+""" Multiple treatments, optimize with restarts  
+"""
+def opt_w_restarts_mt(N_RST, th,
+                   N_RNDS, WGHTS_, GRAD_, POL_GRAD, POL_PROB_1, BASELINE_POL, # specify policy functions, gradients, weight function s
+                   x, t, fq, y, a_bnd, b_bnd, gamma, eta_0, # give data
+        logging=False, step_schedule=0.5, averaging = False, give_initial=False, sharp = False, **kwargs): # other opt settings
+    ls = np.zeros(N_RST); ths = [None] *N_RST
+    n_ts = len(np.unique(t))
+    iterator = log_progress(range(N_RST),every=1) if logging else range(N_RST)
+    for j in iterator:
+        random.seed(j)
+
+        if (give_initial) and (j==0):
+            th_0 = th
+        else:
+            th_0 = np.random.randn(x.shape[1],n_ts)*0.25;
+
+        [losses, THTS, PSTARS] = grad_descent_sharp_mt(th_0, N_RNDS, WGHTS_, GRAD_, POL_GRAD, POL_PROB_1, BASELINE_POL, x, t, fq, y, a_bnd, b_bnd, gamma,eta_0,step_schedule)
+
+        if averaging: #average losses: OGD rule
+            ls[j] = np.mean(losses); ths[j] = sum(THTS)/len(THTS)
+        else:
+            best_so_far = np.argmin(losses) # return the best so far, not last
+            ls[j]=losses[best_so_far]; ths[j] = THTS[best_so_far]
+        if logging:
+            plt.plot(range(N_RNDS), losses)
+            plt.pause(0.05)
+    if logging:
+        print ls, 'opt losses'
+    return [ths[np.argmin(ls)], min(ls)] #return tht achieving min loss
+
+""" logistic pol assign observed treatment A_i
+"""
+
+
+""" 
+subgrad descent template algo
+This is the sharp version for multiple treatments 
+which computes weights separately in each T=1, T=-1 component
+automatically augments data !
+take in theta_0, # rounds
+WGHTS_: fn obtaining optimal weights
+This weight function is overloaded to accommodate weight functions that take in extra parameters
+about the uncertainty set
+GRAD_: fn to obtain parametric subgradient
+POL_GRAD: gradient of parametrized policy wrt parameters
+POL_PROB_A: return prob of pi(x) = A_i (for observed A_i)
+BASELINE_POL: pi_0 probability of pi_0 = A_i (for observed A_i)
+"""
+def grad_descent_sharp_mt(th, N_RNDS, WGHTS_, GRAD_, POL_GRAD, POL_PROB_A, BASELINE_POL, x, t01, fq, y,
+ a_, b_, gamma,eta_0,logging=False,step_schedule=0.5,linesearch=False):
+    n = x.shape[0]; #t = get_sgn_0_1(t01)
+    t_levels = np.unique(t01)
+    assert all(len(arr) == n for arr in [x,t01,fq,y,a_,b_])
+    # Check if x data contains an intercept: only retain for backwards compatibility:
+    # If last column is all ones, don't augment
+    if (x[:,-1] == np.ones(n)).all():
+        x_aug = x
+    else: # otherwise augment data
+        x_aug = np.hstack([x, np.ones([n,1])]);
+    risks = np.zeros(N_RNDS); THTS = [None]*N_RNDS; PSTARS = np.zeros([N_RNDS, n]); losses = [None]*N_RNDS;
+    if not hasattr(gamma, "__len__"): # Default to repeating gamma if not specified
+        gammas = gamma * np.ones(len(t_levels))
+    else:
+        gammas = gamma
+    def opt_wrapper_here(th):#linesearch wrapper
+        pi_1 = POL_PROB_A(th, x_aug, t01).flatten();
+        p_1 = BASELINE_POL(pi_1, t01);
+        wghts_total = np.zeros(len(t01)); lda_opt_total = 0
+        for ind,t_l in range(t_levels):
+            [lda_opt, wghts, wghts_sum] = WGHTS_(gammas[ind], np.multiply(y[t01 == t_l], pi_1[t01 == t_l] - p_1[t01 == t_l]), a_[t01 == t_l], b_[t01 == t_l], fq[t01 == t_l])
+            wghts_total[t01==t_l] = wghts*1.0 / wghts_sum;
+            lda_opt_total += lda_opt
+        return lda_opt_total
+
+    for k in range(N_RNDS) :
+        eta_t = eta_0 * 1.0/np.power((k+1)*1.0, step_schedule); eta_t_og = eta_t
+        pi_1 = POL_PROB_A(th, x_aug, t01).flatten();
+        p_1 = BASELINE_POL(pi_1, t01);
+        # Modification for sharpness:
+        # Call the weight subroutine within each treatment partition separately
+        wghts_total_norm = np.zeros(len(t01));
+        lda_opt_total = 0
+        #TODO: modify for general case, mult T
+        for ind,t_l in enumerate(t_levels):
+            # print 'pi_1', pi_1
+            # print pi_1[t01 == t_l]
+            # print 'th', th
+            # print p_1[t01 == t_l]
+            [lda_opt, wghts, wghts_sum] = WGHTS_(gammas[ind], np.multiply(y[t01 == t_l], pi_1[t01 == t_l] - p_1[t01 == t_l]), a_[t01 == t_l], b_[t01 == t_l], fq[t01 == t_l])
+            wghts_total_norm[t01==t_l] = wghts*1.0 / wghts_sum;
+            lda_opt_total += lda_opt
+
+        subgrad = GRAD_(th, POL_PROB_A, POL_GRAD, x_aug, y, t01, wghts_total_norm )
+
+        if (linesearch==True): # Armijo LineSearch
+            ALS = ArmijoLineSearch(tfactor = 0.2, default = eta_t)
+            d = -subgrad
+            slope = np.dot(subgrad, d)
+            eta_t = ALS.search(opt_wrapper_here, th, d, slope, f = lda_opt_total)
+        if eta_t is not None:
+            th = th - eta_t * subgrad
+        else:
+            th = th - eta_t_og * subgrad
+        THTS[k] = th; PSTARS[k,:] = wghts_total_norm.flatten(); losses[k] = lda_opt_total
+    return [losses, THTS, PSTARS]
+
+''' objective evaluation for multiple treatments 
+'''
+def mt_obj_eval(th, *args):  # linesearch wrapper
+    [WGHTS_, GRAD_, POL_GRAD, POL_PROB_A, BASELINE_POL, x, t01, fq, y, a_, b_, gamma] = args
+    t_levels = np.unique(t01)
+    if not hasattr(gamma, "__len__"): # Default to repeating gamma if not specified
+        gammas = gamma * np.ones(len(t_levels))
+    else:
+        gammas = gamma
+    n = x.shape[0]; #t = get_sgn_0_1(t01)
+    assert all(len(arr) == n for arr in [x,t01,fq,y,a_,b_])
+    # Check if x data contains an intercept: only retain for backwards compatibility:
+    # If last column is all ones, don't augment
+    if (x[:,-1] == np.ones(n)).all():
+        x_aug = x
+    else: # otherwise augment data
+        x_aug = np.hstack([x, np.ones([n,1])]);
+    pi_1 = POL_PROB_A(th, x_aug, t01).flatten();
+    p_1 = BASELINE_POL(pi_1, t01);
+    wghts_total = np.zeros(len(t01));
+    lda_opt_total = 0
+    for ind, t_l in enumerate(t_levels):
+        [lda_opt, wghts, wghts_sum] = WGHTS_(gammas[ind],
+        np.multiply(y[t01 == t_l], pi_1[t01 == t_l] - p_1[t01 == t_l]),
+        a_[t01 == t_l], b_[t01 == t_l], fq[t01 == t_l])
+        wghts_total[t01 == t_l] = wghts * 1.0 / wghts_sum;
+        lda_opt_total += lda_opt
+    return lda_opt_total
+
+''' gradient evaluation for multiple treatments 
+'''
+def mt_grad_eval(th, *args):  # linesearch wrapper
+    [WGHTS_, GRAD_, POL_GRAD, POL_PROB_A, BASELINE_POL, x, t01, fq, y, a_, b_, gamma] = args
+    n = x.shape[0]; #t = get_sgn_0_1(t01)
+    t_levels = np.unique(t01)
+    if not hasattr(gamma, "__len__"): # Default to repeating gamma if not specified
+        gammas = gamma * np.ones(len(t_levels))
+    else:
+        gammas = gamma
+    assert all(len(arr) == n for arr in [x,t01,fq,y,a_,b_])
+    # Check if x data contains an intercept: only retain for backwards compatibility:
+    # If last column is all ones, don't augment
+    if (x[:,-1] == np.ones(n)).all():
+        x_aug = x
+    else: # otherwise augment data
+        x_aug = np.hstack([x, np.ones([n,1])]);
+    wghts_total_norm = np.zeros(len(t01));
+    lda_opt_total = 0
+    # TODO: modify for general case, mult T
+    pi_1 = POL_PROB_A(th, x_aug, t01).flatten();
+    p_1 = BASELINE_POL(pi_1, t01);
+    for ind, t_l in enumerate(t_levels):
+        [lda_opt, wghts, wghts_sum] = WGHTS_(gammas[ind],
+                                             np.multiply(y[t01 == t_l], pi_1[t01 == t_l] - p_1[t01 == t_l]),
+                                             a_[t01 == t_l], b_[t01 == t_l], fq[t01 == t_l])
+        wghts_total_norm[t01 == t_l] = wghts * 1.0 / wghts_sum;
+        lda_opt_total += lda_opt
+    subgrad = GRAD_(th, POL_PROB_A, POL_GRAD, x_aug, y, t01, wghts_total_norm)
+    return subgrad
+
+# def grad_descent_vanilla_ipw(th, N_RNDS, WGHTS_, GRAD_, POL_GRAD, PI_1, P_1, x, t, fq, y, a_, b_, gamma,eta_0,logging=False,step_schedule=0.5):
+def grad_descent_vanilla_ipw(th, N_RNDS, POL_PROB_1, x, t, fq, y, eta_0,logging=False,step_schedule=0.5):
+    n = x.shape[0];
+    assert all(len(arr) == n for arr in [x,t,fq,y])
+    # If last column is all ones, don't augment
+    if (x[:,-1] == np.ones(n)).all():
+        x_aug = x
+    else: # otherwise augment data
+        x_aug = np.hstack([x, np.ones([n,1])]);
+    risks = np.zeros(N_RNDS); THTS = [None]*N_RNDS; PSTARS = np.zeros([N_RNDS, n]); losses = [None]*N_RNDS; oosrisks = np.zeros(N_RNDS)
+    fq_norm = fq/np.sum(fq)
+    for k in range(N_RNDS) :
+        eta_t = eta_0 * 1.0/np.power((k+1)*1.0, 0.3);
+        pi_1 = POL_PROB_1(th, x_aug).flatten();
+        pi_t = np.asarray( [pi_1[i] if t[i] == 1 else 1- pi_1[i] for i in range(n)] )
+        loss = np.sum( y*pi_t/fq_norm  )
+        pi_1_grad = qk_dpi_dtheta(pi_1, th, x_aug)
+        pi_t_grad = np.asarray( [pi_1_grad[i] if t[i] == 1 else -pi_1_grad[i] for i in range(n)] )
+        subgrad = (y/fq_norm).dot(pi_t_grad).T
+        th = th - eta_t * subgrad
+        THTS[k] = th; losses[k] = loss
+    return [oosrisks, losses, THTS, PSTARS]
+
+# random restarts
+def opt_w_restarts_vanilla_ipw(N_RST, th, N_RNDS, POL_PROB_1, x, t, fq, y, eta_0,logging=False,step_schedule=0.5,**params):
+    ls = np.zeros(N_RST); ths = [None] *N_RST
+    iterator = log_progress(range(N_RST),every=1) if logging else range(N_RST)
+    for j in iterator:
+        # assume data has intercept
+        th_0 = np.random.randn(x.shape[1]);
+        [oosrisks, losses, THTS, PSTARS] = grad_descent_vanilla_ipw(th_0, N_RNDS, POL_PROB_1, x, t, fq, y, eta_0,logging=False,step_schedule=step_schedule)
+        ls[j]=losses[-1]; ths[j] = THTS[-1]
+        if logging:
+            plt.plot(range(N_RNDS), losses)
+            plt.pause(0.05)
+    return [ths[np.argmin(ls)], min(ls)] #return tht achieving min loss
+
+''' linear CATE projection 
+'''
+def grad_descent_linearproj_CATE(th, N_RNDS, POL_PROB_1, x, t, fq, y, eta_0,logging=False,step_schedule=0.5):
+    n = x.shape[0];
+    assert all(len(arr) == n for arr in [x,t,fq,y])
+    # If last column is all ones, don't augment
+    if (x[:,-1] == np.ones(n)).all():
+        x_aug = x
+    else: # otherwise augment data
+        x_aug = np.hstack([x, np.ones([n,1])]);
+    risks = np.zeros(N_RNDS); THTS = [None]*N_RNDS; PSTARS = np.zeros([N_RNDS, n]); losses = [None]*N_RNDS; oosrisks = np.zeros(N_RNDS)
+    fq_norm = fq/np.sum(fq)
+    for k in range(N_RNDS) :
+        eta_t = eta_0 * 1.0/np.power((k+1)*1.0, 0.3);
+        pi_1 = POL_PROB_1(th, x_aug).flatten();
+        # pi_t = np.asarray( [pi_1[i] if t[i] == 1 else 1- pi_1[i] for i in range(n)] )
+        loss = np.sum( pi_1 * y ) # assume y is CATE estimate
+        pi_1_grad = qk_dpi_dtheta(pi_1, th, x_aug)
+        # pi_t_grad = np.asarray( [pi_1_grad[i] if t[i] == 1 else -pi_1_grad[i] for i in range(n)] )
+        subgrad = (y).dot(pi_1_grad).T
+        th = th - eta_t * subgrad
+        THTS[k] = th; losses[k] = loss
+    return [oosrisks, losses, THTS, PSTARS]
+
+
+def opt_w_restarts_generic(GRAD_DESCENT, N_RST, N_RNDS, POL_PROB_1, x, t, fq, y, eta_0,logging=False,step_schedule=0.5,**params):
+    ls = np.zeros(N_RST); ths = [None] *N_RST
+    iterator = log_progress(range(N_RST),every=1) if logging else range(N_RST)
+    for j in iterator:
+        # assume data has intercept
+        th_0 = np.random.randn(x.shape[1]);
+        [oosrisks, losses, THTS, PSTARS] = GRAD_DESCENT(th_0, N_RNDS, POL_PROB_1, x, t, fq, y, eta_0,logging=False,step_schedule=step_schedule)
+        ls[j]=losses[-1]; ths[j] = THTS[-1]
+        if logging:
+            plt.plot(range(N_RNDS), losses)
+            plt.pause(0.05)
+    return [ths[np.argmin(ls)], min(ls)] #return tht achieving min loss
+
+
+# def grad_descent_vanilla_ipw(th, N_RNDS, WGHTS_, GRAD_, POL_GRAD, PI_1, P_1, x, t, fq, y, a_, b_, gamma,eta_0,logging=False,step_schedule=0.5):
+def grad_descent_vanilla_ipw_centered(th, N_RNDS, PI_1, P_1, x, t, fq, y, eta_0,logging=False,step_schedule=0.5):
+    n = x.shape[0];
+    assert all(len(arr) == n for arr in [x,t,fq,y])
+    x_aug = np.hstack([x, np.ones([n,1])]);
+    risks = np.zeros(N_RNDS); THTS = [None]*N_RNDS; PSTARS = np.zeros([N_RNDS, n]); losses = [None]*N_RNDS; oosrisks = np.zeros(N_RNDS)
+    fq_norm = fq #/np.sum(fq)
+    for k in range(N_RNDS) :
+        eta_t = eta_0 * 1.0/np.power((k+1)*1.0, 0.3);
+        pi_1 = PI_1(th, x_aug).flatten();
+        p_1 = P_1(pi_1);
+        pi_t = np.asarray( [pi_1[i] if t[i] == 1 else 1- pi_1[i] for i in range(n)] )
+        loss = np.sum( y*(pi_t-p_1)/fq_norm  )
+        pi_1_grad = qk_dpi_dtheta(pi_1, th, x_aug)
+        pi_t_grad = np.asarray( [pi_1_grad[i] if t[i] == 1 else -pi_1_grad[i] for i in range(n)] )
+        subgrad = (y/fq_norm).dot(pi_t_grad).T
+        th = th - eta_t * subgrad
+        THTS[k] = th; losses[k] = loss
+    return [oosrisks, losses, THTS, PSTARS]
+
+def grad_descent_policynorm_ipw(th, N_RNDS, PI_1, P_1, x, t, fq, y, eta_0,logging=False,step_schedule=0.5):
+    n = x.shape[0];
+    assert all(len(arr) == n for arr in [x,t,fq,y])
+    x_aug = np.hstack([x, np.ones([n,1])]);
+    risks = np.zeros(N_RNDS); THTS = [None]*N_RNDS; PSTARS = np.zeros([N_RNDS, n]); losses = [None]*N_RNDS; oosrisks = np.zeros(N_RNDS)
+    fq_norm = fq#/np.sum(fq)
+    for k in range(N_RNDS) :
+        eta_t = eta_0 * 1.0/np.power((k+1)*1.0, 0.3);
+        pi_1 = PI_1(th, x_aug).flatten();
+        pi_t = np.asarray( [pi_1[i] if t[i] == 1 else 1- pi_1[i] for i in range(n)] )
+        policy_norm_denom = np.sum( pi_t / fq_norm )
+        loss = np.sum( y*pi_t/fq_norm  ) / policy_norm_denom # self normalized version
+        pi_t_grad = np.asarray( [pi_1_grad[i] if t[i] == 1 else -pi_1_grad[i] for i in range(n)] )
+        subgrad = (y/fq_norm).dot(pi_t_grad).T / policy_norm_denom - (loss/policy_norm_denom)* (1/fq_norm).dot(pi_t_grad).T
+        th = th - eta_t * subgrad
+        THTS[k] = th; losses[k] = loss
+    return [oosrisks, losses, THTS, PSTARS]
+# random restarts
+def opt_w_restarts_vanilla_ipw_centered(N_RST, th, N_RNDS, PI_1, P_1, x, t, fq, y, eta_0,
+logging=False,step_schedule=0.5,normalized_policy=False):
+    ls = np.zeros(N_RST); ths = [None] *N_RST
+    iterator = log_progress(range(N_RST),every=1) if logging else range(N_RST)
+    for j in iterator:
+        th_0 = np.random.randn(x.shape[1]+1);
+        if normalized_policy:
+            [oosrisks, losses, THTS, PSTARS] = grad_descent_policynorm_ipw(th_0, N_RNDS, PI_1, P_1, x, t, fq, y, eta_0,logging=False,step_schedule=step_schedule)
+        else:
+            [oosrisks, losses, THTS, PSTARS] = grad_descent_vanilla_ipw(th_0, N_RNDS, PI_1, P_1, x, t, fq, y, eta_0,logging=False,step_schedule=step_schedule)
+        ls[j]=losses[-1]; ths[j] = THTS[-1]
+        if logging:
+            plt.plot(range(N_RNDS), losses)
+            plt.pause(0.05)
+    return [ths[np.argmin(ls)], min(ls)] #return tht achieving min loss
+
+
+# random restarts
+def opt_w_restarts_sc_min(N_RST, FN_, JAC_, d, args_):
+    ls = np.zeros(N_RST); ths = [None] * N_RST
+    for j in range(N_RST):
+        th_0 = np.random.randn(d+1);
+        res = minimize(FN_, th_0, jac = JAC_, method='L-BFGS-B', args = tuple(args_),options={'disp': True})
+        ls[j]=FN_(res.x, *args_); ths[j] = res.x
+    return [ths[np.argmin(ls)], min(ls)] #return tht achieving min loss
+
+
+""" general template algorithm
+"""
+def grad_descent_template(th, N_RNDS, LOSS_, GRAD_, POL_GRAD, PI_1, P_1, x, t01, fq, y, a_, b_, gamma,eta_0,logging=False,step_schedule=0.5):
+    n = x.shape[0]; t = get_sgn_0_1(t01)
+    assert all(len(arr) == n for arr in [x,t01,fq,y,a_,b_])
+    x_aug = np.hstack([x, np.ones([n,1])]);
+    risks = np.zeros(N_RNDS); THTS = [None]*N_RNDS; PSTARS = np.zeros([N_RNDS, n]); losses = [None]*N_RNDS; oosrisks = np.zeros(N_RNDS)
+    for k in range(N_RNDS) :
+        eta_t = eta_0 * 1.0/np.power((k+1)*1.0, 0.3);
+        pi_1 = PI_1(th, x_aug).flatten();
+        p_1 = P_1(pi_1);
+        loss = LOSS_(gamma, np.multiply(y*t, pi_1 - p_1), a_, b_, fq)
+        subgrad = GRAD_(th, PI_1, POL_GRAD, x_aug, y, t, wghts*1.0 / wghts_sum )
+        th = th - eta_t * subgrad
+#         oosrisks[k] = np.mean( PI_1(th, x_test_aug)*y_test[t01==1]/true_Q_test[t01==1]+(1-PI_1(th, x_test_aug))*y_test[t01==0]/true_Q_test[t01==0] );
+        THTS[k] = th; PSTARS[k,:] = wghts.flatten(); losses[k] = lda_opt
+        if k > 0:
+            if np.isclose(lda_opt, losses[k-1], atol = 0.0001):
+                return [oosrisks, losses[0:k], THTS[0:k], PSTARS]
+    return [oosrisks, losses, THTS, PSTARS]
+
+
+#### Functions suitable for use with lbfgs
+def opt_wrapper_sc(th, *args):
+    C = 0.05
+    PI_1 = args[0]; x_aug=args[1]; y=args[2]; t_sgned=args[3]; fq = args[4]; P_1 = args[5]; a_bnd = args[6]; b_bnd = args[7]; n = len(y)
+    pi_1 = PI_1(th, x_aug).flatten(); p_1 = P_1(pi_1);
+    [lda_opt, wghts, wghts_sum]= find_opt_weights_shorter(np.multiply(y*t_sgned, pi_1 - p_1), a_bnd, b_bnd)
+    return lda_opt + C*np.linalg.norm(th,2)**2
+
+def get_implicit_grad_centered_sc(th, *args):
+    C = 0.05
+    PI_1 = args[0]; x_aug=args[1]; y=args[2]; t_sgned=args[3]; fq = args[4]; P_1 = args[5]; a_bnd = args[6]; b_bnd = args[7]; POL_GRAD = args[8]; n = len(y)
+    pi_1 = PI_1(th, x_aug) # 1 x n
+    p_1 = P_1(pi_1);
+    [lda_opt, wghts, wghts_sum] = find_opt_weights_shorter(np.multiply(y*t_sgned, pi_1 - p_1), a_bnd, b_bnd)
+    W = wghts / wghts_sum
+    # more efficiently compute constants:
+    constants = np.multiply(y, np.multiply(t_sgned,W))
+    policy_x = PI_1(th, x_aug) # 1 x n
+    dpi_dtheta = POL_GRAD(policy_x, th, x_aug) # n x p
+    if x_aug.ndim > 1:
+        return np.multiply(constants[:,np.newaxis], dpi_dtheta).sum(axis=0)
+    else:
+        return np.sum(np.multiply(constants,dpi_dtheta))
+
+def get_implicit_grad_centered_anti_pi_sc(th, *args):
+    C = 0.05
+    PI_1 = args[0]; x_aug=args[1]; y=args[2]; t_sgned=args[3]; fq = args[4]; P_1 = args[5]; a_bnd = args[6]; b_bnd = args[7]; POL_GRAD = args[8]; n = len(y)
+    pi_1 = PI_1(th, x_aug) # 1 x n
+    p_1 = P_1(pi_1);
+    [lda_opt, wghts, wghts_sum] = find_opt_weights_shorter(np.multiply(y*t_sgned, pi_1 - p_1), a_bnd, b_bnd)
+    W = wghts / wghts_sum
+    dc_dpi = np.diag(2*y*t_sgned)
+    dpi_dtheta = POL_GRAD(pi_1, th, x_aug) # n x p
+    return dc_dpi.dot(dpi_dtheta).T.dot(W) + 2*C*th #! double check
+
+""" gamma infinity subgradient descent
+"""
+def grad_descent_gam_inf(th, N_RNDS, WGHTS_, GRAD_, POL_GRAD, PI_1, P_1, x, t01, fq, y, a_, b_, gamma,eta_0,logging=False,step_schedule=0.5):
+    n = x.shape[0]; t = get_sgn_0_1(t01)
+    assert all(len(arr) == n for arr in [x,t01,fq,y,a_,b_])
+    x_aug = np.hstack([x, np.ones([n,1])]);
+    risks = np.zeros(N_RNDS); THTS = [None]*N_RNDS; PSTARS = np.zeros([N_RNDS, n]); losses = [None]*N_RNDS; oosrisks = np.zeros(N_RNDS)
+    for k in range(N_RNDS) :
+        eta_t = eta_0 * 1.0/np.power((k+1)*1.0, step_schedule);
+        pi_1 = PI_1(th, x_aug).flatten();
+        p_1 = P_1(pi_1);
+        options = np.multiply(y*t, pi_1 - p_1)
+        lda_opt = np.max( options ); best_ind = np.argmax( options )
+        # [lda_opt, wghts, wghts_sum] = WGHTS_(gamma, np.multiply(y*t, pi_1 - p_1), a_, b_, fq)
+        subgrad = y[best_ind]*t[best_ind] * pi_1[best_ind] * (1 - pi_1[best_ind]) * x_aug[best_ind, :]
+        th = th - eta_t * subgrad
+#         oosrisks[k] = np.mean( PI_1(th, x_test_aug)*y_test[t01==1]/true_Q_test[t01==1]+(1-PI_1(th, x_test_aug))*y_test[t01==0]/true_Q_test[t01==0] );
+        THTS[k] = th;
+        # PSTARS[k,:] = wghts.flatten();
+        losses[k] = lda_opt
+    return [oosrisks, losses, THTS, PSTARS]
+
+def opt_w_restarts_gam_inf(N_RST, th, N_RNDS, WGHTS_, GRAD_, POL_GRAD, PI_1, anti_p_1, x, t_sgned, fq, y, a_bnd, b_bnd, gamma,eta_0,logging=False,step_schedule=0.5):
+    ls = np.zeros(N_RST); ths = [None] *N_RST
+    iterator = log_progress(range(N_RST),every=1) if logging else range(N_RST)
+    for j in iterator:
+        th_0 = np.random.randn(x.shape[1]+1);
+        [oosrisks, losses, THTS, PSTARS] = grad_descent_gam_inf(th_0, N_RNDS, WGHTS_, GRAD_, POL_GRAD, PI_1, anti_p_1, x, t_sgned, fq, y, a_bnd, b_bnd, gamma,eta_0,step_schedule)
+        # return the best so far, not last
+        best_so_far = np.argmin(losses)
+        ls[j]=losses[best_so_far]; ths[j] = THTS[best_so_far]
+        if logging:
+            plt.plot(range(N_RNDS), losses)
+            plt.pause(0.05)
+    return [ths[np.argmin(ls)], min(ls)] #return tht achieving min loss
+
+'''
+Evaluate loss of policy th under different bounds a_, b_ (corresponding to a different value of Gamma)
+PI_1 policy prob: return probability pi(x) = 1
+p_1: baseline probability
+a_, b_: bounds corresponding to Gamma being assessed (e.g. Un)
+gamma: effective gamma bound (no-op for unbudgeted uncertainty set) 
+'''
+def Rbar(th, x, t, y, POL_PROB_1, BASELINE_POL, a_bnd, b_bnd, fq, gamma = 0, WGHTS_ = opt_wrapper, **kwargs ):
+    pi_1 = np.asarray(POL_PROB_1(th, x)).flatten()
+    t_levels = np.unique(t)
+    p_1 = BASELINE_POL(pi_1)
+    # old # [lda_opt, wghts, wghts_sum] = find_opt_weights_shorter( np.multiply(y*t, pi_1 - p_1), a_, b_)
+    if not hasattr(gamma, "__len__"):
+        gammas = gamma * np.ones(len(t_levels))
+    else:
+        gammas = gamma
+    [lda_opt_neg1, wghts_neg1, wghts_sum_neg1] = WGHTS_(gammas[0],np.multiply(-1*y[t == -1], pi_1[t == -1] - p_1[t == -1]), a_bnd[t == -1], b_bnd[t == -1], fq[t == -1])
+    [lda_opt_1, wghts_1, wghts_sum_1] = WGHTS_(gammas[1], np.multiply(y[t == 1], pi_1[t == 1] - p_1[t == 1]), a_bnd[t == 1], b_bnd[t == 1], fq[t == 1])
+    return lda_opt_1 + lda_opt_neg1
+
+'''
+Evaluate loss of policy th under different bounds a_, b_ (corresponding to a different value of Gamma)
+PI_1 policy prob: return probability pi(x) = 1
+p_1: baseline probability (check in case handed a n x m array) 
+a_, b_: bounds corresponding to Gamma being assessed (e.g. Un)
+gamma: effective gamma bound (no-op for unbudgeted uncertainty set) 
+'''
+def Rbar_mt(th, x, t, y, POL_PROB_1, BASELINE_POL, a_bnd, b_bnd, fq, gamma = 0, WGHTS_ = opt_wrapper, **kwargs ):
+    pi_1 = np.asarray(POL_PROB_1(th, x, t)).flatten()
+    t_levels = np.unique(t)
+    p_1 = BASELINE_POL(pi_1, t)
+    if len(np.asarray(pi_1).shape) > 1:
+        p_1 = [ p_1[i,t[i]] for i in range(x.shape[0]) ]  # project n x m array to a vector based on observed teratment assignment
+    # old # [lda_opt, wghts, wghts_sum] = find_opt_weights_shorter( np.multiply(y*t, pi_1 - p_1), a_, b_)
+    if not hasattr(gamma, "__len__"):
+        gammas = gamma * np.ones(len(t_levels))
+    else:
+        gammas = gamma
+    lda_opt_total = 0
+    for ind,t_l in enumerate(t_levels):
+        [lda_opt, wghts, wghts_sum] = WGHTS_(gammas[ind], np.multiply(y[t == t_l], pi_1[t == t_l] - p_1[t == t_l]), a_bnd[t == t_l], b_bnd[t == t_l], fq[t == t_l])
+        lda_opt_total += lda_opt
+    return lda_opt_total
+
+
+
+''' vanilla uncentered ipw
+extra args: PI_1, x (with intercept), y, t, fq,
+'''
+def vanilla_ipw(th,*args):
+    C = 0.05
+    PI_1 = args[0]; x_aug=args[1]; y=args[2]; t=args[3]; fq = args[4]; n = len(y)
+    fq_norm = fq/np.sum(fq)
+    pi_1 = PI_1(th, x_aug).flatten();
+    pi_t = np.asarray( [pi_1[i] if t[i] == 1 else 1- pi_1[i] for i in range(n)] )
+    return np.sum( y*pi_t/fq_norm ) + C*np.linalg.norm(th,2)**2
+''' vanilla uncentered ipw
+extra args: PI_1, x (with intercept), y, t, fq,
+'''
+def vanilla_ipw_subgrad(th,*args):
+    C = 0.05
+    PI_1 = args[0]; x_aug=args[1]; y=args[2]; t=args[3]; fq = args[4]; n = len(y)
+    fq_norm = fq/np.sum(fq)
+    pi_1 = PI_1(th, x_aug).flatten();
+    pi_1_grad = qk_dpi_dtheta(pi_1, th, x_aug)
+    pi_t_grad = np.asarray( [pi_1_grad[i] if t[i] == 1 else -pi_1_grad[i] for i in range(n)] )
+    subgrad = (y/fq_norm).dot(pi_t_grad).T + 2*C*th
+    return subgrad
+
+def grad_descent_vanilla_ipw_mt(th, N_RNDS, POL_PROB_1, x, t, fq, y, eta_0,logging=False,step_schedule=0.5):
+    n = x.shape[0];
+    assert all(len(arr) == n for arr in [x,t,fq,y])
+    # If last column is all ones, don't augment
+    if (x[:,-1] == np.ones(n)).all():
+        x_aug = x
+    else: # otherwise augment data
+        x_aug = np.hstack([x, np.ones([n,1])]);
+    risks = np.zeros(N_RNDS); THTS = [None]*N_RNDS; PSTARS = np.zeros([N_RNDS, n]); losses = [None]*N_RNDS; oosrisks = np.zeros(N_RNDS)
+    fq_norm = fq/np.sum(fq)
+    for k in range(N_RNDS) :
+        eta_t = eta_0 * 1.0/np.power((k+1)*1.0, 0.3);
+        pi_1 = POL_PROB_1(th, x_aug).flatten();
+        pi_t = np.asarray( [pi_1[i] if t[i] == 1 else 1- pi_1[i] for i in range(n)] )
+        loss = np.sum( y*pi_t/fq_norm  )
+        pi_1_grad = qk_dpi_dtheta(pi_1, th, x_aug)
+        pi_t_grad = np.asarray( [pi_1_grad[i] if t[i] == 1 else -pi_1_grad[i] for i in range(n)] )
+        subgrad = (y/fq_norm).dot(pi_t_grad).T
+        th = th - eta_t * subgrad
+        THTS[k] = th; losses[k] = loss
+    return [oosrisks, losses, THTS, PSTARS]
+
+# random restarts
+def opt_w_restarts_vanilla_ipw_mt(N_RST, th, N_RNDS, POL_PROB_1, x, t, fq, y, eta_0,logging=False,step_schedule=0.5,**params):
+    ls = np.zeros(N_RST); ths = [None] *N_RST
+    iterator = log_progress(range(N_RST),every=1) if logging else range(N_RST)
+    for j in iterator:
+        # assume data has intercept
+        th_0 = np.random.randn(x.shape[1]);
+        [oosrisks, losses, THTS, PSTARS] = grad_descent_vanilla_ipw(th_0, N_RNDS, POL_PROB_1, x, t, fq, y, eta_0,logging=False,step_schedule=step_schedule)
+        ls[j]=losses[-1]; ths[j] = THTS[-1]
+        if logging:
+            plt.plot(range(N_RNDS), losses)
+            plt.pause(0.05)
+    return [ths[np.argmin(ls)], min(ls)] #return tht achieving min loss
+
+
+# https://github.com/funsim/moola/blob/master/moola/linesearch/dcsrch_fortran/linesearch.py
+# armijo linesearch
+class LineSearch:
+    """
+    A generic linesearch class. Most methods of this
+    class should be overridden by subclassing.
+    """
+
+    def __init__(self, **kwargs):
+        self._id = 'Generic Linesearch'
+        return
+
+    def _test(self, func, x, d, slope, f = None, t = 1.0, **kwargs):
+        """
+        Given a descent direction d for function func at the
+        current iterate x, see if the steplength t satisfies
+        a specific linesearch condition.
+        Must be overridden.
+        """
+        return True # Must override
+
+    def search(self, func, x, d, slope, f = None, **kwargs):
+        """
+        Given a descent direction d for function func at the
+        current iterate x, compute a steplength t such that
+        func(x + t * d) satisfies a linesearch condition
+        when compared to func(x). The value of the argument
+        slope should be the directional derivative of func in
+        the direction d: slope = f'(x;d) < 0. If given, f should
+        be the value of func(x). If not given, it will be evaluated.
+        func can point to a defined function or be a lambda function.
+        For example, in the univariate case::
+            test(lambda x: x**2, 2.0, -1, 4.0)
+        """
+        # return to default stepsize if not a descent dir (due to finite differences and nonconvex opt)
+        t = 1.0
+        if slope >= 0.0:
+            return t
+        while not self._test(func, x, d, f = f, t = t, **kwargs):
+            pass
+        return t
+
+
+class ArmijoLineSearch(LineSearch):
+    """
+    An Armijo linesearch with backtracking. This class implements the simple
+    Armijo test
+    f(x + t * d) <= f(x) + t * beta * f'(x;d)
+    where 0 < beta < 1/2 and f'(x;d) is the directional derivative of f in the
+    direction d. Note that f'(x;d) < 0 must be true.
+    :keywords:
+        :beta:      Value of beta (default 0.001)
+        :tfactor:   Amount by which to reduce the steplength
+                    during the backtracking (default 0.5).
+    """
+
+    def __init__(self, **kwargs):
+        LineSearch.__init__(self, **kwargs)
+        self.beta = max(min(kwargs.get('beta', 1.0e-4), 0.5), 1.0e-10)
+        self.tfactor = max(min(kwargs.get('tfactor', 0.1), 0.999), 1.0e-3)
+        self.default = kwargs.get('default')
+        return
+
+    def _test(self, func, x, d, slope, f = None, t = 1.0, **kwargs):
+        """
+        Given a descent direction d for function func at the
+        current iterate x, see if the steplength t satisfies
+        the Armijo linesearch condition.
+        """
+        if f is None:
+            f = func(x)
+        f_plus = func(x + t * d)
+        return (f_plus <= f + t * self.beta * slope)
+
+    def search(self, func, x, d, slope, f = None, **kwargs):
+        """
+        Given a descent direction d for function func at the
+        current iterate x, compute a steplength t such that
+        func(x + t * d) satisfies the Armijo linesearch condition
+        when compared to func(x). The value of the argument
+        slope should be the directional derivative of func in
+        the direction d: slope = f'(x;d) < 0. If given, f should
+        be the value of func(x). If not given, it will be evaluated.
+        func can point to a defined function or be a lambda function.
+        For example, in the univariate case:
+            `test(lambda x: x**2, 2.0, -1, 4.0)`
+        """
+        if f is None:
+            f = func(x)
+
+        t = 1.0
+        if slope >= 0.0:
+            return t # don't linesearch if not a descent direction
+        iterations = 0
+        while not self._test(func, x, d, slope, f = f, t = t, **kwargs):
+            t *= self.tfactor
+            # print iterations; sys.stdout.flush()
+            iterations += 1
+            if iterations >= 20:
+                return self.default
+                # raise ValueError("line search doesn't terminate")
+                # return None
+        return t
diff --git a/unconfoundedness_fns.py b/unconfoundedness_fns.py
new file mode 100644
index 0000000..c37854b
--- /dev/null
+++ b/unconfoundedness_fns.py
@@ -0,0 +1,676 @@
+"""
+@author: Angela Zhou
+"""
+import numpy as np
+from random import sample
+import math
+import cvxpy as cvx
+import matplotlib.pyplot as plt
+from scipy.optimize import minimize
+# from matplotlib import collections as matcoll
+from sklearn import svm
+from scipy.integrate import quad
+from sklearn import linear_model
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LogisticRegression
+from scipy.stats import norm
+from scipy import integrate
+# from sympy import mpmath as mp
+
+
+
+
+'''
+Helper functions for estimating propensities
+'''
+def estimate_prop(x, T, predict_x, predict_T):
+    clf_dropped = LogisticRegression()
+    clf_dropped.fit(x, T)
+    est_prop = clf_dropped.predict_proba(predict_x)
+    est_Q = np.asarray( [est_prop[k,1] if predict_T[k] == 1 else est_prop[k,0] for k in range(len(predict_T))] )
+    return [est_Q, clf_dropped]
+
+def get_prop(clf, x, T):
+    est_prop = clf_dropped.predict_proba(x)
+    est_Q = np.asarray( [est_prop[k,1] if T[k] == 1 else est_prop[k,0] for k in range(len(T))] )
+    return est_Q
+
+
+
+# get indicator vector from signed vector
+def get_0_1_sgn(vec):
+    n = len(vec)
+    return np.asarray([1 if vec[i] == 1 else 0 for i in range(n) ]).flatten()
+# get signed vector from indicator vector
+def get_sgn_0_1(vec):
+    n = len(vec)
+    return np.asarray([1 if vec[i] == 1 else -1 for i in range(n) ]).flatten()
+'''
+performs policy match indicator function; returns 1 or 0
+input: signed treatments, signed policy assignment
+'''
+def pol_match(T_sgned, pol):
+    sgn_match = np.multiply(T_sgned, pol )
+    return get_0_1_sgn(sgn_match)
+
+
+
+
+
+''' faster sorting routines
+'''
+''' Get general weights by sorting for the uncentered, smoothed estimator
+'''
+def get_general_interval_wghts_algo_uncentered_smoothed(init_policy, **params):
+    T_obs = params['T'].astype(int) ; Y = params['Y'].flatten(); n = params['n']; x = params['x'];
+    n = params['n']; weights = np.zeros(n); a = params['a']; b = params['b']
+    T_sgned=get_sgn_0_1(T_obs)
+    probs_pi_T = params['pi_pol'](T_sgned, init_policy, x)
+    a_mod = np.multiply(a, probs_pi_T); b_mod = np.multiply(b, probs_pi_T)
+    # Sort all Y!
+    plus_inds = np.argsort(Y);
+    Y_plus_std = Y[plus_inds] # Sorted Y
+    n_plus = n; vals = np.zeros(n_plus+1)
+    prev_val = -np.inf; k=0; val = np.inf
+    ## !!!! You can speed up by binary search
+    while (val > prev_val) and (k < n_plus+1):
+        denom = 0; num = 0; val = prev_val
+#     for i in range(len(vals)):
+        num = sum(np.multiply(a_mod[plus_inds[0:k]], Y_plus_std[0:k])) + sum(np.multiply(b_mod[plus_inds[k:]], Y_plus_std[k:]))
+        denom = sum(a_mod[plus_inds[0:k]])+sum(b_mod[plus_inds[k:]])
+#         for k in range(n_plus):
+#             if k<i: # threshold on order statistics
+#                 num += a_mod[plus_inds][k]*Y_plus_std[k]
+#                 denom += a_mod[plus_inds][k]
+#             else:
+#                 num += b_mod[plus_inds][k]*Y_plus_std[k]
+#                 denom += b_mod[plus_inds][k]
+#         vals[i] = vals[i] / denom
+        val = num / denom; k+=1;
+    lda_opt = val; k_star = k
+#     lda_opt = np.max(vals); k_star = np.argmax(vals) #kstar follows python indexing
+    weights[plus_inds[0:k_star]] = a_mod[plus_inds[0:k_star]]
+    weights[plus_inds[k_star:]] = b_mod[plus_inds[k_star:]]
+    return [weights, sum(weights)]
+
+
+'''
+copypasted from ipynb
+'''
+# def get_optimal_interval_wghts_CVX(init_policy, **params):
+#     T_obs = params['T'].astype(int) ; Y = params['Y']; n = params['n'];
+#     x = params['x']; a = params['a']; b = params['b']
+#     W = cvx.Variable(n); t = cvx.Variable(); hinge = params['hinge']
+#     constraints = [ np.multiply(a, hinge) * t <= W, W <= np.multiply(b, hinge) * t  ]
+#     constraints += [ sum(W) == 1, t > 0, W >= 0 ]
+#     prim_obj =  (  Y.T * W ) #+ LAMBDA*(cvx.norm(opt_theta)+cvx.norm(opt_theta_0) + cvx.norm(opt_theta_1))
+#     obj = cvx.Maximize(prim_obj) # -1* minimize negative rewards = max rewards
+#     prob = cvx.Problem(obj, constraints) # since we add a convex regularizer
+#     prob.solve()
+#     return [ W.value, t.value, prob.value ]
+
+'''
+solves for optimal weights for the problem with \sum W_i (pi Y_i)
+returns w, unnormalized weights
+'''
+def get_general_interval_wghts_pol(init_policy, **params):
+    n = params['n']; weights = np.zeros(n)
+    T_sgned = np.asarray([ 1 if params['T'][i] == 1 else -1 for i in range(n)]).flatten()
+    policy_x = params['pi_pol'](T_sgned, init_policy, params['x']).flatten()
+    params['hinge'] = policy_x
+    [W, t, val] = get_optimal_interval_wghts_CVX(init_policy, **params) # W is normalized weights
+    weights = (W.flatten()/t).T
+    return [weights,t]
+
+'''
+evaluate loss function from parametric problem (uncentered), under probabilistic policy assumption
+#! weights are weights multiplied separately by probabilities
+compute the loss as weights^T * (\pi Y)
+'''
+def normalized_parametric_loss_all(pol_theta, *args):
+    params = dict(args[0]); x = params['x']; C = params['C']; sign = params['sign'];
+    n = params['n']; q = params['q']; Y = params['Y'];
+    T = params['T']; a = params['a']; b = params['b']; pi = params['pi_pol'];
+    T_sgned = np.asarray([ 1 if T[i] == 1 else -1 for i in range(n)]).flatten();
+    W = params['weights']; opt_t = params['opt_t']; d = len(pol_theta);
+    policy_x = pi(T_sgned, pol_theta, x).reshape([n,1]); #policy_x_prime = pi_prime(pol_theta, x)
+    # already accounted for probability
+    loss_val = np.dot(params['weights'].reshape([n,1]).T, Y)
+#     loss_val = params['weights'].reshape([n,1]).T * np.multiply(policy_x, Y)
+    return sign*( loss_val + C*np.linalg.norm(pol_theta,2))
+
+
+#! This is function returning Pr[ \pi(x) = T_sgned]
+def logistic_pol(T_sgned, theta, x):
+    n = len(T_sgned); theta = theta.flatten()
+    if len(theta) == 1:
+        pol_match = np.multiply(T_sgned, np.multiply(x, theta).flatten())
+    else:
+        pol_match = np.multiply(T_sgned, np.dot(x, theta).flatten())
+    LOGIT_TERM_POS = np.ones(n) / ( np.ones(n) + np.exp( -pol_match ))
+    return LOGIT_TERM_POS
+
+
+'''
+performs policy match indicator function; returns 1 or 0
+input: signed treatments, signed policy assignment
+'''
+def pol_match(T_sgned, pol):
+    sgn_match = np.multiply(T_sgned, pol )
+    return get_0_1_sgn(sgn_match)
+
+
+''' Get general weights by sorting for the uncentered, smoothed estimator
+'''
+def get_general_interval_wghts_algo_uncentered_smoothed(init_policy, **params):
+    T_obs = params['T'].astype(int) ; Y = params['Y'].flatten(); n = params['n']; x = params['x'];
+    n = params['n']; weights = np.zeros(n); a = params['a']; b = params['b']
+    T_sgned=get_sgn_0_1(T_obs)
+    probs_pi_T = params['pi_pol'](T_sgned, init_policy, x)
+    a_mod = np.multiply(a, probs_pi_T); b_mod = np.multiply(b, probs_pi_T)
+    # Sort all Y!
+    plus_inds = np.argsort(Y);
+    Y_plus_std = Y[plus_inds] # Sorted Y
+    n_plus = n; vals = np.zeros(n_plus+1)
+    prev_val = -np.inf; k=1; val = sum(np.multiply(b_mod[plus_inds], Y_plus_std)) /  sum(b_mod[plus_inds])
+    ## !!!! You can speed up by binary search
+    while (val > prev_val) and (k < n_plus+1):
+        denom = 0; num = 0; prev_val = val
+        num = sum(np.multiply(a_mod[plus_inds[0:k]], Y_plus_std[0:k])) + sum(np.multiply(b_mod[plus_inds[k:]], Y_plus_std[k:]))
+        denom = sum(a_mod[plus_inds[0:k]])+sum(b_mod[plus_inds[k:]])
+        val = num / denom; k+=1;
+    lda_opt = val; k_star = k-1
+    weights[plus_inds[0:k_star]] = a_mod[plus_inds[0:k_star]]
+    weights[plus_inds[k_star:]] = b_mod[plus_inds[k_star:]]
+    return [weights, sum(weights)]
+
+
+''' return Pr[ \pi(x)=T ]
+'''
+def logistic_pol_match_obs(T_sgned, theta, x):
+    n = len(T_sgned); pol_match = np.multiply(T_sgned, np.dot(x, theta).flatten())
+    LOGIT_TERM_POS = np.ones(n) / ( np.ones(n) + np.exp( -pol_match ))
+    return LOGIT_TERM_POS
+''' return Pr[ \pi(x)=1 ]
+'''
+def logistic_pol_asgn(theta, x):
+    theta = theta.flatten()
+    n = x.shape[0]
+    if len(theta) == 1:
+        logit = np.multiply(x, theta).flatten()
+    else:
+        logit = np.dot(x, theta).flatten()
+    LOGIT_TERM_POS = np.ones(n) / ( np.ones(n) + np.exp( -logit ))
+    return LOGIT_TERM_POS
+
+''' multinomial logistic: return all probabilities 
+We index theta as a d x K array 
+x is n x d
+output is n x K 
+'''
+def logistic_pol_asgn_mt(theta, x):
+    n=x.shape[0]; d = x.shape[1]
+    k = theta.shape[1]
+    logit = np.zeros([n,k]); LOGIT_TERM_POS = np.zeros([n,k])
+    if d == 1:
+        for a in range(k):
+            logit[:,a] = np.multiply(x, theta[:,a])
+    else:
+        for a in range(k):
+            logit[:,a] = np.dot(x, theta[:,a])
+    for i in range(n):
+        # compute probability of observed action
+        LOGIT_TERM_POS[i,:] = np.asarray([np.exp(logit[i,k_]) / ( np.sum( np.exp(logit[i,:]) ) ) for k_ in range(k) ]) # sum over classes
+    return LOGIT_TERM_POS # output is nxk
+
+''' multinomial logistic: return all probabilities 
+We index theta as a d x K array 
+x is n x d
+output is n x K 
+(overloaded arguments just in case) 
+'''
+def logistic_pol_asgn_mt(theta, x, t_test=0):
+    n=x.shape[0]; d = x.shape[1]
+    k = theta.shape[1]
+    logit = np.zeros([n,k]); LOGIT_TERM_POS = np.zeros([n,k])
+    if d == 1:
+        for a in range(k):
+            logit[:,a] = np.multiply(x, theta[:,a])
+    else:
+        for a in range(k):
+            logit[:,a] = np.dot(x, theta[:,a])
+    for i in range(n):
+        # compute probability of observed action
+        LOGIT_TERM_POS[i,:] = np.asarray([np.exp(logit[i,k_]) / ( np.sum( np.exp(logit[i,:]) ) ) for k_ in range(k) ]) # sum over classes
+        if np.isinf(np.exp(logit[i,k_])):
+            LOGIT_TERM_POS[i] = 1 # overflow fix
+    return LOGIT_TERM_POS # output is nxk
+
+
+''' multinomial logistic: return probability of assigning observed treatment
+We index theta as a d x K array 
+x is n x d 
+Output is filtered based on the observed treatment pattern 
+'''
+def logistic_pol_asgn_mt_obsA(theta, x, t01):
+    n = x.shape[0]
+    d = x.shape[1];
+    k = theta.shape[1] # n treatments
+    logit = np.zeros([n,k])
+    LOGIT_TERM_POS = np.zeros(n)
+    if d == 1:
+        for a in range(k):
+            logit[:,a] = np.multiply(x, theta[:,a])
+    else:
+        for a in range(k):
+            logit[:,a] = np.dot(x, theta[:,a])
+    # numerically stable version
+    amax = max(logit)
+    for i in range(n):
+        # compute probability of observed action
+        LOGIT_TERM_POS[i] = np.exp(logit[i,t01[i]]) / ( np.sum( np.exp(logit[i,:]) ) ) # sum over classes
+        if np.isinf(np.exp(logit[i,t01[i]])):
+            LOGIT_TERM_POS[i] = 1 # overflow fix
+        if np.isnan(LOGIT_TERM_POS[i]):
+            print 'num',np.exp(logit[i,t01[i]])
+            print 'denom',np.sum( np.exp(logit[i,:]) )
+    # if sum(np.isnan(LOGIT_TERM_POS))>0:
+    #     print 'nan theta',theta
+    #     return LOGIT_TERM_POS
+    return LOGIT_TERM_POS # output is nx1
+
+
+def find_opt_weights_short_val(a_,b_,Y):
+    [lda_opt, weights, s_wghts] = find_opt_weights_short(a_, b_, Y)
+    return lda_opt
+
+'''get odds ratio bounds on estimated propensities (est_Q) given sensitivity level Gamma
+'''
+def get_bnds(est_Q,LogGamma):
+    n = len(est_Q)
+    p_hi = np.multiply(np.exp(LogGamma), est_Q ) / (np.ones(n) - est_Q + np.multiply(np.exp(LogGamma), est_Q ))
+    p_lo = np.multiply(np.exp(-LogGamma), est_Q ) / (np.ones(n) - est_Q + np.multiply(np.exp(-LogGamma), est_Q ))
+    assert (p_lo < p_hi).all()
+    a_bnd = 1/p_hi;
+    b_bnd = 1/p_lo
+    return [ a_bnd, b_bnd ]
+
+
+
+# ''' Given the truncated list of weights, Y (unsorted) and
+# return Lambda (problem value), k
+# '''
+# def find_opt_weights_short(a_, b_, Y, sub_ind=[]):
+#     if len(sub_ind)>0:
+#         a_=a_[sub_ind]; b_=b_[sub_ind]; Y = Y[sub_ind]
+#     sort_inds = np.argsort(Y); a_=a_[sort_inds]; Y = Y[sort_inds]; b_=b_[sort_inds]
+#     n_plus = len(Y); weights = np.zeros(n_plus); prev_val = -np.inf; k=1; val = sum(np.multiply(b_, Y)) /sum(b_) ## !!!! You can speed up by binary search
+#     while (val > prev_val) and (k < n_plus+1):
+#         denom = 0; num = 0; prev_val = val; num = 1.0*sum(np.multiply(a_[0:k], Y[0:k])) + sum(np.multiply(b_[k:], Y[k:]))
+#         denom = sum(a_[0:k])+sum(b_[k:]); val = num / denom; k+=1;
+#     lda_opt = prev_val; k_star = k-1
+#     sort_inds_a = sort_inds[0:k_star]; sort_inds_b = sort_inds[k_star:]
+#     weights[sort_inds_a] = a_[0:k_star]; weights[sort_inds_b] = b_[k_star:]
+#
+#     return [lda_opt, weights, sum(weights)]
+
+
+
+''' Given  Y (unsorted), lower bound 'a_', upper bound 'b_' on weights, and possible index list sub_ind,
+return Lambda (min problem value), weights, sum(weights)
+'''
+def find_opt_weights_short(Y, a_, b_, sub_ind=[]):
+    if len(sub_ind)>0:
+        print sub_ind
+        a_=a_[sub_ind]; b_=b_[sub_ind]; Y = Y[sub_ind]
+    sort_inds = np.lexsort((a_,Y)); a_=a_[sort_inds]; Y = Y[sort_inds]; b_=b_[sort_inds]
+    n_plus = len(Y); weights = np.zeros(n_plus); prev_val = -np.inf; k=1; val = sum(np.multiply(b_, Y)) /sum(b_)
+    while (val > prev_val) and (k < n_plus+1):
+        denom = 0; num = 0; prev_val = val; num = 1.0*sum(np.multiply(a_[0:k], Y[0:k])) + sum(np.multiply(b_[k:], Y[k:]))
+        denom = sum(a_[0:k])+sum(b_[k:]); val = num / denom; k+=1;
+    lda_opt = prev_val; k_star = k-1
+    sort_inds_a = sort_inds[0:k_star]; sort_inds_b = sort_inds[k_star:]
+    weights[sort_inds_a] = a_[0:k_star]; weights[sort_inds_b] = b_[k_star:]
+    return [lda_opt, weights, sum(weights)]
+
+''' explicit: include plots of all values
+'''
+def find_opt_weights_plot(Y,a_,b_,sub_ind=[], lexsort = False):
+    if len(sub_ind)>0:
+        print sub_ind
+        a_=a_[sub_ind]; b_=b_[sub_ind]; Y = Y[sub_ind]
+    if lexsort:
+        sort_inds = np.lexsort((a_,Y))
+    else:
+        sort_inds = np.argsort(Y);
+    a_=a_[sort_inds]; Y = Y[sort_inds]; b_=b_[sort_inds]
+    n_plus = len(Y); weights = np.zeros(n_plus); prev_val = -np.inf; k=1;
+    val = np.sum(np.multiply(b_, Y)) /np.sum(b_)
+    vals = [ (1.0*np.sum(np.multiply(a_[0:k], Y[0:k])) + np.sum(np.multiply(b_[k:], Y[k:])))/(np.sum(a_[0:k])+np.sum(b_[k:])) for k in range(n_plus) ]
+    lda_opt = np.max(vals); k_star = np.argmax(vals)-1
+    plt.figure()
+    plt.plot(vals)
+    plt.figure()
+    plt.plot(np.diff(vals))
+    sort_inds_a = sort_inds[0:k_star]; sort_inds_b = sort_inds[k_star:]
+    weights[sort_inds_a] = a_[0:k_star]; weights[sort_inds_b] = b_[k_star:]
+    return [lda_opt, weights, sum(weights)]
+
+
+def rnd_k_val(k_,a_,b_,Y):
+    k= int(np.floor(k_)) # floor or round?
+    return (1.0*np.sum(np.multiply(a_[0:k], Y[0:k])) + np.sum(np.multiply(b_[k:], Y[k:])))/(np.sum(a_[0:k])+np.sum(b_[k:]))
+
+''' Given  Y (unsorted), lower bound 'a_', upper bound 'b_' on weights, and possible index list sub_ind,
+use ternary search to find the optimal value.
+Possibly off by one error but it shouldn't matter.
+return Lambda (min problem value), weights, sum(weights)
+'''
+def find_opt_weights_shorter(Y, a_, b_, sub_ind=[]):
+    if len(sub_ind)>0:
+        a_=a_[sub_ind]; b_=b_[sub_ind]; Y = Y[sub_ind]
+    # sort_inds = np.argsort(Y);
+    sort_inds = np.lexsort((b_-a_,Y));
+    a_=a_[sort_inds]; Y = Y[sort_inds]; b_=b_[sort_inds]
+    n_plus = len(Y); weights = np.zeros(n_plus); prev_val = -np.inf; k=1; val = np.sum(np.multiply(b_, Y)) /sum(b_)
+    left = 0; right = n_plus-1;keepgoing=True
+    while keepgoing:
+        #left and right are the current bounds; the maximum is between them
+#         print (left,right)
+#         print (rnd_k_val(leftThird,a_,b_,Y), rnd_k_val(rightThird,a_,b_,Y))
+        if abs(right - left) < 2.1: # separation in index space
+            k = np.floor((left + right)/2)
+            keepgoing=False
+        leftThird = left + (right - left)/3
+        rightThird = right - (right - left)/3
+        if rnd_k_val(leftThird,a_,b_,Y) < rnd_k_val(rightThird,a_,b_,Y):
+            left = leftThird
+        else:
+            right = rightThird
+    k_star=int(k); k=int(k)
+    lda_opt = (1.0*np.sum(np.multiply(a_[0:k], Y[0:k])) + np.sum(np.multiply(b_[k:], Y[k:])))/(np.sum(a_[0:k])+np.sum(b_[k:]))
+    sort_inds_a = sort_inds[0:k_star]; sort_inds_b = sort_inds[k_star:]
+    weights[sort_inds_a] = a_[0:k_star]; weights[sort_inds_b] = b_[k_star:]
+    return [lda_opt, weights, np.sum(weights)]
+
+'''Given  Y (unsorted), lower bound 'a_', upper bound 'b_' on weights, and possible index list sub_ind,
+Lambda (min problem value), weights, sum(weights)
+'''
+def find_opt_robust_ipw_val(Y, a_, b_,shorter=False):
+    if shorter:
+        [lda_opt, weights, s_wghts] = find_opt_weights_shorter(Y, a_, b_)
+    else:
+        [lda_opt, weights, s_wghts] = find_opt_weights_short(Y, a_, b_)
+    return lda_opt
+
+'''Given  Y (unsorted), lower bound 'a_', upper bound 'b_' on weights, and possible index list sub_ind,
+Lambda (max problem value), weights, sum(weights)
+'''
+def find_opt_robust_ipw_val_min(Y, a_,b_,shorter=False):
+    if shorter:
+        [lda_opt, weights, s_wghts] = find_opt_weights_shorter(-Y, a_, b_)
+    else:
+        [lda_opt, weights, s_wghts] = find_opt_weights_short(-Y, a_, b_)
+    return -lda_opt
+
+
+''' functions for computing TV divergence
+'''
+
+''' get optimal weights with TV constraint
+'''
+import gurobipy as gp
+
+''' minimize wrt TV bound
+'''
+def get_general_interval_wghts_algo_uncentered_smoothed_f_divergence_TV(incumbent_pol, quiet=True, **params):
+    T_obs = params['T'].astype(int) ; Y = params['Y'].flatten(); n = params['n']; x = params['x'];
+    n = params['n']; a = params['a']; b = params['b']
+    gamma = params['gamma']; wm = 1/params['q']
+    if params['subind'] == True:
+        subinds = params['subinds']
+        Y = Y[subinds]; n = len(subinds); T_obs = T_obs[subinds]; x = x[subinds]; a= a[subinds]; b= b[subinds]; wm = wm[subinds]
+    # assume estimated propensities are probs of observing T_i
+    y = Y; weights = np.zeros(n)
+     # nominal propensities
+    # smoothing probabilities
+    T_sgned=get_sgn_0_1(T_obs)
+    probs_pi_T = params['pi_pol'](T_sgned, incumbent_pol, x)
+    a_mod = np.multiply(a, probs_pi_T); b_mod = np.multiply(b, probs_pi_T)
+    wm = np.multiply(wm, probs_pi_T)
+    m = gp.Model()
+    if quiet: m.setParam("OutputFlag", 0)
+    t = m.addVar(lb = 0., ub = gp.GRB.INFINITY, vtype=gp.GRB.CONTINUOUS)
+    w = [m.addVar(obj = -yy, lb = 0., ub = gp.GRB.INFINITY, vtype=gp.GRB.CONTINUOUS) for yy in y]
+    d = [m.addVar(lb = 0., ub = gp.GRB.INFINITY, vtype=gp.GRB.CONTINUOUS) for yy in y]
+    m.update()
+    m.addConstr(gp.quicksum(w)==1)
+    m.addConstr(gp.quicksum(d)<=gamma*t)
+    for i in range(len(y)):
+        m.addConstr(w[i] <= b_mod[i] * t)
+        m.addConstr(w[i] >= a_mod[i] * t)
+        m.addConstr(d[i] >=   w[i] - t*wm[i])
+        m.addConstr(d[i] >= - w[i] + t*wm[i])
+    m.optimize()
+    return -m.ObjVal
+
+''' maximize wrt TV bound
+'''
+def get_general_interval_wghts_algo_uncentered_smoothed_f_divergence_TV_max(incumbent_pol, quiet=True, **params):
+    T_obs = params['T'].astype(int) ; Y = params['Y'].flatten(); n = params['n']; x = params['x'];
+    n = params['n']; weights = np.zeros(n); a = params['a']; b = params['b']
+    gamma = params['gamma']
+    # assume estimated propensities are probs of observing T_i
+    y = -Y
+    wm = 1/params['q'] # nominal propensities
+    # smoothing probabilities
+    T_sgned=get_sgn_0_1(T_obs)
+    probs_pi_T = params['pi_pol'](T_sgned, incumbent_pol, x)
+    a_mod = np.multiply(a, probs_pi_T); b_mod = np.multiply(b, probs_pi_T)
+    wm = np.multiply(wm, probs_pi_T)
+    m = gp.Model()
+    if quiet: m.setParam("OutputFlag", 0)
+    t = m.addVar(lb = 0., ub = gp.GRB.INFINITY, vtype=gp.GRB.CONTINUOUS)
+    w = [m.addVar(obj = -yy, lb = 0., ub = gp.GRB.INFINITY, vtype=gp.GRB.CONTINUOUS) for yy in y]
+    d = [m.addVar(lb = 0., ub = gp.GRB.INFINITY, vtype=gp.GRB.CONTINUOUS) for yy in y]
+    m.update()
+    m.addConstr(gp.quicksum(w)==1)
+    m.addConstr(gp.quicksum(d)<=gamma*t)
+    for i in range(len(y)):
+        m.addConstr(w[i] <= b_mod[i] * t)
+        m.addConstr(w[i] >= a_mod[i] * t)
+        m.addConstr(d[i] >=   w[i] - t*wm[i])
+        m.addConstr(d[i] >= - w[i] + t*wm[i])
+    m.optimize()
+    return m.ObjVal
+
+def logistic_pol_asgn(theta, x):
+    n = x.shape[0]
+    theta = theta.flatten()
+    if len(theta) == 1:
+        logit = np.multiply(x, theta).flatten()
+    else:
+        logit = np.dot(x, theta).flatten()
+    LOGIT_TERM_POS = np.ones(n) / ( np.ones(n) + np.exp( -logit ))
+    return LOGIT_TERM_POS
+
+####
+''' Getting subgradients for the robust value function
+'''
+
+'''
+read in callbacks for derivative of pi given theta and optimal w, t
+PI_1, POL_GRAD (returns (p x 1) vector)
+take in ** normalized weights W **
+'''
+def get_implicit_grad_centered(pol_theta, PI_1, POL_GRAD, x, Y, t01, W):
+    # if need to get active index set
+    # rescaled weights in original
+    n = len(W); T_sgned = get_sgn_0_1(t01)
+
+    # dc_dpi = np.diag(Y*T_sgned)
+    # policy_x = PI_1(pol_theta, x) # 1 x n
+    # dpi_dtheta = POL_GRAD(policy_x, pol_theta, x) # n x p
+    # return dc_dpi.dot(dpi_dtheta).T.dot(W)
+    constants = np.multiply(Y, np.multiply(T_sgned,W))
+    policy_x = PI_1(pol_theta, x) # 1 x n
+    dpi_dtheta = POL_GRAD(policy_x, pol_theta, x) # n x p
+    if x.ndim > 1:
+        return np.multiply(constants[:,np.newaxis], dpi_dtheta).sum(axis=0)
+    else:
+        return np.sum(np.multiply(constants,dpi_dtheta))
+# # Minibatch version:
+#     CHUNKN = 20
+#     id_batches = np.array_split(range(n),CHUNKN) # list of arrays of indices
+#     if x.ndim > 1:
+#         grad = np.zeros([x.shape[1], 1])
+#     else:
+#         grad = 0
+#     for batch in id_batches:
+#         constants = Y[batch]*T_sgned[batch]*W[batch]
+#         policy_x = PI_1(pol_theta, x[batch,]) # 1 x n
+#         dpi_dtheta = POL_GRAD(policy_x, pol_theta, x[batch,]) # n x p
+#         grad += np.sum(np.multiply(constants[:,np.newaxis], dpi_dtheta)) # should be in px1
+#     return grad # no regularization
+'''
+read in callbacks for derivative of pi given theta and optimal w, t
+Y(Pi) - Y(-Pi)
+PI_1, POL_GRAD (returns (p x 1) vector)
+take in ** normalized weights W **
+
+'''
+def get_implicit_grad_centered_anti_pi(pol_theta, PI_1, POL_GRAD, x, Y, t01, W):
+    n = len(W); T_sgned = get_sgn_0_1(t01)
+    dc_dpi = np.diag(2*Y*T_sgned)
+    policy_x = PI_1(pol_theta, x) # 1 x n
+    dpi_dtheta = POL_GRAD(policy_x, pol_theta, x) # n x p
+    return dc_dpi.dot(dpi_dtheta).dot(W)
+
+
+
+
+'''
+find value of centered estimator, evaluated against a benchmark policy which assigns
+Pi(x) = 1 w.p p_1 for all x
+'''
+def centered_around_p1(a_bnd, b_bnd, Y_T, pi_1, p_1):
+    return find_opt_robust_ipw_val(np.multiply(Y_T, (pi_1 - p_1)), a_bnd, b_bnd, shorter=True)
+
+def plot_W_GDS(p_ths, W_GDs):
+    plot(p_ths, W_GDs[:,0])
+    for i in range(len(p_ths)):
+        plot([p_ths[i]-0.5, p_ths[i]+0.5], [W_GDs[i,0]-W_GDs[i,1]*0.5, W_GDs[i,0]+W_GDs[i,1]*0.5], c='b',alpha=0.1)
+
+''' test gradient fn for th, given vector of assignments p_1
+'''
+def test_subgrad_for_th(p_th, p_1, PI_1, POL_GRAD, x, y, t01):
+    n = x.shape[0]; pi_1 = PI_1(np.asarray([p_th]), x).flatten(); t=get_sgn_0_1(t01);
+    [lda_opt, wghts, wghts_sum] = find_opt_weights_shorter(np.multiply(y*t, pi_1 - p_1), a_bnd, b_bnd)
+    grad = get_implicit_grad_centered(p_th, PI_1, POL_GRAD, x, y, t01, wghts/wghts.sum())
+    return [lda_opt,grad]
+
+''' test gradient fn for th, regret against the anti-policy -Pi
+'''
+def test_subgrad_for_anti(p_th, p_1, PI_1, POL_GRAD, x, y, t01):
+    n = x.shape[0]; pi_1 = PI_1(np.asarray([p_th]), x).flatten(); t=get_sgn_0_1(t01);
+    [lda_opt, wghts, wghts_sum] = find_opt_weights_shorter(np.multiply(y*t, pi_1 - p_1), a_bnd, b_bnd)
+    grad = get_implicit_grad_centered_anti_pi(p_th, PI_1, POL_GRAD, x, y, t01, wghts/wghts.sum())
+    return [lda_opt,grad]
+
+########### Data generation tools ########################################################################
+"""
+"""
+
+
+    # generate propensity model
+def real_prop(x, beta_prop):
+    n = x.shape[0]; d = x.shape[1]
+    prop = np.zeros(n)
+    for i in range(n):
+        prop[i] = np.exp(np.dot(beta_prop[0:d], x[i,:]) + beta_prop[-1] )/ (1 + np.exp(np.dot(beta_prop[0:d], x[i,:]) + beta_prop[-1] ))
+    return prop
+'''
+requires specifying globals
+HIGH_FREQ_N = 5
+FREQ = 20
+'''
+def generate_data_nd(mu_x, sigma_x_mat, n, beta_cons, beta_x, beta_x_T, TRUE_PROP_BETA, beta_x_high_freq):
+#     x = np.random.normal(mu_x, sigma_x, size = n)
+    # generate n datapoints from the same multivariate normal distribution
+    x = np.random.multivariate_normal(mean = mu_x, cov= sigma_x_mat, size = n )
+    true_Q = real_prop(x, TRUE_PROP_BETA)
+    T = np.array(np.random.uniform(size=n) < true_Q).astype(int).flatten()
+    T = T.reshape([n,1])
+
+    clf = LogisticRegression(); clf.fit(x, T)
+    propensities = clf.predict_proba(x)
+    print clf.coef_
+    T_sgned = np.asarray([ 1 if T[i] == 1 else -1 for i in range(n)]).flatten()
+
+    y_sigma = 0.5
+    nominal_propensities_pos = propensities[:,1]; nominal_propensities_null = propensities[:,0]
+    Q = np.asarray( [nominal_propensities_pos[i] if T[i] == 1 else nominal_propensities_null[i] for i in range(n)] )
+    q = Q
+    Y = np.zeros(n)
+    for i in range(n):
+        Y[i] = T[i]*beta_cons + np.dot(beta_x.T, x[i,:]) + np.dot(beta_x_T.T, x[i,:]*T[i]) + np.dot(beta_x_high_freq.T, np.sin(x[i,0:HIGH_FREQ_N]*FREQ)*T[i])
+
+    white_noise_coef = 0.2
+    # add random noise
+    Y += np.random.multivariate_normal(mean = np.zeros(n), cov=white_noise_coef * np.eye(n))
+    Y_OFFSET = np.abs(np.min(Y))
+    Y = Y + Y_OFFSET
+    T = T.flatten()
+#     px = x.reshape([n,p]); x_augmented = np.hstack([x, np.ones([n,1])])
+    T_colors = [ 'r' if T[i] == 0 else 'b' for i in range(n)]
+#     x_poly = np.hstack([x, x**2, x**3, np.ones([n,1])])
+#     x_u_poly = np.hstack([x_poly[:,0:3], u.reshape([n,1])])
+    return [x, T, Y, true_Q, clf, T_colors, Y_OFFSET]
+
+########################################################################################
+
+### Scale continuous
+def scale_continuous(train_dict,test_dict):
+    continuous = np.asarray([len(np.unique(train_dict['X'][:,j])) for j in range(train_dict['X'].shape[1])]) > 10
+    print np.where(continuous)[0]
+    def scale_columns(x_train_col, x_test_col):
+        mn = np.mean(x_train_col); sd = np.std(x_train_col)
+        x_train_col = (x_train_col*1.0 - mn*1.0)/sd
+        x_test_col = (x_test_col*1.0 - mn)*1.0/sd
+        return [x_train_col, x_test_col]
+
+    for ind in np.where(continuous)[0]:
+        [x_train_col, x_test_col] = scale_columns(train_dict['X'][:,ind], test_dict['X'][:,ind])
+        train_dict['X'][:,ind] = x_train_col
+        test_dict['X'][:,ind] = x_test_col
+        return [train_dict, test_dict]
+
+
+''' add new versions of items to data dict '''
+def subsample_traindict(train_dict,TEST_FRAC):
+    train_ind, test_ind = train_test_split(range(len(train_dict['Y'])), test_size=TEST_FRAC)
+    dictlabels = ['T', 'Y', 'Yhf', 'prop_T' ]
+    train_dict['X'] = train_dict['X'][train_ind,:]
+    for ind,key in enumerate(dictlabels):
+        train_dict[key] = train_dict[key][train_ind];
+    # x_, x_test, t_sgned_, t_sgned_test, y_, y_test, yhf_, yhftest_, nominal_Q_, nominal_Q_test, train_ind, test_ind \
+    # = train_test_split(train_dict['X'], train_dict['T'], train_dict['Y'],train_dict['Yhf'], train_dict['prop_T'], range(len(train_dict['Y'])), test_size=TEST_FRAC)
+    # new_list = [x_, t_sgned_, y_,yhf_, nominal_Q_, train_ind]
+    # dictlabels = [ 'X', 'T', 'Y', 'Yhf', 'q0' ] # this ends up permuting the estimated propensities
+    # for ind,key in enumerate(dictlabels):
+    #     train_dict[key] = new_list[ind];
+    return [train_dict, train_ind]
+
+def scale_columns(x_train_col, x_test_col):
+    mn = np.mean(x_train_col); sd = np.std(x_train_col)
+    x_train_col = (x_train_col*1.0 - mn*1.0)/sd
+    x_test_col = (x_test_col*1.0 - mn)*1.0/sd
+    return [x_train_col, x_test_col]
+
+def scale_dicts(train_dict, test_dict):
+    continuous = np.asarray([len(np.unique(train_dict['X'][:,j])) for j in range(train_dict['X'].shape[1])]) > 10
+    for ind in np.where(continuous)[0]:
+        [x_train_col, x_test_col] = scale_columns(train_dict['X'][:,ind], test_dict['X'][:,ind])
+        train_dict['X'][:,ind] = x_train_col
+        test_dict['X'][:,ind] = x_test_col
+    return [train_dict, test_dict]