class MyAlg:
    """Epsilon-greedy arm selection for CardinalBanditsPureExploration.

    All state lives in ``butler.algorithms`` under these keys:

    * ``n``, ``R``, ``delta`` -- values passed to ``initExp``.
    * ``pT`` -- number of queries served so far (the time step ``t``).
    * ``Xsum``, ``X2sum``, ``T`` -- per-arm sum of rewards, sum of squared
      rewards and number of processed answers.
    * ``priority_list`` -- arm indices, best empirical mean first.
    * ``S`` -- answers received but not yet folded into the statistics.
    """

    def initExp(self, butler, n, R, failure_probability, params={}):
        """Store the experiment parameters and zeroed per-arm statistics.

        ``params`` is accepted for interface compatibility; it is neither
        read nor mutated here. Always returns True.
        """
        butler.algorithms.set(key='n', value=n)
        butler.algorithms.set(key='delta', value=failure_probability)
        butler.algorithms.set(key='R', value=R)
        butler.algorithms.set(key='pT', value=0)

        # Build one list per key so the three statistics never share a
        # single list object.
        for key in ('Xsum', 'X2sum', 'T'):
            butler.algorithms.set(key=key, value=numpy.zeros(n).tolist())

        # Until any answers arrive, the arm order is a random permutation.
        priority_list = numpy.random.permutation(n).tolist()
        butler.algorithms.set(key='priority_list', value=priority_list)

        return True

    def getQuery(self, butler, participant_uid):
        """Return the index of the arm to show next.

        With probability ``1 - 1/(t + 1)`` the head of the priority list is
        returned; otherwise an arm is drawn uniformly at random from the
        list. ``t`` is the stored ``pT`` counter, which is advanced by one
        on every call. ``participant_uid`` is not used.
        """
        # 'priority_list' is requested with an increment of 0 so it comes
        # back from the same call that bumps 'priority_list_cnt'.
        kv_dict = butler.algorithms.increment_many(
            key_value_dict={'priority_list': 0, 'priority_list_cnt': 1})
        priority_list = kv_dict['priority_list']

        t = butler.algorithms.get()['pT']
        # Float literals keep this a true division under Python 2 as well.
        greedy_probability = 1.0 - 1.0 / (t + 1)
        if numpy.random.rand() < greedy_probability:
            index = priority_list[0]
        else:
            index = numpy.random.choice(priority_list)

        # Write back under the same key that was read ('pT'); otherwise the
        # time step would never advance.
        butler.algorithms.set(key='pT', value=t + 1)

        # numpy.random.choice yields a numpy integer; hand back a plain int.
        return int(index)

    def processAnswer(self, butler, target_id, target_reward):
        """Queue one (arm, reward) answer; occasionally schedule an update.

        Always returns True.
        """
        butler.algorithms.append(key='S', value=(target_id, target_reward))

        if numpy.random.rand() < .1:  # occurs about 1/10 of trials
            butler.job('update_priority_list', {}, time_limit=5)

        return True

    def getModel(self, butler):
        """Return ``(means, precisions, counts)`` for every arm.

        Arms with no processed answers report -1 for both mean and
        precision. Arms with exactly one answer report ``R`` as precision.
        ``counts`` is the stored ``T`` list, returned as-is.
        """
        key_value_dict = butler.algorithms.get()
        R = key_value_dict['R']
        n = key_value_dict['n']
        sumX = key_value_dict['Xsum']
        sumX2 = key_value_dict['X2sum']
        T = key_value_dict['T']

        mu = numpy.zeros(n)
        prec = numpy.zeros(n)
        for i in range(n):
            if T[i] == 0:
                mu[i] = -1
                prec[i] = -1
                continue

            mu[i] = float(sumX[i]) / T[i]
            if T[i] == 1:
                prec[i] = R
            else:
                # The sum of squared deviations is floored at 1.
                squared_dev = max(1., sumX2[i] - T[i] * mu[i] * mu[i])
                prec[i] = numpy.sqrt(float(squared_dev) / (T[i] - 1.) / T[i])

        return mu.tolist(), prec.tolist(), T

    def update_priority_list(self, butler, args):
        """Fold queued answers into the statistics and re-rank the arms.

        Does nothing when no answers are queued. ``args`` is not used.
        """
        S = butler.algorithms.get_and_delete(key='S')
        if S is None:
            return

        doc = butler.algorithms.get()
        n = doc['n']
        Xsum = doc['Xsum']
        X2sum = doc['X2sum']
        T = doc['T']

        for arm, reward in S:
            Xsum[arm] += reward
            X2sum[arm] += reward * reward
            T[arm] += 1

        # Arms never played get an infinite mean so they rank first.
        mu = numpy.zeros(n)
        for i in range(n):
            mu[i] = float(Xsum[i]) / T[i] if T[i] else float('inf')

        # Sort by descending empirical mean; ties are broken randomly.
        priority_list = numpy.lexsort((numpy.random.randn(n), -mu)).tolist()

        butler.algorithms.set_many(key_value_dict={
            'priority_list': priority_list,
            'priority_list_cnt': 0,
            'Xsum': Xsum,
            'X2sum': X2sum,
            'T': T,
        })
+ type: num + default: 0 + optional: true + context: description: Specifies the context. In the query page, this appears above the potential ratings. type: str optional: true + context_type: description: Specifies the type of the context. If the type is not text a url to the resource should be provided. type: str values: [image, text, video] optional: true - + processAnswer: args: args: @@ -60,4 +67,3 @@ processAnswer: target_reward: description: The reward of the target. If being used through the query page, this should align with the rating scale specified in initExp. Otherwise can be any numerical value. type: num -