-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathrun_experiments.py
80 lines (63 loc) · 2.9 KB
/
run_experiments.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import numpy as np
import matplotlib.pyplot as plt
from algorithms.full_bandit_exp3_inv import FullBanditExp3Inv
from algorithms.full_bandit_exp3_inv import FullBanditNewTuning
from algorithms.real_lin_exp3 import RealLinExp3
from algorithms.semi_bandit_ftrl_inv import SemiBanditFTRLInv
from algorithms.one_per_context_sp import OnePerContextSP
from algorithms.shortest_path import ShortestPath
from algorithms.one_per_context_bubeck import OnePerContextBubeck
from algorithms.bubeck import Bubeck
from distributions.distribution_by_sequence import DistributionBySequence
from distributions.distribution import Distribution
from distributions.sequence import Sequence
from algorithms.semi_bandit_exp3 import SemiBanditExp3
from algorithms.full_bandit_exp3 import FullBanditExp3
from algorithms.semi_bandit_ftrl import SemiBanditFTRL
from algorithms.uniform_random import UniformRandom
from algorithms.non_contextual_exp3 import NonContextualExp3
from algorithms.one_per_context import OnePerContext
from algorithms.one_per_context import OnePerContextCorrect
from experiment_manager.experiment_manager import ExperimentManager
from distributions.actionsets.msets import MSets
from distributions.contexts.binary_context import BinaryContext
from distributions.thetas.single_hole import SingleHole
from distributions.thetas.independent_bernoulli import IndependentBernoulli
import multiprocessing as mp
def get_dist(rng, d, K, m):
    """Build an ``IndependentBernoulli`` from a randomly perturbed (d, K) matrix.

    Every entry of the matrix starts at 0.5. In each of the ``d`` rows, ``m``
    distinct column indices are drawn with ``rng`` and those entries are
    lowered by 0.3.

    Args:
        rng: Generator exposing ``integers(K)`` (the script passes
            ``np.random.default_rng(0)``).
        d: Number of rows of the matrix.
        K: Number of columns of the matrix.
        m: Number of distinct columns lowered in each row; must not exceed K.

    Returns:
        ``IndependentBernoulli(d, K, p)`` where ``p`` is the matrix described
        above.
        NOTE(review): ``p`` presumably holds the Bernoulli parameters per
        (context feature, arm) -- confirm against IndependentBernoulli.

    Raises:
        ValueError: If ``m > K``. Only K distinct columns exist, so the
            rejection loop below could never finish.
    """
    if m > K:
        raise ValueError(
            f"cannot pick m={m} distinct columns out of K={K}"
        )

    p = np.zeros((d, K)) + 0.5
    for row in range(d):
        chosen = set()
        # Rejection sampling: exactly one rng.integers(K) call per iteration,
        # so the random stream consumed for a given seed stays the same.
        while len(chosen) < m:
            index = rng.integers(K)
            if index not in chosen:
                chosen.add(index)
                p[row, index] -= 0.3
    return IndependentBernoulli(d, K, p)
if __name__ == "__main__":
    # Script entry point: assemble the algorithms and the benchmark
    # distributions, then hand both to the experiment manager.
    rng = np.random.default_rng(0)  # seeded generator used by get_dist
    exp_manager = ExperimentManager()

    # Alternative line-ups kept for reference:
    #   [UniformRandom(), OnePerContext(), NonContextualExp3(), RealLinExp3(), SemiBanditFTRLInv(), FullBanditExp3Inv()]
    #   [Bubeck()]
    algos = [
        UniformRandom(),
        OnePerContextBubeck(),
        Bubeck(),
        RealLinExp3(),
        SemiBanditFTRLInv(),
        FullBanditExp3Inv(),
    ]
    print(algos[0].__class__)
    # The algorithms are handed over in the opposite order to the one listed.
    algos = algos[::-1]

    # Single horizon, reused below for the gamma/eta override.
    horizon = 100000
    lengths = [horizon]

    # One distribution per (context shape, action-set shape) pair.
    context_shapes = [(4, 1), (5, 2), (6, 3)]  # (d, number of ones)
    action_shapes = [(3, 1), (4, 2), (5, 3)]  # (K, m)
    grid = [
        (d, ones, K, m)
        for d, ones in context_shapes
        for K, m in action_shapes
    ]
    distributions = []
    for d, ones, K, m in grid:
        actions = MSets(K, m)
        contexts = BinaryContext(d, ones)
        thetas = get_dist(rng, d, K, m)
        distributions.append(Distribution(contexts, thetas, actions))

    # First entry: no overrides. Second entry: gamma and eta both set to
    # 1/sqrt(horizon).
    inv_sqrt_horizon = 1 / np.sqrt(horizon)
    override_constants = [
        {},
        {"gamma": inv_sqrt_horizon, "eta": inv_sqrt_horizon},
    ]
    # Disabled sweep over explicit values, kept for reference:
    # for gamma in [0.1, 0.25]:
    #     for eta in [1e-4, 1e-5]:
    #         override_constants.append({
    #             "gamma": gamma,
    #             "eta": eta
    #         })

    # NOTE(review): the meaning of the literals 25 and 5 is defined by
    # ExperimentManager -- confirm there before changing them.
    exp_manager.create_output_dir(25, lengths, distributions)
    # data = exp_manager.run_on_existing(algos, override_constants, 1)
    data = exp_manager.run_on_existing(algos, override_constants, 5)