-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathenvironment.py
37 lines (28 loc) · 881 Bytes
/
environment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from typing import Protocol, Tuple
import numpy as np
class Environment(Protocol):
    """
    Structural interface of a multi-armed bandit environment.

    This is a ``typing.Protocol``: any object that provides the methods
    below with compatible signatures satisfies it, without having to
    inherit from this class. Every method body here is a stub (``...``).
    """

    def pull(self, arm: int) -> float:
        """
        Draw the reward from a given arm.

        :param arm: an arm index
        :return: a realised reward
        """
        ...

    def optimal_arm(self) -> Tuple[int, float]:
        """
        Return an arm with the largest population mean,
        breaking ties by choosing the smallest index.

        :return: a tuple of the arm index and its expected mean
        """
        ...

    def suboptimality_gap(self, arm: int) -> float:
        """
        Return the difference between the optimal arm's mean and the
        mean of the given arm.

        :param arm: a given arm index
        :return: an absolute difference in means
        """
        ...

    def num_arms(self) -> int:
        """
        Return the number of arms in the environment.

        NOTE(review): presumably valid arm indices are
        ``0 .. num_arms() - 1`` -- confirm against the implementations.

        :return: the arm count
        """
        ...

    def means(self) -> np.ndarray:
        """
        Return the population means of the arms as an array.

        NOTE(review): presumably one entry per arm, ordered by arm index
        -- confirm against the implementations.

        :return: an array of arm means
        """
        ...