From 10fa07ce254fdc5c7a668f114db81fd489eb3f1c Mon Sep 17 00:00:00 2001
From: Xingyou Song
Date: Mon, 18 Nov 2024 09:25:42 -0800
Subject: [PATCH] Change MAB experimenters to use more general `arms_to_rewards` dictionary.

PiperOrigin-RevId: 697654482
---
 .../normalizing_experimenter_test.py          |  2 +-
 .../experimenters/synthetic/multiarm.py       | 47 +++++++------------
 2 files changed, 18 insertions(+), 31 deletions(-)

diff --git a/vizier/_src/benchmarks/experimenters/normalizing_experimenter_test.py b/vizier/_src/benchmarks/experimenters/normalizing_experimenter_test.py
index 66ae2e45a..4d8ab7457 100644
--- a/vizier/_src/benchmarks/experimenters/normalizing_experimenter_test.py
+++ b/vizier/_src/benchmarks/experimenters/normalizing_experimenter_test.py
@@ -75,7 +75,7 @@ def testNormalizationApply(self, func):
 
   def test_NormalizingCategoricals(self):
     mab_exptr = multiarm.FixedMultiArmExperimenter(
-        rewards=[-1e6, 0.0, 1e6], arms_as_chars=False
+        arms_to_rewards={'0': -1e6, '1': 0.0, '2': 1e6}
     )
     norm_exptr = normalizing_experimenter.NormalizingExperimenter(mab_exptr)
     metric_name = norm_exptr.problem_statement().metric_information.item().name
diff --git a/vizier/_src/benchmarks/experimenters/synthetic/multiarm.py b/vizier/_src/benchmarks/experimenters/synthetic/multiarm.py
index 88ea5d825..6a015de42 100644
--- a/vizier/_src/benchmarks/experimenters/synthetic/multiarm.py
+++ b/vizier/_src/benchmarks/experimenters/synthetic/multiarm.py
@@ -20,32 +20,20 @@
 distributions.
 """
 
-import copy
-from typing import Optional, Sequence
+from typing import Mapping, Optional, Sequence
 
 import numpy as np
 from vizier import pyvizier as vz
 from vizier._src.benchmarks.experimenters import experimenter
 
 
-def _default_multiarm_problem(
-    num_arms: int, arms_as_chars: bool
-) -> vz.ProblemStatement:
+def _default_multiarm_problem(arms: Sequence[str]) -> vz.ProblemStatement:
   """Returns default multi-arm problem statement."""
   problem = vz.ProblemStatement()
   problem.metric_information.append(
       vz.MetricInformation(name="reward", goal=vz.ObjectiveMetricGoal.MAXIMIZE)
   )
-
-  if arms_as_chars:
-    # Starts with 'a' character.
-    feasible_values = [chr(i + 97) for i in range(num_arms)]
-  else:
-    feasible_values = [str(i) for i in range(num_arms)]
-
-  problem.search_space.root.add_categorical_param(
-      name="arm", feasible_values=feasible_values
-  )
+  problem.search_space.root.add_categorical_param("arm", feasible_values=arms)
   return problem
 
 
@@ -54,23 +42,24 @@ class BernoulliMultiArmExperimenter(experimenter.Experimenter):
 
   def __init__(
       self,
-      probs: Sequence[float],
-      arms_as_chars: bool = True,
+      arms_to_probs: Mapping[str, float],
       seed: Optional[int] = None,
   ):
-    self._probs = probs
+    for arm, prob in arms_to_probs.items():
+      if not 0.0 <= prob <= 1.0:
+        raise ValueError(
+            "Probability for arm %s must be in [0, 1], got %s" % (arm, prob))
+    self._arms_to_probs = arms_to_probs
     self._rng = np.random.RandomState(seed)
-    self._problem = _default_multiarm_problem(len(self._probs), arms_as_chars)
 
   def problem_statement(self) -> vz.ProblemStatement:
-    return copy.deepcopy(self._problem)
+    return _default_multiarm_problem(list(self._arms_to_probs.keys()))
 
   def evaluate(self, suggestions: Sequence[vz.Trial]) -> None:
     """Each arm has a fixed probability of outputting 0 or 1 reward."""
-    feasibles = self._problem.search_space.parameters[0].feasible_values
     for suggestion in suggestions:
-      arm_index = feasibles.index(suggestion.parameters["arm"].value)
-      prob = self._probs[arm_index]
+      arm = suggestion.parameters["arm"].value
+      prob = self._arms_to_probs[arm]
       reward = self._rng.choice([0, 1], p=[1 - prob, prob])
       suggestion.final_measurement = vz.Measurement(metrics={"reward": reward})
 
 
@@ -78,16 +67,14 @@ def evaluate(self, suggestions: Sequence[vz.Trial]) -> None:
 class FixedMultiArmExperimenter(experimenter.Experimenter):
   """Rewards are deterministic."""
 
-  def __init__(self, rewards: Sequence[float], arms_as_chars: bool = True):
-    self._rewards = rewards
-    self._problem = _default_multiarm_problem(len(self._rewards), arms_as_chars)
+  def __init__(self, arms_to_rewards: Mapping[str, float]):
+    self._arms_to_rewards = arms_to_rewards
 
   def problem_statement(self) -> vz.ProblemStatement:
-    return copy.deepcopy(self._problem)
+    return _default_multiarm_problem(list(self._arms_to_rewards.keys()))
 
   def evaluate(self, suggestions: Sequence[vz.Trial]) -> None:
-    feasibles = self._problem.search_space.parameters[0].feasible_values
     for suggestion in suggestions:
-      arm_index = feasibles.index(suggestion.parameters["arm"].value)
-      reward = self._rewards[arm_index]
+      arm = suggestion.parameters["arm"].value
+      reward = self._arms_to_rewards[arm]
       suggestion.final_measurement = vz.Measurement(metrics={"reward": reward})
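
Usage note (illustrative sketch, not part of the patch): after this change, callers pass a mapping from arm name to reward (or probability) instead of a reward list plus `arms_as_chars`. The snippet below shows the intended call pattern; the arm names and values are hypothetical, and the `vz.Trial(parameters=...)` construction and `final_measurement.metrics[...].value` access are assumed from pyvizier.

    from vizier import pyvizier as vz
    from vizier._src.benchmarks.experimenters.synthetic import multiarm

    # Deterministic rewards, keyed directly by arm name (hypothetical values).
    fixed = multiarm.FixedMultiArmExperimenter(
        arms_to_rewards={"a": 0.1, "b": 0.5, "c": 0.9}
    )
    trial = vz.Trial(parameters={"arm": "b"})
    fixed.evaluate([trial])  # Writes trial.final_measurement in place.
    print(trial.final_measurement.metrics["reward"].value)  # 0.5

    # Stochastic 0/1 rewards; each arm's probability is an independent
    # Bernoulli parameter in [0, 1], so the values need not sum to 1.
    bernoulli = multiarm.BernoulliMultiArmExperimenter(
        arms_to_probs={"a": 0.2, "b": 0.8}, seed=0
    )
    bernoulli.evaluate([vz.Trial(parameters={"arm": "a"})])

The search space now exposes exactly the mapping's keys as the categorical `arm` values, so evaluation becomes a direct dictionary lookup instead of an index into `feasible_values`.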