include pb2

automl · Sep 3, 2024 · ffdefd7 · ffdefd7
1 parent ec6fb54
commit ffdefd7
Show file tree

Hide file tree

Showing 14 changed files with 2,759 additions and 30 deletions.
diff --git a/Makefile b/Makefile
@@ -102,7 +102,7 @@ install: clean ## install the package to the active Python's site-packages
 	pip install -e . --config-settings editable_mode=compat
 
 install-dev: clean ## install the package in editable mode with the dev, examples, doc and all extras
-	pip install -e ".[dev,examples,doc,all]"
+	pip install -e ".[dev,examples,doc,all]" --config-settings editable_mode=compat
 
 check:
 	pre-commit run --all-files

diff --git a/examples/configs/pb2_sb3_sac.yaml b/examples/configs/pb2_sb3_sac.yaml
@@ -0,0 +1,69 @@
+
+defaults:
+  - _self_
+  - override hydra/sweeper: HyperPB2
+
+env_name: MountainCarContinuous-v0
+load: false
+save: false
+reward_curves: false
+
+algorithm:
+  agent_class: SAC
+  total_timesteps: 1e5
+  n_eval_episodes: 5
+  policy_model: MlpPolicy
+  model_kwargs:
+    learning_rate: 0.0003
+    batch_size: 256
+    tau: 1.0
+    gamma: 0.99
+    learning_starts: 100
+    buffer_size: 1000000
+    train_freq: 1
+    gradient_steps: 1
+    use_sde: False
+    sde_sample_freq: -1
+
+hydra:
+  sweeper:
+    budget: ${algorithm.total_timesteps}
+    budget_variable: algorithm.total_timesteps
+    loading_variable: load
+    saving_variable: save
+    sweeper_kwargs:
+      optimizer_kwargs:
+        population_size: 2
+        config_interval: 1e4
+        categorical_mutation: "mix"
+      checkpoint_tf: true
+      load_tf: true
+    search_space:
+      hyperparameters:
+        algorithm.model_kwargs.learning_rate:
+          type: uniform_float
+          lower: 0.000001
+          upper: 0.01
+          log: true
+        algorithm.model_kwargs.batch_size:
+          type: categorical
+          choices: [64, 128, 256, 512]
+        algorithm.model_kwargs.gradient_steps:
+          type: uniform_int
+          lower: 1
+          upper: 10
+          log: false
+        algorithm.model_kwargs.learning_starts:
+          type: uniform_int
+          lower: 10
+          upper: 1e4
+          log: false
+        algorithm.model_kwargs.buffer_size:
+          type: uniform_int
+          lower: 5e3
+          upper: 5e7
+          log: false
+  run:
+    dir: ./tmp/${now:%Y-%m-%d}/${now:%H-%M-%S}
+  sweep:
+    dir: ./tmp/${now:%Y-%m-%d}/${now:%H-%M-%S}
diff --git a/examples/sb3_rl_agent.py b/examples/sb3_rl_agent.py
@@ -32,19 +32,23 @@ def train_sb3(cfg: DictConfig):
     else:
         model = agent_class(cfg.algorithm.policy_model, env, **cfg.algorithm.model_kwargs)
 
-    model.learn(total_timesteps=cfg.algorithm.total_timesteps, reset_num_timesteps=False)
-
-    if cfg.save:
-        model.save(cfg.save)
-
-    mean_reward, std_reward = evaluate_policy(
-        model,
-        model.get_env(),
-        n_eval_episodes=cfg.algorithm.n_eval_episodes,
-    )
-    log.info(
-        f"Mean evaluation reward at the end of training across {cfg.algorithm.n_eval_episodes} episodes was {mean_reward}"
-    )
+    try:
+        model.learn(total_timesteps=cfg.algorithm.total_timesteps, reset_num_timesteps=False)
+
+        if cfg.save:
+            model.save(cfg.save)
+
+        mean_reward, std_reward = evaluate_policy(
+            model,
+            model.get_env(),
+            n_eval_episodes=cfg.algorithm.n_eval_episodes,
+        )
+        log.info(
+            f"Mean evaluation reward at the end of training across {cfg.algorithm.n_eval_episodes} episodes was {mean_reward}"
+        )
+    except:
+        print("Error in training")
+        mean_reward = -1e6
     if cfg.reward_curves:
         episode_rewards = [-r for r in env.get_episode_rewards()]
         return episode_rewards

diff --git a/hydra_plugins/hyper_hebo/hyper_hebo.py b/hydra_plugins/hyper_hebo/hyper_hebo.py
@@ -61,7 +61,7 @@ def make_hebo(configspace, hebo_args):
 
 
 # These functions were taken from the CARP-S project here: https://github.com/automl/CARP-S/blob/main/carps/optimizers/hebo.py#L23
-def configspaceHP2HEBOHP(hp: Hyperparameter) -> dict:  # noqa: PLR0911, N802
+def configspaceHP2HEBOHP(hp: Hyperparameter) -> dict:  # noqa: PLR0911
     """Convert ConfigSpace hyperparameter to HEBO hyperparameter.
 
     Parameters
@@ -105,7 +105,7 @@ def configspaceHP2HEBOHP(hp: Hyperparameter) -> dict:  # noqa: PLR0911, N802
         raise NotImplementedError(f"Unknown hyperparameter type: {hp.__class__.__name__}")
 
 
-def HEBOcfg2ConfigSpacecfg(  # noqa: N802
+def HEBOcfg2ConfigSpacecfg(
     hebo_suggestion: pd.DataFrame,
     design_space: DesignSpace,
     config_space: ConfigurationSpace,
@@ -145,7 +145,7 @@ def HEBOcfg2ConfigSpacecfg(  # noqa: N802
     return Configuration(configuration_space=config_space, values=hyp)
 
 
-def ConfigSpacecfg2HEBOcfg(config: Configuration) -> pd.DataFrame:  # noqa: N802
+def ConfigSpacecfg2HEBOcfg(config: Configuration) -> pd.DataFrame:
     """Convert ConfigSpace config to HEBO suggestion.
 
     Parameters

diff --git a/hydra_plugins/hyper_pbt/__init__.py b/hydra_plugins/hyper_pbt/__init__.py
@@ -1,3 +1,3 @@
-from .config import HyperPBTConfig
+from .config import HyperPB2Config, HyperPBTConfig
 
-__all__ = ["HyperPBTConfig"]
+__all__ = ["HyperPBTConfig", "HyperPB2Config"]