Skip to content

Commit

Permalink
include pb2
Browse files Browse the repository at this point in the history
  • Loading branch information
TheEimer committed Sep 3, 2024
1 parent ec6fb54 commit ffdefd7
Show file tree
Hide file tree
Showing 14 changed files with 2,759 additions and 30 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ install: clean ## install the package to the active Python's site-packages
pip install -e . --config-settings editable_mode=compat

install-dev: clean ## install the package in editable mode with the dev, examples, doc and all extras
pip install -e ".[dev,examples,doc,all]"
pip install -e ".[dev,examples,doc,all]" --config-settings editable_mode=compat

check:
pre-commit run --all-files
Expand Down
69 changes: 69 additions & 0 deletions examples/configs/pb2_sb3_sac.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@

# Example Hydra configuration: runs the SAC agent on MountainCarContinuous-v0
# and overrides the Hydra sweeper with HyperPB2.
defaults:
- _self_
- override hydra/sweeper: HyperPB2

env_name: MountainCarContinuous-v0
# `load` and `save` are the keys the sweeper section below names as its
# loading_variable and saving_variable.
load: false
save: false
reward_curves: false

# Agent settings. Several model_kwargs entries (learning_rate, batch_size,
# gradient_steps, learning_starts, buffer_size) are also listed in the
# sweeper search space further down; the values here are the starting values.
algorithm:
agent_class: SAC
total_timesteps: 1e5
n_eval_episodes: 5
policy_model: MlpPolicy
model_kwargs:
learning_rate: 0.0003
batch_size: 256
# NOTE(review): tau is 1.0 here — confirm this is intended rather than a
# small soft-update coefficient.
tau: 1.0
gamma: 0.99
learning_starts: 100
buffer_size: 1000000
train_freq: 1
gradient_steps: 1
use_sde: False
sde_sample_freq: -1

# Sweeper settings for the HyperPB2 override selected in `defaults`.
hydra:
sweeper:
# The budget is interpolated from algorithm.total_timesteps, and
# budget_variable names that same key.
budget: ${algorithm.total_timesteps}
budget_variable: algorithm.total_timesteps
loading_variable: load
saving_variable: save
sweeper_kwargs:
optimizer_kwargs:
population_size: 2
config_interval: 1e4
categorical_mutation: "mix"
checkpoint_tf: true
load_tf: true
# Each search-space entry is keyed by the dotted path of the config value it
# tunes.
# NOTE(review): the uniform_int bounds below are written in scientific
# notation (1e4, 5e3, 5e7) — confirm the YAML loader in use reads them as
# numbers and that the search space accepts them as integer bounds.
search_space:
hyperparameters:
algorithm.model_kwargs.learning_rate:
type: uniform_float
lower: 0.000001
upper: 0.01
log: true
algorithm.model_kwargs.batch_size:
type: categorical
choices: [64, 128, 256, 512]
algorithm.model_kwargs.gradient_steps:
type: uniform_int
lower: 1
upper: 10
log: false
algorithm.model_kwargs.learning_starts:
type: uniform_int
lower: 10
upper: 1e4
log: false
algorithm.model_kwargs.buffer_size:
type: uniform_int
lower: 5e3
upper: 5e7
log: false
# Single runs and sweeps both write to a date/time-stamped directory under
# ./tmp.
run:
dir: ./tmp/${now:%Y-%m-%d}/${now:%H-%M-%S}
sweep:
dir: ./tmp/${now:%Y-%m-%d}/${now:%H-%M-%S}
30 changes: 17 additions & 13 deletions examples/sb3_rl_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,23 @@ def train_sb3(cfg: DictConfig):
else:
model = agent_class(cfg.algorithm.policy_model, env, **cfg.algorithm.model_kwargs)

model.learn(total_timesteps=cfg.algorithm.total_timesteps, reset_num_timesteps=False)

if cfg.save:
model.save(cfg.save)

mean_reward, std_reward = evaluate_policy(
model,
model.get_env(),
n_eval_episodes=cfg.algorithm.n_eval_episodes,
)
log.info(
f"Mean evaluation reward at the end of training across {cfg.algorithm.n_eval_episodes} episodes was {mean_reward}"
)
try:
model.learn(total_timesteps=cfg.algorithm.total_timesteps, reset_num_timesteps=False)

if cfg.save:
model.save(cfg.save)

mean_reward, std_reward = evaluate_policy(
model,
model.get_env(),
n_eval_episodes=cfg.algorithm.n_eval_episodes,
)
log.info(
f"Mean evaluation reward at the end of training across {cfg.algorithm.n_eval_episodes} episodes was {mean_reward}"
)
except:
print("Error in training")
mean_reward = -1e6
if cfg.reward_curves:
episode_rewards = [-r for r in env.get_episode_rewards()]
return episode_rewards
Expand Down
6 changes: 3 additions & 3 deletions hydra_plugins/hyper_hebo/hyper_hebo.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def make_hebo(configspace, hebo_args):


# These functions were taken from the CARP-S project here: https://github.com/automl/CARP-S/blob/main/carps/optimizers/hebo.py#L23
def configspaceHP2HEBOHP(hp: Hyperparameter) -> dict: # noqa: PLR0911, N802
def configspaceHP2HEBOHP(hp: Hyperparameter) -> dict: # noqa: PLR0911
"""Convert ConfigSpace hyperparameter to HEBO hyperparameter.
Parameters
Expand Down Expand Up @@ -105,7 +105,7 @@ def configspaceHP2HEBOHP(hp: Hyperparameter) -> dict: # noqa: PLR0911, N802
raise NotImplementedError(f"Unknown hyperparameter type: {hp.__class__.__name__}")


def HEBOcfg2ConfigSpacecfg( # noqa: N802
def HEBOcfg2ConfigSpacecfg(
hebo_suggestion: pd.DataFrame,
design_space: DesignSpace,
config_space: ConfigurationSpace,
Expand Down Expand Up @@ -145,7 +145,7 @@ def HEBOcfg2ConfigSpacecfg( # noqa: N802
return Configuration(configuration_space=config_space, values=hyp)


def ConfigSpacecfg2HEBOcfg(config: Configuration) -> pd.DataFrame: # noqa: N802
def ConfigSpacecfg2HEBOcfg(config: Configuration) -> pd.DataFrame:
"""Convert ConfigSpace config to HEBO suggestion.
Parameters
Expand Down
4 changes: 2 additions & 2 deletions hydra_plugins/hyper_pbt/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .config import HyperPBTConfig
from .config import HyperPB2Config, HyperPBTConfig

__all__ = ["HyperPBTConfig"]
__all__ = ["HyperPBTConfig", "HyperPB2Config"]
Loading

0 comments on commit ffdefd7

Please sign in to comment.