Skip to content

Commit

Permalink
Merge pull request #33 from automl/api-checks
Browse files Browse the repository at this point in the history
API refactoring + API tests and checks
  • Loading branch information
Neeratyoy authored Nov 28, 2023
2 parents 132454f + 9fa64ec commit eec1222
Show file tree
Hide file tree
Showing 21 changed files with 489 additions and 203 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,4 @@ jobs:

- name: Run pytest
timeout-minutes: 15
run: poetry run pytest -m "all_examples or metahyper or summary_csv"
run: poetry run pytest -m "all_examples or metahyper or neps_api or summary_csv"
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ def run_pipeline(architecture):
pipeline_space=pipeline_space,
root_directory="results/hierarchical_architecture_example_new",
max_evaluations_total=15,
searcher="bayesian_optimization",
surrogate_model=surrogate_model,
surrogate_model_args=surrogate_model_args,
)
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,8 @@ profile = 'black'
line_length = 90

[tool.pytest.ini_options]
addopts = "--basetemp ./tests_tmpdir -m 'core_examples or yaml_api'"
markers = ["all_examples", "core_examples", "regression_all", "metahyper", "yaml_api", "summary_csv"]
addopts = "--basetemp ./tests_tmpdir -m 'neps_api or core_examples'"
markers = ["all_examples", "core_examples", "regression_all", "metahyper", "neps_api", "summary_csv"]
filterwarnings = "ignore::DeprecationWarning:torch.utils.tensorboard.*:"

[tool.mypy]
Expand Down
19 changes: 10 additions & 9 deletions src/metahyper/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from copy import deepcopy
from dataclasses import dataclass
from pathlib import Path
from typing import Any, List
from typing import Any, Iterable

from ._locker import Locker
from .utils import YamlSerializer, find_files, non_empty_file
Expand Down Expand Up @@ -124,17 +124,14 @@ def _process_sampler_info(
thread safety, preventing potential conflicts when multiple threads access the file
simultaneously.
Parameters:
Args:
- serializer: The YAML serializer object used for loading and dumping data.
- sampler_info: The dictionary containing information for the optimizer.
- sampler_info_file: The path to the YAML file storing optimizer data if available.
- decision_locker: The Locker file to use during multi-thread communication.
- logger: An optional logger object for logging messages (default is 'neps').
Note:
- The file-locking mechanism is employed to avoid potential errors in multiple threads.
- The `Locker` class and `YamlSerializer` should be appropriately defined or imported.
- Ensure that potential side effects or dependencies are considered when using this function.
"""
if logger is None:
logger = logging.getLogger("neps")
Expand All @@ -159,6 +156,9 @@ def _process_sampler_info(
else:
# If the file is empty or doesn't exist, write the sampler_info
serializer.dump(sampler_info, sampler_info_file, sort_keys=False)
except ValueError as ve:
# Handle specific value error
raise ve
except Exception as e:
raise RuntimeError(f"Error during data saving: {e}") from e
finally:
Expand Down Expand Up @@ -292,14 +292,14 @@ def _sample_config(optimization_dir, sampler, serializer, logger, pre_load_hooks
base_result_directory = optimization_dir / "results"

logger.debug("Sampling a new configuration")

for hook in pre_load_hooks:
# executes operations on the sampler before setting its state
# can be used for setting custom constraints on the optimizer state
# for example, can be used to input custom grid of configs, meta learning
# for example, can be used to input custom grid of configs, meta learning
# information for surrogate building, any non-stationary auxiliary information
sampler = hook(sampler)

sampler.load_results(previous_results, pending_configs)
config, config_id, previous_config_id = sampler.get_config_and_ids()

Expand Down Expand Up @@ -426,7 +426,7 @@ def run(
logger=None,
post_evaluation_hook=None,
overwrite_optimization_dir=False,
pre_load_hooks: List=[],
pre_load_hooks: Iterable | None = None,
):
serializer = YamlSerializer(sampler.load_config)
if logger is None:
Expand All @@ -451,6 +451,7 @@ def run(
decision_lock_file.touch(exist_ok=True)
decision_locker = Locker(decision_lock_file, logger.getChild("_locker"))

# Configuring the .optimizer_info.yaml file
_process_sampler_info(
serializer, sampler_info, sampler_info_file, decision_locker, logger
)
Expand Down
146 changes: 68 additions & 78 deletions src/neps/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import logging
import warnings
from pathlib import Path
from typing import Callable, List, Literal
from typing import Callable, Iterable, Literal

import ConfigSpace as CS

Expand All @@ -18,7 +18,7 @@
from .search_spaces.parameter import Parameter
from .search_spaces.search_space import SearchSpace, pipeline_space_from_configspace
from .status.status import post_run_csv
from .utils.common import get_searcher_data
from .utils.common import get_searcher_data, get_value
from .utils.result_utils import get_loss


Expand Down Expand Up @@ -95,8 +95,10 @@ def write_loss_and_config(file_handle, loss_, config_id_, config_):

def run(
run_pipeline: Callable,
pipeline_space: dict[str, Parameter | CS.ConfigurationSpace] | CS.ConfigurationSpace,
root_directory: str | Path,
pipeline_space: dict[str, Parameter | CS.ConfigurationSpace]
| CS.ConfigurationSpace
| None = None,
overwrite_working_directory: bool = False,
post_run_summary: bool = False,
development_stage_id=None,
Expand All @@ -108,7 +110,7 @@ def run(
ignore_errors: bool = False,
loss_value_on_error: None | float = None,
cost_value_on_error: None | float = None,
pre_load_hooks: List=[],
pre_load_hooks: Iterable | None = None,
searcher: Literal[
"default",
"bayesian_optimization",
Expand Down Expand Up @@ -169,7 +171,6 @@ def run(
Raises:
ValueError: If deprecated argument working_directory is used.
ValueError: If root_directory is None.
TypeError: If pipeline_space has invalid type.
Example:
Expand Down Expand Up @@ -204,24 +205,35 @@ def run(
)
max_cost_total = searcher_kwargs["budget"]
del searcher_kwargs["budget"]


if pre_load_hooks is None:
pre_load_hooks = []

logger = logging.getLogger("neps")
logger.info(f"Starting neps.run using root directory {root_directory}")


# Used to create the yaml holding information about the searcher.
# Also important for testing and debugging the api.
searcher_info = {
"searcher_name": "",
"searcher_alg": "",
"searcher_selection": "",
"neps_decision_tree": True,
"searcher_args": {},
}

if isinstance(searcher, BaseOptimizer):
searcher_instance = searcher
searcher_name = "custom"
searcher_alg = searcher.whoami()
user_defined_searcher = True
searcher_info["searcher_name"] = "baseoptimizer"
searcher_info["searcher_alg"] = searcher.whoami()
searcher_info["searcher_selection"] = "user-instantiation"
searcher_info["neps_decision_tree"] = False
else:
(
searcher_name,
searcher_instance,
searcher_alg,
searcher_config,
searcher_info,
user_defined_searcher
(
searcher_instance,
searcher_info,
) = _run_args(
searcher_info=searcher_info,
pipeline_space=pipeline_space,
max_cost_total=max_cost_total,
ignore_errors=ignore_errors,
Expand All @@ -233,51 +245,19 @@ def run(
**searcher_kwargs,
)

# Used to create the yaml holding information about the searcher.
# Also important for testing and debugging the api.
searcher_info = {
"searcher_name": searcher_name,
"searcher_alg": searcher_alg,
"user_defined_searcher": user_defined_searcher,
"searcher_args_user_modified": False,
}

# Check to verify if the target directory contains the history of another optimizer state
# Check to verify if the target directory contains history of another optimizer state
# This check is performed only when the `searcher` is built during the run
if isinstance(searcher, BaseOptimizer):
if not isinstance(searcher, (BaseOptimizer, str)):
raise ValueError(
f"Unrecognized `searcher` of type {type(searcher)}. Not str or BaseOptimizer."
)
elif isinstance(searcher, BaseOptimizer):
# This check is not strict when a user-defined neps.optimizer is provided
logger.warn(
"An instantiated optimizer is provided. The safety checks of NePS will be "
"skipped. Accurate continuation of runs can no longer be guaranteed!"
)
elif isinstance(searcher, str):
# Updating searcher arguments from searcher_kwargs
for key, value in searcher_kwargs.items():
if user_defined_searcher:
if key not in searcher_config or searcher_config[key] != value:
searcher_config[key] = value
logger.info(
f"Updating the current searcher argument '{key}'"
f" with the value '{value}'"
)
else:
logger.info(
f"The searcher argument '{key}' has the same"
f" value '{value}' as default."
)
searcher_info["searcher_args_user_modified"] = True
else:
# No searcher argument updates when NePS decides the searcher.
logger.info(35 * "=" + "WARNING" + 35 * "=")
logger.info("CHANGING ARGUMENTS ONLY WORK WHEN SEARCHER IS DEFINED")
logger.info(
f"The searcher argument '{key}' will not change to '{value}'"
f" because NePS chose the searcher"
)
searcher_info["searcher_args_user_modified"] = False
else:
raise ValueError(f"Unrecognized `searcher`. Not str or BaseOptimizer.")


metahyper.run(
run_pipeline,
searcher_instance,
Expand All @@ -301,7 +281,10 @@ def run(


def _run_args(
pipeline_space: dict[str, Parameter | CS.ConfigurationSpace] | CS.ConfigurationSpace,
searcher_info: dict,
pipeline_space: dict[str, Parameter | CS.ConfigurationSpace]
| CS.ConfigurationSpace
| None = None,
max_cost_total: int | float | None = None,
ignore_errors: bool = False,
loss_value_on_error: None | float = None,
Expand All @@ -320,8 +303,13 @@ def _run_args(
| BaseOptimizer = "default",
searcher_path: Path | str | None = None,
**searcher_kwargs,
) -> None:
) -> tuple[BaseOptimizer, dict]:
try:
# Raise an error if pipeline_space is None
if pipeline_space is None:
raise ValueError(
"The choice of searcher requires a pipeline space to be provided"
)
# Support pipeline space as ConfigurationSpace definition
if isinstance(pipeline_space, CS.ConfigurationSpace):
pipeline_space = pipeline_space_from_configspace(pipeline_space)
Expand All @@ -335,21 +323,20 @@ def _run_args(
else:
new_pipeline_space[key] = value
pipeline_space = new_pipeline_space

# Transform to neps internal representation of the pipeline space
pipeline_space = SearchSpace(**pipeline_space)
except TypeError as e:
message = f"The pipeline_space has invalid type: {type(pipeline_space)}"
raise TypeError(message) from e

user_defined_searcher = False

if isinstance(searcher, str) and searcher_path is not None:
# The user has their own custom searcher.
logging.info("Preparing to run user created searcher")

config = get_searcher_data(searcher, searcher_path)
user_defined_searcher = True
searcher_info["searcher_selection"] = "user-yaml"
searcher_info["neps_decision_tree"] = False
else:
if searcher in ["default", None]:
# NePS decides the searcher according to the pipeline space.
Expand All @@ -361,42 +348,41 @@ def _run_args(
if pipeline_space.has_fidelity
else "bayesian_optimization"
)
searcher_info["searcher_selection"] = "neps-default"
else:
# Users choose one of NePS searchers.
user_defined_searcher = True
searcher_info["neps_decision_tree"] = False
searcher_info["searcher_selection"] = "neps-default"
# Fetching the searcher data, throws an error when the searcher is not found
config = get_searcher_data(searcher)

searcher_alg = config["searcher_init"]["algorithm"]
searcher_config = {} if config["searcher_kwargs"] is None else config["searcher_kwargs"]
searcher_config = (
{} if config["searcher_kwargs"] is None else config["searcher_kwargs"]
)

logger.info(f"Running {searcher} as the searcher")
logger.info(f"Algorithm: {searcher_alg}")

# Used to create the yaml holding information about the searcher.
# Also important for testing and debugging the api.
searcher_info = {
"searcher_name": searcher,
"searcher_alg": searcher_alg,
"user_defined_searcher": user_defined_searcher,
"searcher_args_user_modified": False,
}
searcher_info["searcher_name"] = searcher
searcher_info["searcher_alg"] = searcher_alg

# Updating searcher arguments from searcher_kwargs
for key, value in searcher_kwargs.items():
if user_defined_searcher:
if not searcher_info["neps_decision_tree"]:
if key not in searcher_config or searcher_config[key] != value:
searcher_config[key] = value
logger.info(
f"Updating the current searcher argument '{key}'"
f" with the value '{value}'"
f" with the value '{get_value(value)}'"
)
else:
logger.info(
f"The searcher argument '{key}' has the same"
f" value '{value}' as default."
f" value '{get_value(value)}' as default."
)
searcher_info["searcher_args_user_modified"] = True
else:
# No searcher argument updates when NePS decides the searcher.
logger.info(35 * "=" + "WARNING" + 35 * "=")
Expand All @@ -405,7 +391,8 @@ def _run_args(
f"The searcher argument '{key}' will not change to '{value}'"
f" because NePS chose the searcher"
)
searcher_info["searcher_args_user_modified"] = False

searcher_info["searcher_args"] = get_value(searcher_config)

searcher_config.update(
{
Expand All @@ -414,13 +401,16 @@ def _run_args(
"ignore_errors": ignore_errors,
}
)

searcher_instance = instance_from_map(
SearcherMapping, searcher_alg, "searcher", as_class=True
)(
pipeline_space=pipeline_space,
budget=max_cost_total, # TODO: use max_cost_total everywhere
**searcher_config,
)

return searcher, searcher_instance, searcher_alg, searcher_config, searcher_info, user_defined_searcher

return (
searcher_instance,
searcher_info,
)
Loading

0 comments on commit eec1222

Please sign in to comment.