From 82b48903d0896db7d84ea2810f567a29d01688c3 Mon Sep 17 00:00:00 2001 From: Daniel <63580393+danrgll@users.noreply.github.com> Date: Wed, 24 Jul 2024 13:04:41 +0200 Subject: [PATCH] refactor: Introduce Settings Class and Default() Arguments (#116) --- docs/reference/declarative_usage.md | 9 +- neps/api.py | 166 ++++----- neps/utils/run_args.py | 178 ++++++++-- tests/test_settings/overwrite_run_args.yaml | 43 +++ .../run_args_optimizer_outside.yaml | 20 ++ .../run_args_optimizer_settings.yaml | 50 +++ tests/test_settings/run_args_optional.yaml | 14 + tests/test_settings/run_args_required.yaml | 8 + tests/test_settings/test_settings.py | 333 ++++++++++++++++++ .../test_yaml_run_args/test_yaml_run_args.py | 2 +- 10 files changed, 687 insertions(+), 136 deletions(-) create mode 100644 tests/test_settings/overwrite_run_args.yaml create mode 100644 tests/test_settings/run_args_optimizer_outside.yaml create mode 100644 tests/test_settings/run_args_optimizer_settings.yaml create mode 100644 tests/test_settings/run_args_optional.yaml create mode 100644 tests/test_settings/run_args_required.yaml create mode 100644 tests/test_settings/test_settings.py diff --git a/docs/reference/declarative_usage.md b/docs/reference/declarative_usage.md index 6bad0610..3b20d5df 100644 --- a/docs/reference/declarative_usage.md +++ b/docs/reference/declarative_usage.md @@ -4,9 +4,12 @@ Configure your experiments using a YAML file, which serves as a central reference for setting up your project. This approach simplifies sharing, reproducing and modifying configurations. -!!! note - You can partially define arguments in the YAML file and partially provide the arguments directly to `neps.run`. - However, double referencing is not allowed. You cannot define the same argument in both places. +!!! note "Argument Handling and Prioritization" + You can partially define and provide arguments via `run_args` (YAML file) and partially provide the arguments + directly to `neps.run`. 
Arguments directly provided to `neps.run` get prioritized over those defined in the YAML file. An exception to this + is for `searcher_kwargs` where a merge happens between the configurations. In this case, the directly provided arguments + are still prioritized, but the values from both the directly provided arguments and the YAML file are merged. + #### Simple YAML Example Below is a straightforward YAML configuration example for NePS covering the required arguments. diff --git a/neps/api.py b/neps/api.py index cf92de88..edb09bfa 100644 --- a/neps/api.py +++ b/neps/api.py @@ -9,9 +9,7 @@ from typing import Callable, Iterable, Literal import ConfigSpace as CS -from neps.utils.run_args import check_essential_arguments, \ - get_run_args_from_yaml, \ - check_double_reference +from neps.utils.run_args import Settings, Default from neps.utils.common import instance_from_map from neps.runtime import launch_runtime @@ -26,30 +24,29 @@ from neps.utils.common import get_searcher_data, get_value from neps.optimizers.info import SearcherConfigs - def run( - run_pipeline: Callable | None = None, - root_directory: str | Path | None = None, + run_pipeline: Callable | None = Default(None), + root_directory: str | Path | None = Default(None), pipeline_space: ( dict[str, Parameter | CS.ConfigurationSpace] | str | Path | CS.ConfigurationSpace | None - ) = None, - run_args: str | Path | None = None, - overwrite_working_directory: bool = False, - post_run_summary: bool = True, - development_stage_id=None, - task_id=None, - max_evaluations_total: int | None = None, - max_evaluations_per_run: int | None = None, - continue_until_max_evaluation_completed: bool = False, - max_cost_total: int | float | None = None, - ignore_errors: bool = False, - loss_value_on_error: None | float = None, - cost_value_on_error: None | float = None, - pre_load_hooks: Iterable | None = None, + ) = Default(None), + run_args: str | Path | None = Default(None), + overwrite_working_directory: bool = 
Default(False), + post_run_summary: bool = Default(True), + development_stage_id=Default(None), + task_id=Default(None), + max_evaluations_total: int | None = Default(None), + max_evaluations_per_run: int | None = Default(None), + continue_until_max_evaluation_completed: bool = Default(False), + max_cost_total: int | float | None = Default(None), + ignore_errors: bool = Default(False), + loss_value_on_error: None | float = Default(None), + cost_value_on_error: None | float = Default(None), + pre_load_hooks: Iterable | None = Default(None), searcher: ( Literal[ "default", @@ -62,7 +59,7 @@ def run( "regularized_evolution", ] | BaseOptimizer | Path - ) = "default", + ) = Default("default"), **searcher_kwargs, ) -> None: """Run a neural pipeline search. @@ -146,54 +143,17 @@ def run( ) max_cost_total = searcher_kwargs["budget"] del searcher_kwargs["budget"] + settings = Settings(locals(), run_args) + # TODO: check_essentials, + logger = logging.getLogger("neps") - if run_args: - optim_settings = get_run_args_from_yaml(run_args) - check_double_reference(run, locals(), optim_settings) - run_pipeline = optim_settings.get("run_pipeline", run_pipeline) - root_directory = optim_settings.get("root_directory", root_directory) - pipeline_space = optim_settings.get("pipeline_space", pipeline_space) - overwrite_working_directory = optim_settings.get( - "overwrite_working_directory", overwrite_working_directory - ) - post_run_summary = optim_settings.get("post_run_summary", post_run_summary) - development_stage_id = optim_settings.get("development_stage_id", - development_stage_id) - task_id = optim_settings.get("task_id", task_id) - max_evaluations_total = optim_settings.get("max_evaluations_total", - max_evaluations_total) - max_evaluations_per_run = optim_settings.get("max_evaluations_per_run", - max_evaluations_per_run) - continue_until_max_evaluation_completed = optim_settings.get( - "continue_until_max_evaluation_completed", - continue_until_max_evaluation_completed) - 
max_cost_total = optim_settings.get("max_cost_total", max_cost_total) - ignore_errors = optim_settings.get("ignore_errors", ignore_errors) - loss_value_on_error = optim_settings.get("loss_value_on_error", - loss_value_on_error) - cost_value_on_error = optim_settings.get("cost_value_on_error", - cost_value_on_error) - pre_load_hooks = optim_settings.get("pre_load_hooks", pre_load_hooks) - searcher = optim_settings.get("searcher", searcher) - # considers arguments of a provided SubClass of BaseOptimizer - searcher_class_arguments = optim_settings.get("custom_class_searcher_kwargs", {}) - - # check if necessary arguments are provided. - check_essential_arguments( - run_pipeline, - root_directory, - pipeline_space, - max_cost_total, - max_evaluations_total, - searcher, - run_args, - ) + # DO NOT use any neps arguments directly; instead, access them via the Settings class. - if pre_load_hooks is None: - pre_load_hooks = [] + if settings.pre_load_hooks is None: + settings.pre_load_hooks = [] - logger.info(f"Starting neps.run using root directory {root_directory}") + logger.info(f"Starting neps.run using root directory {settings.root_directory}") # Used to create the yaml holding information about the searcher. # Also important for testing and debugging the api. 
@@ -206,24 +166,26 @@ def run( } # special case if you load your own optimizer via run_args - if inspect.isclass(searcher): - if issubclass(searcher, BaseOptimizer): - search_space = SearchSpace(**pipeline_space) + if inspect.isclass(settings.searcher): + if issubclass(settings.searcher, BaseOptimizer): + search_space = SearchSpace(**settings.pipeline_space) # aligns with the behavior of the internal neps searcher which also overwrites # its arguments by using searcher_kwargs - merge_kwargs = {**searcher_class_arguments, **searcher_kwargs} - searcher_info["searcher_args"] = merge_kwargs - searcher = searcher(search_space, **merge_kwargs) + # TODO: searcher_kwargs was dropped here, i.e. the merging has to happen + # beforehand + searcher_info["searcher_args"] = settings.searcher_kwargs + settings.searcher = settings.searcher(search_space, + **settings.searcher_kwargs) else: # Raise an error if searcher is not a subclass of BaseOptimizer raise TypeError( "The provided searcher must be a class that inherits from BaseOptimizer." 
) - if isinstance(searcher, BaseOptimizer): - searcher_instance = searcher + if isinstance(settings.searcher, BaseOptimizer): + searcher_instance = settings.searcher searcher_info["searcher_name"] = "baseoptimizer" - searcher_info["searcher_alg"] = searcher.whoami() + searcher_info["searcher_alg"] = settings.searcher.whoami() searcher_info["searcher_selection"] = "user-instantiation" searcher_info["neps_decision_tree"] = False else: @@ -232,52 +194,56 @@ def run( searcher_info, ) = _run_args( searcher_info=searcher_info, - pipeline_space=pipeline_space, - max_cost_total=max_cost_total, - ignore_errors=ignore_errors, - loss_value_on_error=loss_value_on_error, - cost_value_on_error=cost_value_on_error, + pipeline_space=settings.pipeline_space, + max_cost_total=settings.max_cost_total, + ignore_errors=settings.ignore_errors, + loss_value_on_error=settings.loss_value_on_error, + cost_value_on_error=settings.cost_value_on_error, logger=logger, - searcher=searcher, - **searcher_kwargs, + searcher=settings.searcher, + **settings.searcher_kwargs, ) # Check to verify if the target directory contains history of another optimizer state # This check is performed only when the `searcher` is built during the run - if not isinstance(searcher, (BaseOptimizer, str, dict, Path)): + if not isinstance(settings.searcher, (BaseOptimizer, str, dict, Path)): raise ValueError( - f"Unrecognized `searcher` of type {type(searcher)}. Not str or BaseOptimizer." + f"Unrecognized `searcher` of type {type(settings.searcher)}. Not str or " + f"BaseOptimizer." ) - elif isinstance(searcher, BaseOptimizer): + elif isinstance(settings.searcher, BaseOptimizer): # This check is not strict when a user-defined neps.optimizer is provided logger.warning( "An instantiated optimizer is provided. The safety checks of NePS will be " "skipped. Accurate continuation of runs can no longer be guaranteed!" 
) - if task_id is not None: - root_directory = Path(root_directory) / f"task_{task_id}" - if development_stage_id is not None: - root_directory = Path(root_directory) / f"dev_{development_stage_id}" + if settings.task_id is not None: + settings.root_directory = Path(settings.root_directory) / (f"task_" + f"{settings.task_id}") + if settings.development_stage_id is not None: + settings.root_directory = (Path(settings.root_directory) / + f"dev_{settings.development_stage_id}") launch_runtime( - evaluation_fn=run_pipeline, + evaluation_fn=settings.run_pipeline, sampler=searcher_instance, optimizer_info=searcher_info, - optimization_dir=root_directory, - max_evaluations_total=max_evaluations_total, - max_evaluations_per_run=max_evaluations_per_run, - continue_until_max_evaluation_completed=continue_until_max_evaluation_completed, + optimization_dir=settings.root_directory, + max_evaluations_total=settings.max_evaluations_total, + max_evaluations_per_run=settings.max_evaluations_per_run, + continue_until_max_evaluation_completed + =settings.continue_until_max_evaluation_completed, logger=logger, - loss_value_on_error=loss_value_on_error, - ignore_errors=ignore_errors, - overwrite_optimization_dir=overwrite_working_directory, - pre_load_hooks=pre_load_hooks, + loss_value_on_error=settings.loss_value_on_error, + ignore_errors=settings.ignore_errors, + overwrite_optimization_dir=settings.overwrite_working_directory, + pre_load_hooks=settings.pre_load_hooks, ) - if post_run_summary: - assert root_directory is not None - post_run_csv(root_directory) + if settings.post_run_summary: + assert settings.root_directory is not None + post_run_csv(settings.root_directory) def _run_args( diff --git a/neps/utils/run_args.py b/neps/utils/run_args.py index 71870c98..9d7f6445 100644 --- a/neps/utils/run_args.py +++ b/neps/utils/run_args.py @@ -5,11 +5,10 @@ from __future__ import annotations import importlib.util -import inspect import logging import sys from pathlib import Path -from 
typing import Callable +from typing import Any, Callable import yaml @@ -37,7 +36,7 @@ PRE_LOAD_HOOKS = "pre_load_hooks" # searcher_kwargs is used differently in yaml and just play a role for considering # arguments of a custom searcher class (BaseOptimizer) -SEARCHER_KWARGS = "custom_class_searcher_kwargs" +SEARCHER_KWARGS = "searcher_kwargs" MAX_EVALUATIONS_PER_RUN = "max_evaluations_per_run" @@ -452,8 +451,7 @@ def check_essential_arguments( pipeline_space: dict | None, max_cost_total: int | None, max_evaluation_total: int | None, - searcher: BaseOptimizer | None, - run_args: str | None, + searcher: BaseOptimizer | dict | str | None, ) -> None: """Validates essential NePS configuration arguments. @@ -469,7 +467,6 @@ def check_essential_arguments( max_cost_total: Max allowed total cost for experiments. max_evaluation_total: Max allowed evaluations. searcher: Optimizer for the configuration space. - run_args: A YAML file containing the configuration settings. Raises: ValueError: Missing or invalid essential arguments. @@ -478,7 +475,7 @@ def check_essential_arguments( raise ValueError("'run_pipeline' is required but was not provided.") if not root_directory: raise ValueError("'root_directory' is required but was not provided.") - if not pipeline_space and (run_args or not isinstance(searcher, BaseOptimizer)): + if not pipeline_space and not isinstance(searcher, BaseOptimizer): # handling special case for searcher instance, in which user doesn't have to # provide the search_space because it's the argument of the searcher. raise ValueError("'pipeline_space' is required but was not provided.") @@ -490,31 +487,148 @@ def check_essential_arguments( ) -def check_double_reference( - func: Callable, func_arguments: dict, yaml_arguments: dict -) -> None: - """Checks if no argument is defined both via function arguments and YAML. +# Handle Settings - Args: - func (Callable): The function to check arguments against. 
- func_arguments (Dict): A dictionary containing the provided arguments to the - function and their values. - yaml_arguments (Dict): A dictionary containing the arguments provided via a YAML - file. - Raises: - ValueError: If any provided argument is defined both via function arguments and - the YAML file. +class Sentinel: + """Introduce a sentinel object as default value for checking variable assignment.""" + + def __repr__(self) -> str: + return "" + + +UNSET = Sentinel() + + +class Settings: + """Centralizes and manages configuration settings from various sources of NePS + arguments (run_args (yaml) and neps func_args). """ - sig = inspect.signature(func) - - for name, param in sig.parameters.items(): - if param.default != func_arguments[name]: - if name == RUN_ARGS: - # Ignoring run_args argument - continue - if name in yaml_arguments: - raise ValueError( - f"Conflict for argument '{name}': Argument is defined both via " - f"function arguments and YAML, which is not allowed." - ) + + def __init__(self, func_args: dict, yaml_args: str | Default | None = None): + """Initializes the Settings object by merging function arguments with YAML + configuration settings and assigning them to class attributes. It checks for + necessary configurations and handles default values where specified. + + Args: + func_args (dict): The function arguments directly passed to NePS. + yaml_args (str | Default | None): Optional. YAML file provided via run_args. 
+ """ + self.run_pipeline = UNSET + self.root_directory = UNSET + self.pipeline_space = UNSET + self.overwrite_working_directory = UNSET + self.post_run_summary = UNSET + self.development_stage_id = UNSET + self.task_id = UNSET + self.max_evaluations_total = UNSET + self.max_evaluations_per_run = UNSET + self.continue_until_max_evaluation_completed = UNSET + self.max_cost_total = UNSET + self.ignore_errors = UNSET + self.loss_value_on_error = UNSET + self.cost_value_on_error = UNSET + self.pre_load_hooks = UNSET + self.searcher = UNSET + self.searcher_kwargs = UNSET + + if not isinstance(yaml_args, Default) and yaml_args is not None: + yaml_settings = get_run_args_from_yaml(yaml_args) + dict_settings = self.merge(func_args, yaml_settings) + else: + dict_settings = {} + for key, value in func_args.items(): + if isinstance(value, Default): + dict_settings[key] = value.value + else: + dict_settings[key] = value + + # drop run_args, not needed as a setting attribute + del dict_settings[RUN_ARGS] + self.assign(dict_settings) + self.check() + + def merge(self, func_args: dict, yaml_args: dict) -> dict: + """Merge func_args and yaml_args. func_args gets priority over yaml_args.""" + # Initialize with YAML settings + merged_settings = yaml_args.copy() + + # overwrite or merge keys + for key, value in func_args.items(): + # Handle searcher_kwargs for BaseOptimizer case + if key == SEARCHER_KWARGS: + merged_settings[SEARCHER_KWARGS] = { + **yaml_args.pop(SEARCHER_KWARGS, {}), + **func_args[SEARCHER_KWARGS], + } + elif not isinstance(value, Default): + merged_settings[key] = value + elif key not in yaml_args: + # If the key is not in yaml_args, set it from Default + merged_settings[key] = value.value + return merged_settings + + def assign(self, dict_settings: dict) -> None: + """Updates existing attributes with values from `dict_settings`. + Raises AttributeError if any attribute in `dict_settings` does not exist. 
+ """ + for key, value in dict_settings.items(): + if hasattr(self, key): + setattr(self, key, value) + else: + raise AttributeError(f"'Settings' object has no attribute '{key}'") + + def check_unassigned_attributes(self) -> list: + """Check for UNSET and Default class.""" + return [ + key + for key, value in self.__dict__.items() + if value is UNSET or isinstance(value, Default) + ] + + def check(self) -> None: + """Check if all values are assigned and if the essentials are provided + correctly. + """ + unassigned_attributes = self.check_unassigned_attributes() + if unassigned_attributes: + raise ValueError( + f"Unassigned or default-initialized attributes detected: " + f"{', '.join(unassigned_attributes)}" + ) + check_essential_arguments( + self.run_pipeline, # type: ignore + self.root_directory, # type: ignore + self.pipeline_space, # type: ignore + self.max_cost_total, # type: ignore + self.max_evaluations_total, # type: ignore + self.searcher, # type: ignore + ) + + +class Default: + """A class to enable default detection. + + Attributes: + value: The value to be stored as the default. + + Methods: + __init__(self, value): Initializes the Default object with a value. + __repr__(self): Returns a string representation of the Default object. + """ + + def __init__(self, value: Any): + """Initialize the Default object with the specified value. + + Args: + value: The value to store as default. Can be any data type. + """ + self.value = value + + def __repr__(self) -> str: + """Return the string representation of the Default object. + + Returns: + A string that represents the Default object in the format . 
+ """ + return f"" diff --git a/tests/test_settings/overwrite_run_args.yaml b/tests/test_settings/overwrite_run_args.yaml new file mode 100644 index 00000000..fb69b401 --- /dev/null +++ b/tests/test_settings/overwrite_run_args.yaml @@ -0,0 +1,43 @@ +# Full Configuration Template for NePS +run_pipeline: + path: tests/test_yaml_run_args/test_declarative_usage_docs/run_pipeline.py + name: run_pipeline_constant + +pipeline_space: + learning_rate: + lower: 1e-5 + upper: 1e-1 + log: True # Log scale for learning rate + epochs: + lower: 5 + upper: 20 + is_fidelity: True + optimizer: + choices: [adam, sgd, adamw] + batch_size: 64 + +root_directory: "tests_tmpdir/test_declarative_usage_docs/full_config" +max_evaluations_total: 20 # Budget +max_cost_total: + +# Debug and Monitoring +overwrite_working_directory: True +post_run_summary: False +development_stage_id: 1 +task_id: 3 + +# Parallelization Setup +max_evaluations_per_run: 6 +continue_until_max_evaluation_completed: False + +# Error Handling +loss_value_on_error: 1.0 +cost_value_on_error: 1.0 +ignore_errors: True + +# Customization Options +searcher: hyperband # Internal key to select a NePS optimizer. 
+ +# Hooks +pre_load_hooks: + hook1: "tests/test_settings/test_settings.py" diff --git a/tests/test_settings/run_args_optimizer_outside.yaml b/tests/test_settings/run_args_optimizer_outside.yaml new file mode 100644 index 00000000..1dbfce01 --- /dev/null +++ b/tests/test_settings/run_args_optimizer_outside.yaml @@ -0,0 +1,20 @@ +run_pipeline: + name: run_pipeline + path: "tests/test_settings/test_settings.py" +pipeline_space: + name: pipeline_space + path: "tests/test_settings/test_settings.py" + +root_directory: "path/to/root_directory" +max_evaluations_total: 10 # Budget +searcher: + path: "tests/test_settings/test_settings.py" + name: my_bayesian + # Specific arguments depending on the searcher + initial_design_size: 7 + surrogate_model: gp + acquisition: EI + acquisition_sampler: random + random_interleave_prob: 0.1 + +overwrite_working_directory: True diff --git a/tests/test_settings/run_args_optimizer_settings.yaml b/tests/test_settings/run_args_optimizer_settings.yaml new file mode 100644 index 00000000..00e34d18 --- /dev/null +++ b/tests/test_settings/run_args_optimizer_settings.yaml @@ -0,0 +1,50 @@ +# Full Configuration Template for NePS +run_pipeline: + path: tests/test_yaml_run_args/test_declarative_usage_docs/run_pipeline.py + name: run_pipeline_constant + +pipeline_space: + learning_rate: + lower: 1e-5 + upper: 1e-1 + log: True # Log scale for learning rate + epochs: + lower: 5 + upper: 20 + is_fidelity: True + optimizer: + choices: [adam, sgd, adamw] + batch_size: 64 + +root_directory: "tests_tmpdir/test_declarative_usage_docs/full_config" +max_evaluations_total: 20 # Budget +max_cost_total: + +# Debug and Monitoring +overwrite_working_directory: True +post_run_summary: False +development_stage_id: 1 +task_id: 3 + +# Parallelization Setup +max_evaluations_per_run: 6 +continue_until_max_evaluation_completed: False + +# Error Handling +loss_value_on_error: 1.0 +cost_value_on_error: 1.0 +ignore_errors: True + +# Customization Options +searcher: + 
strategy: "hyperband" # Internal key to select a NePS optimizer. + eta: 3 + initial_design_type: max_budget + use_priors: false + random_interleave_prob: 0.0 + sample_default_first: false + sample_default_at_target: false + +# Hooks +pre_load_hooks: + hook1: "tests/test_settings/test_settings.py" diff --git a/tests/test_settings/run_args_optional.yaml b/tests/test_settings/run_args_optional.yaml new file mode 100644 index 00000000..5e46ff99 --- /dev/null +++ b/tests/test_settings/run_args_optional.yaml @@ -0,0 +1,14 @@ +max_cost_total: +overwrite_working_directory: True +post_run_summary: False +development_stage_id: +task_id: +max_evaluations_per_run: +continue_until_max_evaluation_completed: False +max_evaluations_total: 11 # get ignored +root_directory: "get/ignored" +loss_value_on_error: +cost_value_on_error: +ignore_errors: +searcher: hyperband +pre_load_hooks: diff --git a/tests/test_settings/run_args_required.yaml b/tests/test_settings/run_args_required.yaml new file mode 100644 index 00000000..d3c6afdd --- /dev/null +++ b/tests/test_settings/run_args_required.yaml @@ -0,0 +1,8 @@ +run_pipeline: + name: run_pipeline + path: "tests/test_settings/test_settings.py" +pipeline_space: + name: pipeline_space + path: "tests/test_settings/test_settings.py" +max_evaluations_total: 10 +root_directory: "path/to/root_directory" diff --git a/tests/test_settings/test_settings.py b/tests/test_settings/test_settings.py new file mode 100644 index 00000000..fcdac758 --- /dev/null +++ b/tests/test_settings/test_settings.py @@ -0,0 +1,333 @@ +from neps.utils.run_args import Settings, Default +import pytest +import neps +from neps.utils.run_args import get_run_args_from_yaml +from tests.test_yaml_run_args.test_yaml_run_args import (run_pipeline, hook1, hook2, + pipeline_space) +from neps.optimizers.bayesian_optimization.optimizer import BayesianOptimization +from typing import Union, Callable, Dict, List, Type + +BASE_PATH = "tests/test_settings" +run_pipeline = run_pipeline 
+hook1 = hook1 +hook2 = hook2 +pipeline_space = pipeline_space +my_bayesian = BayesianOptimization + + +@pytest.mark.neps_api +@pytest.mark.parametrize("func_args, yaml_args, expected_output", [ + ( + { # only essential arguments provided by func_args, no yaml + "run_pipeline": run_pipeline, + "root_directory": "path/to/root_directory", + "pipeline_space": pipeline_space, + "run_args": Default(None), + "overwrite_working_directory": Default(False), + "post_run_summary": Default(True), + "development_stage_id": Default(None), + "task_id": Default(None), + "max_evaluations_total": 10, + "max_evaluations_per_run": Default(None), + "continue_until_max_evaluation_completed": Default(False), + "max_cost_total": Default(None), + "ignore_errors": Default(False), + "loss_value_on_error": Default(None), + "cost_value_on_error": Default(None), + "pre_load_hooks": Default(None), + "searcher": Default("default"), + "searcher_kwargs": {}, + } + , + Default(None), + { + "run_pipeline": run_pipeline, + "root_directory": "path/to/root_directory", + "pipeline_space": pipeline_space, + "overwrite_working_directory": False, + "post_run_summary": True, + "development_stage_id": None, + "task_id": None, + "max_evaluations_total": 10, + "max_evaluations_per_run": None, + "continue_until_max_evaluation_completed": False, + "max_cost_total": None, + "ignore_errors": False, + "loss_value_on_error": None, + "cost_value_on_error": None, + "pre_load_hooks": None, + "searcher": "default", + "searcher_kwargs": {} + } + ), + ({ # only required elements of run_args + "run_pipeline": Default(None), + "root_directory": Default(None), + "pipeline_space": Default(None), + "run_args": Default(None), + "overwrite_working_directory": Default(False), + "post_run_summary": Default(True), + "development_stage_id": Default(None), + "task_id": Default(None), + "max_evaluations_total": Default(None), + "max_evaluations_per_run": Default(None), + "continue_until_max_evaluation_completed": Default(False), + 
"max_cost_total": Default(None), + "ignore_errors": Default(False), + "loss_value_on_error": Default(None), + "cost_value_on_error": Default(None), + "pre_load_hooks": Default(None), + "searcher": Default("default"), + "searcher_kwargs": {}, + }, + "/run_args_required.yaml", + { + "run_pipeline": run_pipeline, + "root_directory": "path/to/root_directory", + "pipeline_space": pipeline_space, + "overwrite_working_directory": False, + "post_run_summary": True, + "development_stage_id": None, + "task_id": None, + "max_evaluations_total": 10, + "max_evaluations_per_run": None, + "continue_until_max_evaluation_completed": False, + "max_cost_total": None, + "ignore_errors": False, + "loss_value_on_error": None, + "cost_value_on_error": None, + "pre_load_hooks": None, + "searcher": "default", + "searcher_kwargs": {} + }), + ({ # required via func_args, optional via yaml + "run_pipeline": run_pipeline, + "root_directory": "path/to/root_directory", + "pipeline_space": pipeline_space, + "run_args": "tests/path/to/run_args", # will be ignored by Settings + "overwrite_working_directory": Default(False), + "post_run_summary": Default(True), + "development_stage_id": Default(None), + "task_id": Default(None), + "max_evaluations_total": 10, + "max_evaluations_per_run": Default(None), + "continue_until_max_evaluation_completed": Default(False), + "max_cost_total": Default(None), + "ignore_errors": Default(False), + "loss_value_on_error": Default(None), + "cost_value_on_error": Default(None), + "pre_load_hooks": Default(None), + "searcher": Default("default"), + "searcher_kwargs": {}, + }, + "/run_args_optional.yaml", + { + "run_pipeline": run_pipeline, + "root_directory": "path/to/root_directory", + "pipeline_space": pipeline_space, + "overwrite_working_directory": True, + "post_run_summary": False, + "development_stage_id": None, + "task_id": None, + "max_evaluations_total": 10, + "max_evaluations_per_run": None, + "continue_until_max_evaluation_completed": False, + 
"max_cost_total": None, + "ignore_errors": False, + "loss_value_on_error": None, + "cost_value_on_error": None, + "pre_load_hooks": None, + "searcher": "hyperband", + "searcher_kwargs": {} + }), + ({ # overwrite all yaml values + "run_pipeline": run_pipeline, + "root_directory": "path/to/root_directory", + "pipeline_space": pipeline_space, + "run_args": "test", + "overwrite_working_directory": False, + "post_run_summary": True, + "development_stage_id": 5, + "task_id": None, + "max_evaluations_total": 17, + "max_evaluations_per_run": None, + "continue_until_max_evaluation_completed": False, + "max_cost_total": None, + "ignore_errors": False, + "loss_value_on_error": None, + "cost_value_on_error": None, + "pre_load_hooks": None, + "searcher": "default", + "searcher_kwargs": {}, + } + , + "/overwrite_run_args.yaml", + { + "run_pipeline": run_pipeline, + "root_directory": "path/to/root_directory", + "pipeline_space": pipeline_space, + "overwrite_working_directory": False, + "post_run_summary": True, + "development_stage_id": 5, + "task_id": None, + "max_evaluations_total": 17, + "max_evaluations_per_run": None, + "continue_until_max_evaluation_completed": False, + "max_cost_total": None, + "ignore_errors": False, + "loss_value_on_error": None, + "cost_value_on_error": None, + "pre_load_hooks": None, + "searcher": "default", + "searcher_kwargs": {}, + } + ), + ({ # optimizer args special case + "run_pipeline": run_pipeline, + "root_directory": "path/to/root_directory", + "pipeline_space": pipeline_space, + "run_args": "test", + "overwrite_working_directory": False, + "post_run_summary": True, + "development_stage_id": 5, + "task_id": None, + "max_evaluations_total": 17, + "max_evaluations_per_run": None, + "continue_until_max_evaluation_completed": False, + "max_cost_total": None, + "ignore_errors": False, + "loss_value_on_error": None, + "cost_value_on_error": None, + "pre_load_hooks": None, + "searcher": Default("default"), + "searcher_kwargs": 
{"initial_design_type": "max_budget", + "use_priors": False, + "random_interleave_prob": 0.0, + "sample_default_first": False, + "sample_default_at_target": False}, + } + , + "/run_args_optimizer_settings.yaml", + { + "run_pipeline": run_pipeline, + "root_directory": "path/to/root_directory", + "pipeline_space": pipeline_space, + "overwrite_working_directory": False, + "post_run_summary": True, + "development_stage_id": 5, + "task_id": None, + "max_evaluations_total": 17, + "max_evaluations_per_run": None, + "continue_until_max_evaluation_completed": False, + "max_cost_total": None, + "ignore_errors": False, + "loss_value_on_error": None, + "cost_value_on_error": None, + "pre_load_hooks": None, + "searcher": { + "strategy": "hyperband", + "eta": 3, + "initial_design_type": "max_budget", + "use_priors": False, + "random_interleave_prob": 0.0, + "sample_default_first": False, + "sample_default_at_target": False}, + "searcher_kwargs": {"initial_design_type": "max_budget", + "use_priors": False, + "random_interleave_prob": 0.0, + "sample_default_first": False, + "sample_default_at_target": False}, + }), +({ # load optimizer with args + "run_pipeline": Default(None), + "root_directory": Default(None), + "pipeline_space": Default(None), + "run_args": Default(None), + "overwrite_working_directory": Default(False), + "post_run_summary": Default(True), + "development_stage_id": Default(None), + "task_id": Default(None), + "max_evaluations_total": Default(None), + "max_evaluations_per_run": Default(None), + "continue_until_max_evaluation_completed": Default(False), + "max_cost_total": Default(None), + "ignore_errors": Default(False), + "loss_value_on_error": Default(None), + "cost_value_on_error": Default(None), + "pre_load_hooks": Default(None), + "searcher": Default("default"), + "searcher_kwargs": {"random_interleave_prob": 0.2, + "initial_design_size": 9}, + } + , + "/run_args_optimizer_outside.yaml", + { + "run_pipeline": run_pipeline, + "root_directory": 
"path/to/root_directory", + "pipeline_space": pipeline_space, + "overwrite_working_directory": True, + "post_run_summary": True, + "development_stage_id": None, + "task_id": None, + "max_evaluations_total": 10, + "max_evaluations_per_run": None, + "continue_until_max_evaluation_completed": False, + "max_cost_total": None, + "ignore_errors": False, + "loss_value_on_error": None, + "cost_value_on_error": None, + "pre_load_hooks": None, + "searcher": my_bayesian, + "searcher_kwargs": {"acquisition": "EI", + "acquisition_sampler": "random", + "random_interleave_prob": 0.2, + "initial_design_size": 9, + "surrogate_model": "gp" + }, + }) +]) +def test_check_settings(func_args: Dict, yaml_args: str, expected_output: Dict) -> None: + """ + Check if expected settings are set + """ + if not isinstance(yaml_args, Default): + yaml_args = BASE_PATH + yaml_args + settings = Settings(func_args, yaml_args) + print(settings) + for key, value in expected_output.items(): + assert getattr(settings, key) == value + + +@pytest.mark.neps_api +@pytest.mark.parametrize("func_args, yaml_args, error", [ + ( + { + "root_directory": Default(None), + "pipeline_space": Default(None), + "run_args": Default(None), + "overwrite_working_directory": Default(False), + "post_run_summary": Default(True), + "development_stage_id": Default(None), + "task_id": Default(None), + "max_evaluations_total": Default(None), + "max_evaluations_per_run": Default(None), + "continue_until_max_evaluation_completed": Default(False), + "max_cost_total": Default(None), + "ignore_errors": Default(False), + "loss_value_on_error": Default(None), + "cost_value_on_error": Default(None), + "pre_load_hooks": Default(None), + "searcher": Default("default"), + "searcher_kwargs": {}, + }, + Default(None), + ValueError + ) +]) +def test_settings_initialization_error(func_args: Dict, yaml_args: Union[str, Default], + error: Exception) -> None: + """ + Test if Settings raises Error when essential arguments are missing + """ + with 
pytest.raises(error): + Settings(func_args, yaml_args) diff --git a/tests/test_yaml_run_args/test_yaml_run_args.py b/tests/test_yaml_run_args/test_yaml_run_args.py index 3ce2185f..5a0c5d22 100644 --- a/tests/test_yaml_run_args/test_yaml_run_args.py +++ b/tests/test_yaml_run_args/test_yaml_run_args.py @@ -179,7 +179,7 @@ def are_functions_equivalent(f1: Union[Callable, List[Callable]], "cost_value_on_error": 2.1, "ignore_errors": False, "searcher": BayesianOptimization, - "custom_class_searcher_kwargs": {'initial_design_size': 5, + "searcher_kwargs": {'initial_design_size': 5, 'surrogate_model': 'gp'}, "pre_load_hooks": [hook1]