From 82b48903d0896db7d84ea2810f567a29d01688c3 Mon Sep 17 00:00:00 2001
From: Daniel <63580393+danrgll@users.noreply.github.com>
Date: Wed, 24 Jul 2024 13:04:41 +0200
Subject: [PATCH] refactor: Introduce Settings Class and Default() Arguments
 (#116)

---
 docs/reference/declarative_usage.md           |   9 +-
 neps/api.py                                   | 166 ++++-----
 neps/utils/run_args.py                        | 178 ++++++++--
 tests/test_settings/overwrite_run_args.yaml   |  43 +++
 .../run_args_optimizer_outside.yaml           |  20 ++
 .../run_args_optimizer_settings.yaml          |  50 +++
 tests/test_settings/run_args_optional.yaml    |  14 +
 tests/test_settings/run_args_required.yaml    |   8 +
 tests/test_settings/test_settings.py          | 333 ++++++++++++++++++
 .../test_yaml_run_args/test_yaml_run_args.py  |   2 +-
 10 files changed, 687 insertions(+), 136 deletions(-)
 create mode 100644 tests/test_settings/overwrite_run_args.yaml
 create mode 100644 tests/test_settings/run_args_optimizer_outside.yaml
 create mode 100644 tests/test_settings/run_args_optimizer_settings.yaml
 create mode 100644 tests/test_settings/run_args_optional.yaml
 create mode 100644 tests/test_settings/run_args_required.yaml
 create mode 100644 tests/test_settings/test_settings.py

diff --git a/docs/reference/declarative_usage.md b/docs/reference/declarative_usage.md
index 6bad0610..3b20d5df 100644
--- a/docs/reference/declarative_usage.md
+++ b/docs/reference/declarative_usage.md
@@ -4,9 +4,12 @@
 Configure your experiments using a YAML file, which serves as a central reference for setting up your project.
 This approach simplifies sharing, reproducing and modifying configurations.
 
-!!! note
-    You can partially define arguments in the YAML file and partially provide the arguments directly to `neps.run`.
-    However, double referencing is not allowed. You cannot define the same argument in both places.
+!!! note "Argument Handling and Prioritization"
+    You can define some arguments via `run_args` (YAML file) and provide others directly to `neps.run`.
+    Arguments passed directly to `neps.run` take priority over those defined in the YAML file. The one exception
+    is `searcher_kwargs`: here the values from the YAML file and the directly provided arguments are merged, and
+    for keys defined in both places the directly provided value wins.
+
 
 #### Simple YAML Example
 Below is a straightforward YAML configuration example for NePS covering the required arguments.
diff --git a/neps/api.py b/neps/api.py
index cf92de88..edb09bfa 100644
--- a/neps/api.py
+++ b/neps/api.py
@@ -9,9 +9,7 @@
 from typing import Callable, Iterable, Literal
 
 import ConfigSpace as CS
-from neps.utils.run_args import check_essential_arguments, \
-    get_run_args_from_yaml, \
-    check_double_reference
+from neps.utils.run_args import Settings, Default
 
 from neps.utils.common import instance_from_map
 from neps.runtime import launch_runtime
@@ -26,30 +24,29 @@
 from neps.utils.common import get_searcher_data, get_value
 from neps.optimizers.info import SearcherConfigs
 
-
 def run(
-    run_pipeline: Callable | None = None,
-    root_directory: str | Path | None = None,
+    run_pipeline: Callable | None = Default(None),
+    root_directory: str | Path | None = Default(None),
     pipeline_space: (
         dict[str, Parameter | CS.ConfigurationSpace]
         | str
         | Path
         | CS.ConfigurationSpace
         | None
-    ) = None,
-    run_args: str | Path | None = None,
-    overwrite_working_directory: bool = False,
-    post_run_summary: bool = True,
-    development_stage_id=None,
-    task_id=None,
-    max_evaluations_total: int | None = None,
-    max_evaluations_per_run: int | None = None,
-    continue_until_max_evaluation_completed: bool = False,
-    max_cost_total: int | float | None = None,
-    ignore_errors: bool = False,
-    loss_value_on_error: None | float = None,
-    cost_value_on_error: None | float = None,
-    pre_load_hooks: Iterable | None = None,
+    ) = Default(None),
+    run_args: str | Path | None = Default(None),
+    overwrite_working_directory: bool = Default(False),
+    post_run_summary: bool = Default(True),
+    development_stage_id=Default(None),
+    task_id=Default(None),
+    max_evaluations_total: int | None = Default(None),
+    max_evaluations_per_run: int | None = Default(None),
+    continue_until_max_evaluation_completed: bool = Default(False),
+    max_cost_total: int | float | None = Default(None),
+    ignore_errors: bool = Default(False),
+    loss_value_on_error: None | float = Default(None),
+    cost_value_on_error: None | float = Default(None),
+    pre_load_hooks: Iterable | None = Default(None),
     searcher: (
         Literal[
             "default",
@@ -62,7 +59,7 @@ def run(
             "regularized_evolution",
         ]
         | BaseOptimizer | Path
-    ) = "default",
+    ) = Default("default"),
     **searcher_kwargs,
 ) -> None:
     """Run a neural pipeline search.
@@ -146,54 +143,17 @@ def run(
         )
         max_cost_total = searcher_kwargs["budget"]
         del searcher_kwargs["budget"]
+    settings = Settings(locals(), run_args)
+    # NOTE: Settings.__init__ already validates the essential arguments via Settings.check().
+
     logger = logging.getLogger("neps")
 
-    if run_args:
-        optim_settings = get_run_args_from_yaml(run_args)
-        check_double_reference(run, locals(), optim_settings)
-        run_pipeline = optim_settings.get("run_pipeline", run_pipeline)
-        root_directory = optim_settings.get("root_directory", root_directory)
-        pipeline_space = optim_settings.get("pipeline_space", pipeline_space)
-        overwrite_working_directory = optim_settings.get(
-            "overwrite_working_directory", overwrite_working_directory
-        )
-        post_run_summary = optim_settings.get("post_run_summary", post_run_summary)
-        development_stage_id = optim_settings.get("development_stage_id",
-                                                  development_stage_id)
-        task_id = optim_settings.get("task_id", task_id)
-        max_evaluations_total = optim_settings.get("max_evaluations_total",
-                                                   max_evaluations_total)
-        max_evaluations_per_run = optim_settings.get("max_evaluations_per_run",
-                                                     max_evaluations_per_run)
-        continue_until_max_evaluation_completed = optim_settings.get(
-            "continue_until_max_evaluation_completed",
-            continue_until_max_evaluation_completed)
-        max_cost_total = optim_settings.get("max_cost_total", max_cost_total)
-        ignore_errors = optim_settings.get("ignore_errors", ignore_errors)
-        loss_value_on_error = optim_settings.get("loss_value_on_error",
-                                                 loss_value_on_error)
-        cost_value_on_error = optim_settings.get("cost_value_on_error",
-                                                 cost_value_on_error)
-        pre_load_hooks = optim_settings.get("pre_load_hooks", pre_load_hooks)
-        searcher = optim_settings.get("searcher", searcher)
-        # considers arguments of a provided SubClass of BaseOptimizer
-        searcher_class_arguments = optim_settings.get("custom_class_searcher_kwargs", {})
-
-    # check if necessary arguments are provided.
-    check_essential_arguments(
-        run_pipeline,
-        root_directory,
-        pipeline_space,
-        max_cost_total,
-        max_evaluations_total,
-        searcher,
-        run_args,
-    )
+    # DO NOT use any neps arguments directly; instead, access them via the Settings class.
 
-    if pre_load_hooks is None:
-        pre_load_hooks = []
+    if settings.pre_load_hooks is None:
+        settings.pre_load_hooks = []
 
-    logger.info(f"Starting neps.run using root directory {root_directory}")
+    logger.info(f"Starting neps.run using root directory {settings.root_directory}")
 
     # Used to create the yaml holding information about the searcher.
     # Also important for testing and debugging the api.
@@ -206,24 +166,26 @@ def run(
     }
 
     # special case if you load your own optimizer via run_args
-    if inspect.isclass(searcher):
-        if issubclass(searcher, BaseOptimizer):
-            search_space = SearchSpace(**pipeline_space)
+    if inspect.isclass(settings.searcher):
+        if issubclass(settings.searcher, BaseOptimizer):
+            search_space = SearchSpace(**settings.pipeline_space)
             # aligns with the behavior of the internal neps searcher which also overwrites
             # its arguments by using searcher_kwargs
-            merge_kwargs = {**searcher_class_arguments, **searcher_kwargs}
-            searcher_info["searcher_args"] = merge_kwargs
-            searcher = searcher(search_space, **merge_kwargs)
+            # TODO: searcher_kwargs were dropped here, i.e. the merging has to happen
+            # beforehand
+            searcher_info["searcher_args"] = settings.searcher_kwargs
+            settings.searcher = settings.searcher(search_space,
+                                                  **settings.searcher_kwargs)
         else:
             # Raise an error if searcher is not a subclass of BaseOptimizer
             raise TypeError(
                 "The provided searcher must be a class that inherits from BaseOptimizer."
             )
 
-    if isinstance(searcher, BaseOptimizer):
-        searcher_instance = searcher
+    if isinstance(settings.searcher, BaseOptimizer):
+        searcher_instance = settings.searcher
         searcher_info["searcher_name"] = "baseoptimizer"
-        searcher_info["searcher_alg"] = searcher.whoami()
+        searcher_info["searcher_alg"] = settings.searcher.whoami()
         searcher_info["searcher_selection"] = "user-instantiation"
         searcher_info["neps_decision_tree"] = False
     else:
@@ -232,52 +194,56 @@ def run(
             searcher_info,
         ) = _run_args(
             searcher_info=searcher_info,
-            pipeline_space=pipeline_space,
-            max_cost_total=max_cost_total,
-            ignore_errors=ignore_errors,
-            loss_value_on_error=loss_value_on_error,
-            cost_value_on_error=cost_value_on_error,
+            pipeline_space=settings.pipeline_space,
+            max_cost_total=settings.max_cost_total,
+            ignore_errors=settings.ignore_errors,
+            loss_value_on_error=settings.loss_value_on_error,
+            cost_value_on_error=settings.cost_value_on_error,
             logger=logger,
-            searcher=searcher,
-            **searcher_kwargs,
+            searcher=settings.searcher,
+            **settings.searcher_kwargs,
         )
 
     # Check to verify if the target directory contains history of another optimizer state
     # This check is performed only when the `searcher` is built during the run
-    if not isinstance(searcher, (BaseOptimizer, str, dict, Path)):
+    if not isinstance(settings.searcher, (BaseOptimizer, str, dict, Path)):
         raise ValueError(
-            f"Unrecognized `searcher` of type {type(searcher)}. Not str or BaseOptimizer."
+            f"Unrecognized `searcher` of type {type(settings.searcher)}. Not str or "
+            f"BaseOptimizer."
         )
-    elif isinstance(searcher, BaseOptimizer):
+    elif isinstance(settings.searcher, BaseOptimizer):
         # This check is not strict when a user-defined neps.optimizer is provided
         logger.warning(
             "An instantiated optimizer is provided. The safety checks of NePS will be "
             "skipped. Accurate continuation of runs can no longer be guaranteed!"
         )
 
-    if task_id is not None:
-        root_directory = Path(root_directory) / f"task_{task_id}"
-    if development_stage_id is not None:
-        root_directory = Path(root_directory) / f"dev_{development_stage_id}"
+    if settings.task_id is not None:
+        settings.root_directory = Path(settings.root_directory) / (f"task_"
+                                                          f"{settings.task_id}")
+    if settings.development_stage_id is not None:
+        settings.root_directory = (Path(settings.root_directory) /
+                          f"dev_{settings.development_stage_id}")
 
     launch_runtime(
-        evaluation_fn=run_pipeline,
+        evaluation_fn=settings.run_pipeline,
         sampler=searcher_instance,
         optimizer_info=searcher_info,
-        optimization_dir=root_directory,
-        max_evaluations_total=max_evaluations_total,
-        max_evaluations_per_run=max_evaluations_per_run,
-        continue_until_max_evaluation_completed=continue_until_max_evaluation_completed,
+        optimization_dir=settings.root_directory,
+        max_evaluations_total=settings.max_evaluations_total,
+        max_evaluations_per_run=settings.max_evaluations_per_run,
+        continue_until_max_evaluation_completed
+        =settings.continue_until_max_evaluation_completed,
         logger=logger,
-        loss_value_on_error=loss_value_on_error,
-        ignore_errors=ignore_errors,
-        overwrite_optimization_dir=overwrite_working_directory,
-        pre_load_hooks=pre_load_hooks,
+        loss_value_on_error=settings.loss_value_on_error,
+        ignore_errors=settings.ignore_errors,
+        overwrite_optimization_dir=settings.overwrite_working_directory,
+        pre_load_hooks=settings.pre_load_hooks,
     )
 
-    if post_run_summary:
-        assert root_directory is not None
-        post_run_csv(root_directory)
+    if settings.post_run_summary:
+        assert settings.root_directory is not None
+        post_run_csv(settings.root_directory)
 
 
 def _run_args(
diff --git a/neps/utils/run_args.py b/neps/utils/run_args.py
index 71870c98..9d7f6445 100644
--- a/neps/utils/run_args.py
+++ b/neps/utils/run_args.py
@@ -5,11 +5,10 @@
 from __future__ import annotations
 
 import importlib.util
-import inspect
 import logging
 import sys
 from pathlib import Path
-from typing import Callable
+from typing import Any, Callable
 
 import yaml
 
@@ -37,7 +36,7 @@
 PRE_LOAD_HOOKS = "pre_load_hooks"
 # searcher_kwargs is used differently in yaml and just play a role for considering
 # arguments of a custom searcher class (BaseOptimizer)
-SEARCHER_KWARGS = "custom_class_searcher_kwargs"
+SEARCHER_KWARGS = "searcher_kwargs"
 MAX_EVALUATIONS_PER_RUN = "max_evaluations_per_run"
 
 
@@ -452,8 +451,7 @@ def check_essential_arguments(
     pipeline_space: dict | None,
     max_cost_total: int | None,
     max_evaluation_total: int | None,
-    searcher: BaseOptimizer | None,
-    run_args: str | None,
+    searcher: BaseOptimizer | dict | str | None,
 ) -> None:
     """Validates essential NePS configuration arguments.
 
@@ -469,7 +467,6 @@ def check_essential_arguments(
         max_cost_total: Max allowed total cost for experiments.
         max_evaluation_total: Max allowed evaluations.
         searcher: Optimizer for the configuration space.
-        run_args: A YAML file containing the configuration settings.
 
     Raises:
         ValueError: Missing or invalid essential arguments.
@@ -478,7 +475,7 @@ def check_essential_arguments(
         raise ValueError("'run_pipeline' is required but was not provided.")
     if not root_directory:
         raise ValueError("'root_directory' is required but was not provided.")
-    if not pipeline_space and (run_args or not isinstance(searcher, BaseOptimizer)):
+    if not pipeline_space and not isinstance(searcher, BaseOptimizer):
         # handling special case for searcher instance, in which user doesn't have to
         # provide the search_space because it's the argument of the searcher.
         raise ValueError("'pipeline_space' is required but was not provided.")
@@ -490,31 +487,148 @@ def check_essential_arguments(
         )
 
 
-def check_double_reference(
-    func: Callable, func_arguments: dict, yaml_arguments: dict
-) -> None:
-    """Checks if no argument is defined both via function arguments and YAML.
+# Handle Settings
 
-    Args:
-        func (Callable): The function to check arguments against.
-        func_arguments (Dict): A dictionary containing the provided arguments to the
-        function and their values.
-        yaml_arguments (Dict): A dictionary containing the arguments provided via a YAML
-        file.
 
-    Raises:
-        ValueError: If any provided argument is defined both via function arguments and
-        the YAML file.
+class Sentinel:
+    """Introduce a sentinel object as default value for checking variable assignment."""
+
+    def __repr__(self) -> str:
+        return "<Sentinel>"
+
+
+UNSET = Sentinel()
+
+
+class Settings:
+    """Centralizes and manages configuration settings from various sources of NePS
+    arguments (run_args (yaml) and neps func_args).
     """
-    sig = inspect.signature(func)
-
-    for name, param in sig.parameters.items():
-        if param.default != func_arguments[name]:
-            if name == RUN_ARGS:
-                # Ignoring run_args argument
-                continue
-            if name in yaml_arguments:
-                raise ValueError(
-                    f"Conflict for argument '{name}': Argument is defined both via "
-                    f"function arguments and YAML, which is not allowed."
-                )
+
+    def __init__(self, func_args: dict, yaml_args: str | Default | None = None):
+        """Initializes the Settings object by merging function arguments with YAML
+        configuration settings and assigning them to class attributes. It checks for
+        necessary configurations and handles default values where specified.
+
+        Args:
+        func_args (dict): The function arguments directly passed to NePS.
+        yaml_args (str | Default | None): Optional. Path to the YAML file given via run_args.
+        """
+        self.run_pipeline = UNSET
+        self.root_directory = UNSET
+        self.pipeline_space = UNSET
+        self.overwrite_working_directory = UNSET
+        self.post_run_summary = UNSET
+        self.development_stage_id = UNSET
+        self.task_id = UNSET
+        self.max_evaluations_total = UNSET
+        self.max_evaluations_per_run = UNSET
+        self.continue_until_max_evaluation_completed = UNSET
+        self.max_cost_total = UNSET
+        self.ignore_errors = UNSET
+        self.loss_value_on_error = UNSET
+        self.cost_value_on_error = UNSET
+        self.pre_load_hooks = UNSET
+        self.searcher = UNSET
+        self.searcher_kwargs = UNSET
+
+        if not isinstance(yaml_args, Default) and yaml_args is not None:
+            yaml_settings = get_run_args_from_yaml(yaml_args)
+            dict_settings = self.merge(func_args, yaml_settings)
+        else:
+            dict_settings = {}
+            for key, value in func_args.items():
+                if isinstance(value, Default):
+                    dict_settings[key] = value.value
+                else:
+                    dict_settings[key] = value
+
+        # drop run_args, not needed as a setting attribute
+        del dict_settings[RUN_ARGS]
+        self.assign(dict_settings)
+        self.check()
+
+    def merge(self, func_args: dict, yaml_args: dict) -> dict:
+        """Merge func_args and yaml_args. func_args gets priority over yaml_args."""
+        # Initialize with YAML settings
+        merged_settings = yaml_args.copy()
+
+        # overwrite or merge keys
+        for key, value in func_args.items():
+            # Handle searcher_kwargs for BaseOptimizer case
+            if key == SEARCHER_KWARGS:
+                merged_settings[SEARCHER_KWARGS] = {
+                    **yaml_args.pop(SEARCHER_KWARGS, {}),
+                    **func_args[SEARCHER_KWARGS],
+                }
+            elif not isinstance(value, Default):
+                merged_settings[key] = value
+            elif key not in yaml_args:
+                # If the key is not in yaml_args, set it from Default
+                merged_settings[key] = value.value
+        return merged_settings
+
+    def assign(self, dict_settings: dict) -> None:
+        """Updates existing attributes with values from `dict_settings`.
+        Raises AttributeError if any attribute in `dict_settings` does not exist.
+        """
+        for key, value in dict_settings.items():
+            if hasattr(self, key):
+                setattr(self, key, value)
+            else:
+                raise AttributeError(f"'Settings' object has no attribute '{key}'")
+
+    def check_unassigned_attributes(self) -> list:
+        """Return the names of attributes still UNSET or wrapped in Default."""
+        return [
+            key
+            for key, value in self.__dict__.items()
+            if value is UNSET or isinstance(value, Default)
+        ]
+
+    def check(self) -> None:
+        """Check if all values are assigned and if the essentials are provided
+        correctly.
+        """
+        unassigned_attributes = self.check_unassigned_attributes()
+        if unassigned_attributes:
+            raise ValueError(
+                f"Unassigned or default-initialized attributes detected: "
+                f"{', '.join(unassigned_attributes)}"
+            )
+        check_essential_arguments(
+            self.run_pipeline,  # type: ignore
+            self.root_directory,  # type: ignore
+            self.pipeline_space,  # type: ignore
+            self.max_cost_total,  # type: ignore
+            self.max_evaluations_total,  # type: ignore
+            self.searcher,  # type: ignore
+        )
+
+
+class Default:
+    """A class to enable default detection.
+
+    Attributes:
+        value: The value to be stored as the default.
+
+    Methods:
+        __init__(self, value): Initializes the Default object with a value.
+        __repr__(self): Returns a string representation of the Default object.
+    """
+
+    def __init__(self, value: Any):
+        """Initialize the Default object with the specified value.
+
+        Args:
+            value: The value to store as default. Can be any data type.
+        """
+        self.value = value
+
+    def __repr__(self) -> str:
+        """Return the string representation of the Default object.
+
+        Returns:
+            A string that represents the Default object in the format <default: value>.
+        """
+        return f"<default: {self.value}>"
diff --git a/tests/test_settings/overwrite_run_args.yaml b/tests/test_settings/overwrite_run_args.yaml
new file mode 100644
index 00000000..fb69b401
--- /dev/null
+++ b/tests/test_settings/overwrite_run_args.yaml
@@ -0,0 +1,43 @@
+# Full Configuration Template for NePS
+run_pipeline:
+  path: tests/test_yaml_run_args/test_declarative_usage_docs/run_pipeline.py
+  name: run_pipeline_constant
+
+pipeline_space:
+  learning_rate:
+    lower: 1e-5
+    upper: 1e-1
+    log: True  # Log scale for learning rate
+  epochs:
+    lower: 5
+    upper: 20
+    is_fidelity: True
+  optimizer:
+    choices: [adam, sgd, adamw]
+  batch_size: 64
+
+root_directory: "tests_tmpdir/test_declarative_usage_docs/full_config"
+max_evaluations_total: 20             # Budget
+max_cost_total:
+
+# Debug and Monitoring
+overwrite_working_directory: True
+post_run_summary: False
+development_stage_id: 1
+task_id: 3
+
+# Parallelization Setup
+max_evaluations_per_run: 6
+continue_until_max_evaluation_completed: False
+
+# Error Handling
+loss_value_on_error: 1.0
+cost_value_on_error: 1.0
+ignore_errors: True
+
+# Customization Options
+searcher: hyperband       # Internal key to select a NePS optimizer.
+
+# Hooks
+pre_load_hooks:
+  hook1: "tests/test_settings/test_settings.py"
diff --git a/tests/test_settings/run_args_optimizer_outside.yaml b/tests/test_settings/run_args_optimizer_outside.yaml
new file mode 100644
index 00000000..1dbfce01
--- /dev/null
+++ b/tests/test_settings/run_args_optimizer_outside.yaml
@@ -0,0 +1,20 @@
+run_pipeline:
+  name: run_pipeline
+  path: "tests/test_settings/test_settings.py"
+pipeline_space:
+  name: pipeline_space
+  path: "tests/test_settings/test_settings.py"
+
+root_directory: "path/to/root_directory"
+max_evaluations_total: 10            # Budget
+searcher:
+  path: "tests/test_settings/test_settings.py"
+  name: my_bayesian
+  # Specific arguments depending on the searcher
+  initial_design_size: 7
+  surrogate_model: gp
+  acquisition: EI
+  acquisition_sampler: random
+  random_interleave_prob: 0.1
+
+overwrite_working_directory: True
diff --git a/tests/test_settings/run_args_optimizer_settings.yaml b/tests/test_settings/run_args_optimizer_settings.yaml
new file mode 100644
index 00000000..00e34d18
--- /dev/null
+++ b/tests/test_settings/run_args_optimizer_settings.yaml
@@ -0,0 +1,50 @@
+# Full Configuration Template for NePS
+run_pipeline:
+  path: tests/test_yaml_run_args/test_declarative_usage_docs/run_pipeline.py
+  name: run_pipeline_constant
+
+pipeline_space:
+  learning_rate:
+    lower: 1e-5
+    upper: 1e-1
+    log: True  # Log scale for learning rate
+  epochs:
+    lower: 5
+    upper: 20
+    is_fidelity: True
+  optimizer:
+    choices: [adam, sgd, adamw]
+  batch_size: 64
+
+root_directory: "tests_tmpdir/test_declarative_usage_docs/full_config"
+max_evaluations_total: 20             # Budget
+max_cost_total:
+
+# Debug and Monitoring
+overwrite_working_directory: True
+post_run_summary: False
+development_stage_id: 1
+task_id: 3
+
+# Parallelization Setup
+max_evaluations_per_run: 6
+continue_until_max_evaluation_completed: False
+
+# Error Handling
+loss_value_on_error: 1.0
+cost_value_on_error: 1.0
+ignore_errors: True
+
+# Customization Options
+searcher:
+  strategy: "hyperband"  # Internal key to select a NePS optimizer.
+  eta: 3
+  initial_design_type: max_budget
+  use_priors: false
+  random_interleave_prob: 0.0
+  sample_default_first: false
+  sample_default_at_target: false
+
+# Hooks
+pre_load_hooks:
+  hook1: "tests/test_settings/test_settings.py"
diff --git a/tests/test_settings/run_args_optional.yaml b/tests/test_settings/run_args_optional.yaml
new file mode 100644
index 00000000..5e46ff99
--- /dev/null
+++ b/tests/test_settings/run_args_optional.yaml
@@ -0,0 +1,14 @@
+max_cost_total:
+overwrite_working_directory: True
+post_run_summary: False
+development_stage_id:
+task_id:
+max_evaluations_per_run:
+continue_until_max_evaluation_completed: False
+max_evaluations_total: 11 # get ignored
+root_directory: "get/ignored"
+loss_value_on_error:
+cost_value_on_error:
+ignore_errors:
+searcher: hyperband
+pre_load_hooks:
diff --git a/tests/test_settings/run_args_required.yaml b/tests/test_settings/run_args_required.yaml
new file mode 100644
index 00000000..d3c6afdd
--- /dev/null
+++ b/tests/test_settings/run_args_required.yaml
@@ -0,0 +1,8 @@
+run_pipeline:
+  name: run_pipeline
+  path: "tests/test_settings/test_settings.py"
+pipeline_space:
+  name: pipeline_space
+  path: "tests/test_settings/test_settings.py"
+max_evaluations_total: 10
+root_directory: "path/to/root_directory"
diff --git a/tests/test_settings/test_settings.py b/tests/test_settings/test_settings.py
new file mode 100644
index 00000000..fcdac758
--- /dev/null
+++ b/tests/test_settings/test_settings.py
@@ -0,0 +1,333 @@
+from neps.utils.run_args import Settings, Default
+import pytest
+import neps
+from neps.utils.run_args import get_run_args_from_yaml
+from tests.test_yaml_run_args.test_yaml_run_args import (run_pipeline, hook1, hook2,
+                                                         pipeline_space)
+from neps.optimizers.bayesian_optimization.optimizer import BayesianOptimization
+from typing import Union, Callable, Dict, List, Type
+
+BASE_PATH = "tests/test_settings"  # prefix prepended to the YAML fixture names in the parametrized cases
+run_pipeline = run_pipeline  # NOTE(review): these self-assignments look redundant after the import above;
+hook1 = hook1  # presumably kept so YAML fixtures that reference this file by path
+hook2 = hook2  # can resolve the names in this module - confirm against the run_args loader
+pipeline_space = pipeline_space
+my_bayesian = BayesianOptimization  # alias compared against the resolved searcher in the expected output
+
+
+@pytest.mark.neps_api
+@pytest.mark.parametrize("func_args, yaml_args, expected_output", [
+    (
+        {  # only essential arguments provided by func_args, no yaml
+            "run_pipeline": run_pipeline,
+            "root_directory": "path/to/root_directory",
+            "pipeline_space": pipeline_space,
+            "run_args": Default(None),
+            "overwrite_working_directory": Default(False),
+            "post_run_summary": Default(True),
+            "development_stage_id": Default(None),
+            "task_id": Default(None),
+            "max_evaluations_total": 10,
+            "max_evaluations_per_run": Default(None),
+            "continue_until_max_evaluation_completed": Default(False),
+            "max_cost_total": Default(None),
+            "ignore_errors": Default(False),
+            "loss_value_on_error": Default(None),
+            "cost_value_on_error": Default(None),
+            "pre_load_hooks": Default(None),
+            "searcher": Default("default"),
+            "searcher_kwargs": {},
+        }
+        ,
+        Default(None),
+        {
+            "run_pipeline": run_pipeline,
+            "root_directory": "path/to/root_directory",
+            "pipeline_space": pipeline_space,
+            "overwrite_working_directory": False,
+            "post_run_summary": True,
+            "development_stage_id": None,
+            "task_id": None,
+            "max_evaluations_total": 10,
+            "max_evaluations_per_run": None,
+            "continue_until_max_evaluation_completed": False,
+            "max_cost_total": None,
+            "ignore_errors": False,
+            "loss_value_on_error": None,
+            "cost_value_on_error": None,
+            "pre_load_hooks": None,
+            "searcher": "default",
+            "searcher_kwargs": {}
+        }
+    ),
+    ({  # only required elements of run_args
+        "run_pipeline": Default(None),
+        "root_directory": Default(None),
+        "pipeline_space": Default(None),
+        "run_args": Default(None),
+        "overwrite_working_directory": Default(False),
+        "post_run_summary": Default(True),
+        "development_stage_id": Default(None),
+        "task_id": Default(None),
+        "max_evaluations_total": Default(None),
+        "max_evaluations_per_run": Default(None),
+        "continue_until_max_evaluation_completed": Default(False),
+        "max_cost_total": Default(None),
+        "ignore_errors": Default(False),
+        "loss_value_on_error": Default(None),
+        "cost_value_on_error": Default(None),
+        "pre_load_hooks": Default(None),
+        "searcher": Default("default"),
+        "searcher_kwargs": {},
+    },
+    "/run_args_required.yaml",
+    {
+        "run_pipeline": run_pipeline,
+        "root_directory": "path/to/root_directory",
+        "pipeline_space": pipeline_space,
+        "overwrite_working_directory": False,
+        "post_run_summary": True,
+        "development_stage_id": None,
+        "task_id": None,
+        "max_evaluations_total": 10,
+        "max_evaluations_per_run": None,
+        "continue_until_max_evaluation_completed": False,
+        "max_cost_total": None,
+        "ignore_errors": False,
+        "loss_value_on_error": None,
+        "cost_value_on_error": None,
+        "pre_load_hooks": None,
+        "searcher": "default",
+        "searcher_kwargs": {}
+    }),
+    ({  # required via func_args, optional via yaml
+        "run_pipeline": run_pipeline,
+        "root_directory": "path/to/root_directory",
+        "pipeline_space": pipeline_space,
+        "run_args": "tests/path/to/run_args",  # will be ignored by Settings
+        "overwrite_working_directory": Default(False),
+        "post_run_summary": Default(True),
+        "development_stage_id": Default(None),
+        "task_id": Default(None),
+        "max_evaluations_total": 10,
+        "max_evaluations_per_run": Default(None),
+        "continue_until_max_evaluation_completed": Default(False),
+        "max_cost_total": Default(None),
+        "ignore_errors": Default(False),
+        "loss_value_on_error": Default(None),
+        "cost_value_on_error": Default(None),
+        "pre_load_hooks": Default(None),
+        "searcher": Default("default"),
+        "searcher_kwargs": {},
+    },
+    "/run_args_optional.yaml",
+    {
+        "run_pipeline": run_pipeline,
+        "root_directory": "path/to/root_directory",
+        "pipeline_space": pipeline_space,
+        "overwrite_working_directory": True,
+        "post_run_summary": False,
+        "development_stage_id": None,
+        "task_id": None,
+        "max_evaluations_total": 10,
+        "max_evaluations_per_run": None,
+        "continue_until_max_evaluation_completed": False,
+        "max_cost_total": None,
+        "ignore_errors": False,
+        "loss_value_on_error": None,
+        "cost_value_on_error": None,
+        "pre_load_hooks": None,
+        "searcher": "hyperband",
+        "searcher_kwargs": {}
+    }),
+    ({  # overwrite all yaml values
+            "run_pipeline": run_pipeline,
+            "root_directory": "path/to/root_directory",
+            "pipeline_space": pipeline_space,
+            "run_args": "test",
+            "overwrite_working_directory": False,
+            "post_run_summary": True,
+            "development_stage_id": 5,
+            "task_id": None,
+            "max_evaluations_total": 17,
+            "max_evaluations_per_run": None,
+            "continue_until_max_evaluation_completed": False,
+            "max_cost_total": None,
+            "ignore_errors": False,
+            "loss_value_on_error": None,
+            "cost_value_on_error": None,
+            "pre_load_hooks": None,
+            "searcher": "default",
+            "searcher_kwargs": {},
+        }
+        ,
+        "/overwrite_run_args.yaml",
+     {
+         "run_pipeline": run_pipeline,
+         "root_directory": "path/to/root_directory",
+         "pipeline_space": pipeline_space,
+         "overwrite_working_directory": False,
+         "post_run_summary": True,
+         "development_stage_id": 5,
+         "task_id": None,
+         "max_evaluations_total": 17,
+         "max_evaluations_per_run": None,
+         "continue_until_max_evaluation_completed": False,
+         "max_cost_total": None,
+         "ignore_errors": False,
+         "loss_value_on_error": None,
+         "cost_value_on_error": None,
+         "pre_load_hooks": None,
+         "searcher": "default",
+         "searcher_kwargs": {},
+     }
+    ),
+    ({  # optimizer args special case
+            "run_pipeline": run_pipeline,
+            "root_directory": "path/to/root_directory",
+            "pipeline_space": pipeline_space,
+            "run_args": "test",
+            "overwrite_working_directory": False,
+            "post_run_summary": True,
+            "development_stage_id": 5,
+            "task_id": None,
+            "max_evaluations_total": 17,
+            "max_evaluations_per_run": None,
+            "continue_until_max_evaluation_completed": False,
+            "max_cost_total": None,
+            "ignore_errors": False,
+            "loss_value_on_error": None,
+            "cost_value_on_error": None,
+            "pre_load_hooks": None,
+            "searcher": Default("default"),
+            "searcher_kwargs": {"initial_design_type": "max_budget",
+            "use_priors": False,
+            "random_interleave_prob": 0.0,
+            "sample_default_first": False,
+            "sample_default_at_target": False},
+        }
+        ,
+        "/run_args_optimizer_settings.yaml",
+     {
+         "run_pipeline": run_pipeline,
+         "root_directory": "path/to/root_directory",
+         "pipeline_space": pipeline_space,
+         "overwrite_working_directory": False,
+         "post_run_summary": True,
+         "development_stage_id": 5,
+         "task_id": None,
+         "max_evaluations_total": 17,
+         "max_evaluations_per_run": None,
+         "continue_until_max_evaluation_completed": False,
+         "max_cost_total": None,
+         "ignore_errors": False,
+         "loss_value_on_error": None,
+         "cost_value_on_error": None,
+         "pre_load_hooks": None,
+         "searcher": {
+            "strategy": "hyperband",
+            "eta": 3,
+            "initial_design_type": "max_budget",
+            "use_priors": False,
+            "random_interleave_prob": 0.0,
+            "sample_default_first": False,
+            "sample_default_at_target": False},
+         "searcher_kwargs": {"initial_design_type": "max_budget",
+            "use_priors": False,
+            "random_interleave_prob": 0.0,
+            "sample_default_first": False,
+            "sample_default_at_target": False},
+     }),
+({  # load optimizer with args
+        "run_pipeline": Default(None),
+        "root_directory": Default(None),
+        "pipeline_space": Default(None),
+        "run_args": Default(None),
+        "overwrite_working_directory": Default(False),
+        "post_run_summary": Default(True),
+        "development_stage_id": Default(None),
+        "task_id": Default(None),
+        "max_evaluations_total": Default(None),
+        "max_evaluations_per_run": Default(None),
+        "continue_until_max_evaluation_completed": Default(False),
+        "max_cost_total": Default(None),
+        "ignore_errors": Default(False),
+        "loss_value_on_error": Default(None),
+        "cost_value_on_error": Default(None),
+        "pre_load_hooks": Default(None),
+        "searcher": Default("default"),
+        "searcher_kwargs": {"random_interleave_prob": 0.2,
+                            "initial_design_size": 9},
+        }
+        ,
+        "/run_args_optimizer_outside.yaml",
+     {
+         "run_pipeline": run_pipeline,
+         "root_directory": "path/to/root_directory",
+         "pipeline_space": pipeline_space,
+         "overwrite_working_directory": True,
+         "post_run_summary": True,
+         "development_stage_id": None,
+         "task_id": None,
+         "max_evaluations_total": 10,
+         "max_evaluations_per_run": None,
+         "continue_until_max_evaluation_completed": False,
+         "max_cost_total": None,
+         "ignore_errors": False,
+         "loss_value_on_error": None,
+         "cost_value_on_error": None,
+         "pre_load_hooks": None,
+         "searcher": my_bayesian,
+         "searcher_kwargs": {"acquisition": "EI",
+                             "acquisition_sampler": "random",
+                             "random_interleave_prob": 0.2,
+                             "initial_design_size": 9,
+                             "surrogate_model": "gp"
+                             },
+     })
+])
+def test_check_settings(func_args: Dict, yaml_args: Union[str, Default],
+                        expected_output: Dict) -> None:
+    """
+    Check that Settings built from func_args and yaml_args exposes expected_output
+    """
+    if not isinstance(yaml_args, Default):  # Default(None) marks "no YAML file"
+        yaml_args = BASE_PATH + yaml_args
+    settings = Settings(func_args, yaml_args)
+    for key, value in expected_output.items():
+        assert getattr(settings, key) == value
+
+
+@pytest.mark.neps_api
+@pytest.mark.parametrize("func_args, yaml_args, error", [
+    (
+        {
+            "root_directory": Default(None),
+            "pipeline_space": Default(None),
+            "run_args": Default(None),
+            "overwrite_working_directory": Default(False),
+            "post_run_summary": Default(True),
+            "development_stage_id": Default(None),
+            "task_id": Default(None),
+            "max_evaluations_total": Default(None),
+            "max_evaluations_per_run": Default(None),
+            "continue_until_max_evaluation_completed": Default(False),
+            "max_cost_total": Default(None),
+            "ignore_errors": Default(False),
+            "loss_value_on_error": Default(None),
+            "cost_value_on_error": Default(None),
+            "pre_load_hooks": Default(None),
+            "searcher": Default("default"),
+            "searcher_kwargs": {},
+        },
+        Default(None),
+        ValueError
+    )
+])
+def test_settings_initialization_error(func_args: Dict, yaml_args: Union[str, Default],
+                                       error: Type[Exception]) -> None:
+    """
+    Check that constructing Settings raises the given exception type for these arguments
+    """
+    with pytest.raises(error):
+        Settings(func_args, yaml_args)
diff --git a/tests/test_yaml_run_args/test_yaml_run_args.py b/tests/test_yaml_run_args/test_yaml_run_args.py
index 3ce2185f..5a0c5d22 100644
--- a/tests/test_yaml_run_args/test_yaml_run_args.py
+++ b/tests/test_yaml_run_args/test_yaml_run_args.py
@@ -179,7 +179,7 @@ def are_functions_equivalent(f1: Union[Callable, List[Callable]],
             "cost_value_on_error": 2.1,
             "ignore_errors": False,
             "searcher": BayesianOptimization,
-            "custom_class_searcher_kwargs": {'initial_design_size': 5,
+            "searcher_kwargs": {'initial_design_size': 5,
                                              'surrogate_model': 'gp'},
             "pre_load_hooks": [hook1]