From 712c4e95da7c0602af36c22321821f849f92ba7a Mon Sep 17 00:00:00 2001 From: Lukas Fehring Date: Mon, 2 Dec 2024 17:05:40 +0100 Subject: [PATCH 1/5] First version to update load and save --- smac/runhistory/runhistory.py | 66 ++++++++++++++++------------------- 1 file changed, 31 insertions(+), 35 deletions(-) diff --git a/smac/runhistory/runhistory.py b/smac/runhistory/runhistory.py index ab6d8f564..c29637793 100644 --- a/smac/runhistory/runhistory.py +++ b/smac/runhistory/runhistory.py @@ -768,25 +768,23 @@ def save(self, filename: str | Path = "runhistory.json") -> None: ---------- filename : str | Path, defaults to "runhistory.json" """ - data = [] + data = dict() for k, v in self._data.items(): - data += [ - ( - int(k.config_id), - str(k.instance) if k.instance is not None else None, - int(k.seed) if k.seed is not None else None, - float(k.budget) if k.budget is not None else None, - v.cost, - v.time, - v.cpu_time, - v.status, - v.starttime, - v.endtime, - v.additional_info, - ) - ] - - config_ids_to_serialize = set([entry[0] for entry in data]) + data[k.config_id] = { + "config_id": k.config_id, + "instance": k.instance if k.instance is not None else None, + "seed": k.seed if k.seed is not None else None, + "budget": k.budget if k.budget is not None else None, + "cost": v.cost, + "time": v.time, + "cpu_time": v.cpu_time, + "status": v.status, + "starttime": v.starttime, + "endtime": v.endtime, + "additional_info": v.additional_info + } + + config_ids_to_serialize = set(data.keys()) configs = {} config_origins = {} for id_, config in self._ids_config.items(): @@ -857,32 +855,30 @@ def load(self, filename: str | Path, configspace: ConfigurationSpace) -> None: # Important to use add method to use all data structure correctly # NOTE: These hardcoded indices can easily lead to trouble - for entry in data["data"]: - # Set n_objectives first + for key, value in data["data"].items(): if self._n_objectives == -1: - if isinstance(entry[4], (float, int)): + if isinstance(value["cost"], (float, int)): self._n_objectives = 1 else: - self._n_objectives = len(entry[4]) + self._n_objectives = len(value["cost"]) cost: list[float] | float if self._n_objectives == 1: - cost = float(entry[4]) + cost = float(value["cost"]) else: - cost = [float(x) for x in entry[4]] - + cost = [float(x) for x in value["cost"]] self.add( - config=self._ids_config[int(entry[0])], + config=self._ids_config[int(key)], # TODO probably -1 cost=cost, - time=float(entry[5]), - cpu_time=float(entry[6]), - status=StatusType(entry[7]), - instance=entry[1], - seed=entry[2], - budget=entry[3], - starttime=entry[8], - endtime=entry[9], - additional_info=entry[10], + time=value["time"], + cpu_time=value["cpu_time"], + status=StatusType(value["status"]), + instance=value["instance"], + seed=value["seed"], + budget=value["budget"], + starttime=value["starttime"], + endtime=value["endtime"], + additional_info=value["additional_info"], ) # Although adding trials should give us the same stats, the trajectory might be different From 788e45d8bfd735a5fd72ddeced539b5f5585a576 Mon Sep 17 00:00:00 2001 From: Lukas Fehring Date: Wed, 11 Dec 2024 15:28:33 +0100 Subject: [PATCH 2/5] Update logging docs --- docs/advanced_usage/8_logging.md | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/advanced_usage/8_logging.md b/docs/advanced_usage/8_logging.md index 18e8d7496..39cb2fb6a 100644 --- a/docs/advanced_usage/8_logging.md +++ b/docs/advanced_usage/8_logging.md @@ -96,24 +96,24 @@ The runhistory.json in split into four parts. `stats`, `data`, `configs`, and `c }, ``` -`data` contains a list of entries, one for each configuration. +`data` contains a list of entries, one for each configuration where the keys are the one-based `config_id`. ```json - "data": [ - [ - 1, # config_id - null, # instance or None - 209652396, # seed or None - null, # budget or None - 5.4345623938566385, # cost - 6.699562072753906e-05, # time - 6.299999999992423e-05, # cpu_time - 1, # status - 1733133181.2144582, # start_time - 1733133181.21695, # end_time - {} # additional_info - ], + "data": { + "1": { + "config_id": 1, + "instance": null, + "seed": 398764591, + "budget": null, + "cost": 16916.0, + "time": 4.0531158447265625e-06, + "cpu_time": 3.000000006636583e-06, + "status": 1, + "starttime": 1733155597.639732, + "endtime": 1733155597.64017, + "additional_info": {} + }, ... - ] + } ``` `configs` is a human-readable dictionary of configurations, where the keys are the one-based `config_id`. It is important to note that in `runhistory.json`, the indexing is zero-based. From 5dfe5e080ad0ffedfeaf898d891faf9d0f62b393 Mon Sep 17 00:00:00 2001 From: Lukas Fehring Date: Fri, 13 Dec 2024 14:00:20 +0100 Subject: [PATCH 3/5] Remove config_id duplication --- docs/advanced_usage/8_logging.md | 1 - smac/runhistory/runhistory.py | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/advanced_usage/8_logging.md b/docs/advanced_usage/8_logging.md index 39cb2fb6a..b28fc7a0a 100644 --- a/docs/advanced_usage/8_logging.md +++ b/docs/advanced_usage/8_logging.md @@ -100,7 +100,6 @@ The runhistory.json in split into four parts. `stats`, `data`, `configs`, and `c ```json "data": { "1": { - "config_id": 1, "instance": null, "seed": 398764591, "budget": null, diff --git a/smac/runhistory/runhistory.py b/smac/runhistory/runhistory.py index c29637793..cebd9544b 100644 --- a/smac/runhistory/runhistory.py +++ b/smac/runhistory/runhistory.py @@ -771,7 +771,6 @@ def save(self, filename: str | Path = "runhistory.json") -> None: data = dict() for k, v in self._data.items(): data[k.config_id] = { - "config_id": k.config_id, "instance": k.instance if k.instance is not None else None, "seed": k.seed if k.seed is not None else None, "budget": k.budget if k.budget is not None else None, @@ -868,7 +867,7 @@ def load(self, filename: str | Path, configspace: ConfigurationSpace) -> None: else: cost = [float(x) for x in value["cost"]] self.add( - config=self._ids_config[int(key)], # TODO probably -1 + config=self._ids_config[int(key)], cost=cost, time=value["time"], cpu_time=value["cpu_time"], From 31a1a14752148c7931f65f8e351530bd6446211e Mon Sep 17 00:00:00 2001 From: Lukas Fehring Date: Thu, 19 Dec 2024 16:00:42 +0100 Subject: [PATCH 4/5] Update RunHistory --- docs/1_installation.md | 2 +- examples/2_multi_fidelity/1_mlp_epochs.py | 2 +- smac/runhistory/runhistory.py | 47 ++++++++++++----------- 3 files changed, 26 insertions(+), 25 deletions(-) diff --git a/docs/1_installation.md b/docs/1_installation.md index d886d4b49..06dc5fe21 100644 --- a/docs/1_installation.md +++ b/docs/1_installation.md @@ -1,5 +1,5 @@ # Installation - +TODO UPDATE THIS TO INCLUDE MIN_TRIALS. DO NOT ACCEPT A PUSH WITH THIS ## Requirements SMAC is written in python3 and therefore requires an environment with python>=3.8. diff --git a/examples/2_multi_fidelity/1_mlp_epochs.py b/examples/2_multi_fidelity/1_mlp_epochs.py index 3ea7f30f4..2981c5fbd 100644 --- a/examples/2_multi_fidelity/1_mlp_epochs.py +++ b/examples/2_multi_fidelity/1_mlp_epochs.py @@ -140,7 +140,7 @@ def plot_trajectory(facades: list[AbstractFacade]) -> None: mlp = MLP() facades: list[AbstractFacade] = [] - for intensifier_object in [SuccessiveHalving, Hyperband]: + for intensifier_object in [ Hyperband]: # Define our environment variables scenario = Scenario( mlp.configspace, diff --git a/smac/runhistory/runhistory.py b/smac/runhistory/runhistory.py index cebd9544b..75307256b 100644 --- a/smac/runhistory/runhistory.py +++ b/smac/runhistory/runhistory.py @@ -768,22 +768,23 @@ def save(self, filename: str | Path = "runhistory.json") -> None: ---------- filename : str | Path, defaults to "runhistory.json" """ - data = dict() + data = list() for k, v in self._data.items(): - data[k.config_id] = { - "instance": k.instance if k.instance is not None else None, - "seed": k.seed if k.seed is not None else None, - "budget": k.budget if k.budget is not None else None, + data.append({ + "config_id": int(k.config_id), + "instance": str(k.instance) if k.instance is not None else None, + "seed": int(k.seed) if k.seed is not None else None, + "budget": float(k.budget) if k.budget is not None else None, "cost": v.cost, "time": v.time, "cpu_time": v.cpu_time, "status": v.status, "starttime": v.starttime, "endtime": v.endtime, - "additional_info": v.additional_info - } + "additional_info": v.additional_info, + }) - config_ids_to_serialize = set(data.keys()) + config_ids_to_serialize = set([entry["config_id"] for entry in data]) configs = {} config_origins = {} for id_, config in self._ids_config.items(): @@ -854,30 +855,30 @@ def load(self, filename: str | Path, configspace: ConfigurationSpace) -> None: # Important to use add method to use all data structure correctly # NOTE: These hardcoded indices can easily lead to trouble - for key, value in data["data"].items(): + for entry in data["data"]: if self._n_objectives == -1: - if isinstance(value["cost"], (float, int)): + if isinstance(entry["cost"], (float, int)): self._n_objectives = 1 else: - self._n_objectives = len(value["cost"]) + self._n_objectives = len(entry["cost"]) cost: list[float] | float if self._n_objectives == 1: - cost = float(value["cost"]) + cost = float(entry["cost"]) else: - cost = [float(x) for x in value["cost"]] + cost = [float(x) for x in entry["cost"]] self.add( - config=self._ids_config[int(key)], + config=self._ids_config[int(entry["config_id"])], cost=cost, - time=value["time"], - cpu_time=value["cpu_time"], - status=StatusType(value["status"]), - instance=value["instance"], - seed=value["seed"], - budget=value["budget"], - starttime=value["starttime"], - endtime=value["endtime"], - additional_info=value["additional_info"], + time=entry["time"], + cpu_time=entry["cpu_time"], + status=StatusType(entry["status"]), + instance=entry["instance"], + seed=entry["seed"], + budget=entry["budget"], + starttime=entry["starttime"], + endtime=entry["endtime"], + additional_info=entry["additional_info"], ) # Although adding trials should give us the same stats, the trajectory might be different From 4f77643e70e5180c67944a9c99e41fac86b1dabb Mon Sep 17 00:00:00 2001 From: Lukas Fehring Date: Thu, 19 Dec 2024 16:05:01 +0100 Subject: [PATCH 5/5] Update docstyle --- smac/runhistory/runhistory.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/smac/runhistory/runhistory.py b/smac/runhistory/runhistory.py index 75307256b..4aeb10b41 100644 --- a/smac/runhistory/runhistory.py +++ b/smac/runhistory/runhistory.py @@ -770,19 +770,21 @@ def save(self, filename: str | Path = "runhistory.json") -> None: """ data = list() for k, v in self._data.items(): - data.append({ - "config_id": int(k.config_id), - "instance": str(k.instance) if k.instance is not None else None, - "seed": int(k.seed) if k.seed is not None else None, - "budget": float(k.budget) if k.budget is not None else None, - "cost": v.cost, - "time": v.time, - "cpu_time": v.cpu_time, - "status": v.status, - "starttime": v.starttime, - "endtime": v.endtime, - "additional_info": v.additional_info, - }) + data.append( + { + "config_id": int(k.config_id), + "instance": str(k.instance) if k.instance is not None else None, + "seed": int(k.seed) if k.seed is not None else None, + "budget": float(k.budget) if k.budget is not None else None, + "cost": v.cost, + "time": v.time, + "cpu_time": v.cpu_time, + "status": v.status, + "starttime": v.starttime, + "endtime": v.endtime, + "additional_info": v.additional_info, + } + ) config_ids_to_serialize = set([entry["config_id"] for entry in data]) configs = {}