automl · LukasFehring · Dec 2, 2024 · Dec 11, 2024 · Dec 13, 2024 · Dec 19, 2024
diff --git a/docs/advanced_usage/8_logging.md b/docs/advanced_usage/8_logging.md
@@ -96,24 +96,23 @@ The runhistory.json in split into four parts. `stats`, `data`, `configs`, and `c
   },
 ```
 
-`data` contains a list of entries, one for each configuration.
+`data` contains a dictionary of entries, one for each configuration, where the keys are the one-based `config_id`.
 ```json
-  "data": [
-    [
-      1,                            # config_id
-      null,                         # instance or None
-      209652396,                    # seed or None
-      null,                         # budget or None
-      5.4345623938566385,           # cost
-      6.699562072753906e-05,        # time
-      6.299999999992423e-05,        # cpu_time
-      1,                            # status
-      1733133181.2144582,           # start_time
-      1733133181.21695,             # end_time
-      {}                            # additional_info
-    ],
+  "data": {
+    "1": {
+      "instance": null,
+      "seed": 398764591,
+      "budget": null,
+      "cost": 16916.0,
+      "time": 4.0531158447265625e-06,
+      "cpu_time": 3.000000006636583e-06,
+      "status": 1,
+      "starttime": 1733155597.639732,
+      "endtime": 1733155597.64017,
+      "additional_info": {}
+    },
     ...
-  ]
+  }
 ```
 
 `configs` is a human-readable dictionary of configurations, where the keys are the one-based `config_id`. It is important to note that in `runhistory.json`, the indexing is zero-based.

diff --git a/smac/runhistory/runhistory.py b/smac/runhistory/runhistory.py
@@ -768,25 +768,22 @@ def save(self, filename: str | Path = "runhistory.json") -> None:
         ----------
         filename : str | Path, defaults to "runhistory.json"
         """
-        data = []
+        data = dict()
         for k, v in self._data.items():
-            data += [
-                (
-                    int(k.config_id),
-                    str(k.instance) if k.instance is not None else None,
-                    int(k.seed) if k.seed is not None else None,
-                    float(k.budget) if k.budget is not None else None,
-                    v.cost,
-                    v.time,
-                    v.cpu_time,
-                    v.status,
-                    v.starttime,
-                    v.endtime,
-                    v.additional_info,
-                )
-            ]
-
-        config_ids_to_serialize = set([entry[0] for entry in data])
+            data[k.config_id] = {
+                "instance": k.instance if k.instance is not None else None,
+                "seed": k.seed if k.seed is not None else None,
+                "budget": k.budget if k.budget is not None else None,
+                "cost": v.cost,
+                "time": v.time,
+                "cpu_time": v.cpu_time,
+                "status": v.status,
+                "starttime": v.starttime,
+                "endtime": v.endtime,
+                "additional_info": v.additional_info
+            }
+
+        config_ids_to_serialize = set(data.keys())
         configs = {}
         config_origins = {}
         for id_, config in self._ids_config.items():
@@ -857,32 +854,30 @@ def load(self, filename: str | Path, configspace: ConfigurationSpace) -> None:
 
         # Important to use add method to use all data structure correctly
         # NOTE: These hardcoded indices can easily lead to trouble
-        for entry in data["data"]:
-            # Set n_objectives first
+        for key, value in data["data"].items():
             if self._n_objectives == -1:
-                if isinstance(entry[4], (float, int)):
+                if isinstance(value["cost"], (float, int)):
                     self._n_objectives = 1
                 else:
-                    self._n_objectives = len(entry[4])
+                    self._n_objectives = len(value["cost"])
 
             cost: list[float] | float
             if self._n_objectives == 1:
-                cost = float(entry[4])
+                cost = float(value["cost"])
             else:
-                cost = [float(x) for x in entry[4]]
-
+                cost = [float(x) for x in value["cost"]]
             self.add(
-                config=self._ids_config[int(entry[0])],
+                config=self._ids_config[int(key)],
                 cost=cost,
-                time=float(entry[5]),
-                cpu_time=float(entry[6]),
-                status=StatusType(entry[7]),
-                instance=entry[1],
-                seed=entry[2],
-                budget=entry[3],
-                starttime=entry[8],
-                endtime=entry[9],
-                additional_info=entry[10],
+                time=value["time"],
+                cpu_time=value["cpu_time"],
+                status=StatusType(value["status"]),
+                instance=value["instance"],
+                seed=value["seed"],
+                budget=value["budget"],
+                starttime=value["starttime"],
+                endtime=value["endtime"],
+                additional_info=value["additional_info"],
             )
 
         # Although adding trials should give us the same stats, the trajectory might be different