Skip to content

Commit

Permalink
Migrate finalized keys for response configs
Browse files Browse the repository at this point in the history
  • Loading branch information
yngve-sk committed Nov 28, 2024
1 parent 53eea27 commit a26e843
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 7 deletions.
3 changes: 2 additions & 1 deletion src/ert/storage/local_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,7 @@ def _migrate(self, version: int) -> None:
to6,
to7,
to8,
to9,
)

try:
Expand Down Expand Up @@ -516,7 +517,7 @@ def _migrate(self, version: int) -> None:

elif version < _LOCAL_STORAGE_VERSION:
migrations = list(
enumerate([to2, to3, to4, to5, to6, to7, to8], start=1)
enumerate([to2, to3, to4, to5, to6, to7, to8, to9], start=1)
)
for from_version, migration in migrations[version - 1 :]:
print(f"* Updating storage to version: {from_version+1}")
Expand Down
65 changes: 65 additions & 0 deletions src/ert/storage/migration/to9.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import json
import os
from pathlib import Path

import polars

info = "Migrate finalized response keys into configs"


def _read_json(file_path: Path):
    """Return the parsed content of the UTF-8 encoded JSON file at *file_path*."""
    with open(file_path, encoding="utf-8") as f:
        return json.load(f)


def _find_response_keys(
    ensembles: "list[Path]", experiment_id: str, response_type: str
) -> "list[str] | None":
    """Return the sorted response keys of the first stored response found.

    Only ensembles whose ``index.json`` refers to *experiment_id* are
    searched. Within those, the first ``realization-*`` directory holding a
    ``<response_type>.parquet`` file is used as the sample.

    Args:
        ensembles: Ensemble directories of the storage being migrated.
        experiment_id: Id of the experiment the ensembles must belong to.
        response_type: Response type name, also the stem of the parquet file.

    Returns:
        The sorted unique values of the ``response_key`` column of the sample
        file, or ``None`` if no matching ensemble has a stored response of
        this type.
    """
    for ens in ensembles:
        if _read_json(ens / "index.json")["experiment_id"] != experiment_id:
            continue

        for real_dir in ens.glob("realization-*"):
            response_file = real_dir / f"{response_type}.parquet"
            if response_file.exists():
                df = polars.read_parquet(response_file)
                return sorted(df["response_key"].unique().to_list())

    return None


def _migrate_response_configs_wrt_finalized_keys(path: Path) -> None:
    """Store finalized response keys in every experiment's ``responses.json``.

    For each response config that is not already marked with a truthy
    ``has_finalized_keys``, a sample response is looked up in the ensembles
    of the experiment. If one is found, its keys are written to ``keys`` and
    ``has_finalized_keys`` is set to ``True``. Configs that still lack the
    ``has_finalized_keys`` entry afterwards get a default derived from
    ``_ert_kind``.

    Args:
        path: Root directory of the storage, containing ``experiments/`` and
            ``ensembles/``.
    """
    # Materialize the glob: it is a one-shot iterator, and the ensembles must
    # be searchable again for every response type of every experiment.
    ensembles = list(path.glob("ensembles/*"))

    for experiment in path.glob("experiments/*"):
        experiment_id = _read_json(experiment / "index.json")["id"]
        responses_file = experiment / "responses.json"
        responses_config = _read_json(responses_file)

        for response_type, config in responses_config.items():
            if not config.get("has_finalized_keys"):
                # Read a sample response and write the keys
                response_keys = _find_response_keys(
                    ensembles, experiment_id, response_type
                )
                if response_keys is not None:
                    config["has_finalized_keys"] = True
                    config["keys"] = response_keys

            if "has_finalized_keys" not in config:
                # At this point in "storage history",
                # only gendata and summary response types
                # exist, and only summary starts without finalized keys
                config["has_finalized_keys"] = (
                    config["_ert_kind"] != "SummaryConfig"
                )

        # Replace the old file with the migrated configuration.
        os.remove(responses_file)
        with open(responses_file, mode="w+", encoding="utf-8") as f:
            json.dump(
                responses_config,
                f,
                default=str,
                indent=2,
            )


def migrate(path: Path) -> None:
    """Run the version 9 storage migration on the storage rooted at *path*."""
    _migrate_response_configs_wrt_finalized_keys(path)
17 changes: 13 additions & 4 deletions tests/ert/unit_tests/storage/migration/test_version_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,19 @@ def test_migrate_responses(setup_case, set_ert_config):
response_info = json.loads(
(experiment._path / "responses.json").read_text(encoding="utf-8")
)
assert (
experiment.response_configuration
== ert_config.ensemble_config.response_configs
)

response_config_exp = experiment.response_configuration
response_config_ens = ert_config.ensemble_config.response_configs

# From storage v9 and onwards the response config is mutated
# when migrating an existing experiment, hence we expect different
# keys for summary config in the experiment.
response_config_ens["summary"].has_finalized_keys = response_config_exp[
"summary"
].has_finalized_keys
response_config_ens["summary"].keys = response_config_exp["summary"].keys

assert response_config_exp == response_config_ens

assert set(response_info) == {
"gen_data",
Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{'gen_data': GenDataConfig(name='gen_data', input_files=['gen%d.txt'], keys=['GEN'], has_finalized_keys=True, report_steps_list=[[1]]), 'summary': SummaryConfig(name='summary', input_files=['CASE'], keys=['FOPR', 'RWPR'], has_finalized_keys=False, refcase={})}
{'gen_data': GenDataConfig(name='gen_data', input_files=['gen%d.txt'], keys=['GEN'], has_finalized_keys=True, report_steps_list=[[1]]), 'summary': SummaryConfig(name='summary', input_files=['CASE'], keys=['FOPR'], has_finalized_keys=True, refcase={})}
8 changes: 7 additions & 1 deletion tests/ert/unit_tests/storage/test_storage_migration.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,11 @@ def test_that_storage_matches(
response_config = experiment.response_configuration
response_config["summary"].refcase = {}

assert all(
"has_finalized_keys" in config
for config in experiment.response_info.values()
)

with open(
experiment._path / experiment._responses_file, "w", encoding="utf-8"
) as f:
Expand Down Expand Up @@ -195,7 +200,8 @@ def test_that_storage_matches(
"gen_data",
)

assert not ensemble.experiment._has_finalized_response_keys("summary")
assert ensemble.experiment._has_finalized_response_keys("summary")
assert ensemble.experiment._has_finalized_response_keys("gen_data")
ensemble.save_response("summary", ensemble.load_responses("summary", (0,)), 0)
assert ensemble.experiment._has_finalized_response_keys("summary")
assert ensemble.experiment.response_type_to_response_keys["summary"] == ["FOPR"]
Expand Down

0 comments on commit a26e843

Please sign in to comment.