From 98379ee96180c9c9b28ab2c7bd9199fbc1aafb97 Mon Sep 17 00:00:00 2001 From: "Yngve S. Kristiansen" Date: Wed, 27 Nov 2024 09:09:28 +0100 Subject: [PATCH] Store each response key as a column for responses --- src/ert/analysis/_es_update.py | 11 ++++- src/ert/config/gen_data_config.py | 7 +++ src/ert/config/summary_config.py | 3 ++ src/ert/dark_storage/common.py | 20 ++++----- src/ert/data/_measured_data.py | 11 ++++- src/ert/simulator/batch_simulator_context.py | 2 +- src/ert/storage/local_ensemble.py | 26 +++++++---- src/ert/storage/local_storage.py | 5 ++- src/ert/storage/migration/to8.py | 5 ++- src/ert/storage/migration/to9.py | 43 +++++++++++++++++++ tests/ert/performance_tests/test_analysis.py | 2 +- .../test_dark_storage_performance.py | 4 +- .../performance_tests/test_memory_usage.py | 21 ++++++--- .../ert/unit_tests/analysis/test_es_update.py | 8 ++-- .../unit_tests/dark_storage/test_common.py | 2 +- .../gui/tools/plot/test_plot_api.py | 4 +- .../scenarios/test_summary_response.py | 7 +-- .../test_that_storage_matches/gen_data | 42 +++++++++--------- .../test_that_storage_matches/summary_data | 42 +++++++++--------- .../unit_tests/storage/test_local_storage.py | 27 ++++++------ .../storage/test_storage_migration.py | 8 ++-- .../ert/unit_tests/test_load_forward_model.py | 16 +++---- tests/ert/unit_tests/test_summary_response.py | 1 - tests/everest/test_api_snapshots.py | 2 +- 24 files changed, 200 insertions(+), 119 deletions(-) create mode 100644 src/ert/storage/migration/to9.py diff --git a/src/ert/analysis/_es_update.py b/src/ert/analysis/_es_update.py index 00a08421f8e..be7f66af4d1 100644 --- a/src/ert/analysis/_es_update.py +++ b/src/ert/analysis/_es_update.py @@ -165,10 +165,19 @@ def _get_observations_and_responses( response_type, realizations=tuple(iens_active_index) ) + # Make realizations into columns, + # and add response_key column + unpivoted = responses_for_type.unpivot( + on=response_cls.keys, + variable_name="response_key", + value_name="values", + index=["realization", *response_cls.primary_key], + ) + # Note that if there are duplicate entries for one # response at one index, they are aggregated together # with "mean" by default - pivoted = responses_for_type.pivot( + pivoted = unpivoted.pivot( on="realization", index=["response_key", *response_cls.primary_key], aggregate_function="mean", diff --git a/src/ert/config/gen_data_config.py b/src/ert/config/gen_data_config.py index 9cb25d8e8be..c21f4016b12 100644 --- a/src/ert/config/gen_data_config.py +++ b/src/ert/config/gen_data_config.py @@ -187,6 +187,13 @@ def _read_file(filename: Path, report_step: int) -> polars.DataFrame: ) combined = polars.concat(datasets_per_name) + + if combined.is_empty(): + raise InvalidResponseFile( + f"No data found within response files: {', '.join(self.input_files)}" + ) + + combined = combined.pivot(on="response_key", index=self.primary_key) return combined def get_args_for_key(self, key: str) -> Tuple[Optional[str], Optional[List[int]]]: diff --git a/src/ert/config/summary_config.py b/src/ert/config/summary_config.py index 0fc934618c8..0c8d3a84416 100644 --- a/src/ert/config/summary_config.py +++ b/src/ert/config/summary_config.py @@ -59,6 +59,9 @@ def read_from_file(self, run_path: str, iens: int) -> polars.DataFrame: } ) df = df.explode("values", "time") + df = df.pivot( + on="response_key", index=self.primary_key, aggregate_function="mean" + ) return df @property diff --git a/src/ert/dark_storage/common.py b/src/ert/dark_storage/common.py index a1c10ef4fd6..11ccd5cc338 100644 --- a/src/ert/dark_storage/common.py +++ b/src/ert/dark_storage/common.py @@ -130,17 +130,15 @@ def data_for_key( if summary_data.is_empty(): return pd.DataFrame() - df = ( - summary_data.rename({"time": "Date", "realization": "Realization"}) - .drop("response_key") + data = ( + summary_data.pivot( + on="time", values=response_key, aggregate_function="mean" + ) + .rename({"realization": "Realization"}) .to_pandas() ) - df = df.set_index(["Date", "Realization"]) - # This performs the same aggragation by mean of duplicate values - # as in ert/analysis/_es_update.py - df = df.groupby(["Date", "Realization"]).mean() - data = df.unstack(level="Date") - data.columns = data.columns.droplevel(0) + data.set_index("Realization", inplace=True) + data.columns = data.columns.astype("datetime64[ms]") try: return data.astype(float) except ValueError: @@ -162,8 +160,8 @@ def data_for_key( try: vals = data.filter(polars.col("report_step").eq(report_step)) - pivoted = vals.drop("response_key", "report_step").pivot( - on="index", values="values" + pivoted = vals.drop(["report_step"]).pivot( + on=["index"], values=response_key, aggregate_function="mean" ) data = pivoted.to_pandas().set_index("realization") data.columns = data.columns.astype(int) diff --git a/src/ert/data/_measured_data.py b/src/ert/data/_measured_data.py index de619af2ca3..4f9e03f3177 100644 --- a/src/ert/data/_measured_data.py +++ b/src/ert/data/_measured_data.py @@ -120,10 +120,19 @@ def _get_data( f"No response loaded for observation type: {response_type}" ) + # Make realizations into columns, + # and add response_key column + unpivoted = responses_for_type.unpivot( + on=response_cls.keys, + variable_name="response_key", + value_name="values", + index=["realization", *response_cls.primary_key], + ) + # Note that if there are duplicate entries for one # response at one index, they are aggregated together # with "mean" by default - pivoted = responses_for_type.pivot( + pivoted = unpivoted.pivot( on="realization", index=["response_key", *response_cls.primary_key], aggregate_function="mean", diff --git a/src/ert/simulator/batch_simulator_context.py b/src/ert/simulator/batch_simulator_context.py index 41126c60561..7df3f5e3291 100644 --- a/src/ert/simulator/batch_simulator_context.py +++ b/src/ert/simulator/batch_simulator_context.py @@ -280,7 +280,7 @@ def results(self) -> List[Optional[Dict[str, "npt.NDArray[np.float64]"]]]: d = {} for key in self.result_keys: data = self.ensemble.load_responses(key, (sim_id,)) - d[key] = data["values"].to_numpy() + d[key] = data[key].to_numpy() res.append(d) return res diff --git a/src/ert/storage/local_ensemble.py b/src/ert/storage/local_ensemble.py index c0b6df93612..0d4c66b1be8 100644 --- a/src/ert/storage/local_ensemble.py +++ b/src/ert/storage/local_ensemble.py @@ -661,15 +661,21 @@ def load_responses(self, key: str, realizations: Tuple[int]) -> polars.DataFrame response_type = self.experiment.response_key_to_response_type[key] select_key = True + response_config = self.experiment.response_configuration[response_type] + loaded = [] for realization in realizations: input_path = self._realization_dir(realization) / f"{response_type}.parquet" if not input_path.exists(): raise KeyError(f"No response for key {key}, realization: {realization}") - df = polars.read_parquet(input_path) + lazy_df = polars.scan_parquet(input_path) if select_key: - df = df.filter(polars.col("response_key") == key) + df = lazy_df.select( + ["realization", *response_config.primary_key, key] + ).collect() + else: + df = lazy_df.collect() loaded.append(df) @@ -707,9 +713,6 @@ def load_all_summary_data( except (ValueError, KeyError): return pd.DataFrame() - df_pl = df_pl.pivot( - on="response_key", index=["realization", "time"], sort_columns=True - ) df_pl = df_pl.rename({"time": "Date", "realization": "Realization"}) df_pandas = ( @@ -845,11 +848,16 @@ def save_response( data : polars DataFrame polars DataFrame to save. """ + response_config = self.experiment.response_configuration[response_type] - if "values" not in data.columns: + num_response_columns = ( + len(data.columns) + - len(response_config.primary_key) + - (1 if "realization" in data.columns else 0) + ) + if num_response_columns <= 0: raise ValueError( - f"Dataset for response group '{response_type}' " - f"must contain a 'values' variable" + f"Dataset for response type '{response_type}' must contain values for at least one response key" ) if len(data) == 0: @@ -873,7 +881,7 @@ def save_response( ) if not self.experiment._has_finalized_response_keys(response_type): - response_keys = data["response_key"].unique().to_list() + response_keys = data.columns[(len(response_config.primary_key) + 1) :] self.experiment._update_response_keys(response_type, response_keys) def calculate_std_dev_for_parameter(self, parameter_group: str) -> xr.Dataset: diff --git a/src/ert/storage/local_storage.py b/src/ert/storage/local_storage.py index ecaee5f9d6f..c271a0ac15b 100644 --- a/src/ert/storage/local_storage.py +++ b/src/ert/storage/local_storage.py @@ -46,7 +46,7 @@ logger = logging.getLogger(__name__) -_LOCAL_STORAGE_VERSION = 8 +_LOCAL_STORAGE_VERSION = 9 class _Migrations(BaseModel): @@ -472,6 +472,7 @@ def _migrate(self, version: int) -> None: to6, to7, to8, + to9, ) try: @@ -516,7 +517,7 @@ def _migrate(self, version: int) -> None: elif version < _LOCAL_STORAGE_VERSION: migrations = list( - enumerate([to2, to3, to4, to5, to6, to7, to8], start=1) + enumerate([to2, to3, to4, to5, to6, to7, to8, to9], start=1) ) for from_version, migration in migrations[version - 1 :]: print(f"* Updating storage to version: {from_version+1}") diff --git a/src/ert/storage/migration/to8.py b/src/ert/storage/migration/to8.py index 1785863fcda..1b6eb2f8beb 100644 --- a/src/ert/storage/migration/to8.py +++ b/src/ert/storage/migration/to8.py @@ -78,7 +78,10 @@ def _migrate_responses_from_netcdf_to_parquet(path: Path) -> None: ) pandas_df = gen_data_ds.to_dataframe().dropna() - polars_df = polars.from_pandas(pandas_df.reset_index()) + polars_df = polars.from_pandas( + pandas_df.reset_index(), + schema_overrides={"values": polars.Float32}, + ) polars_df = polars_df.rename({"name": "response_key"}) if "time" in polars_df: diff --git a/src/ert/storage/migration/to9.py b/src/ert/storage/migration/to9.py new file mode 100644 index 00000000000..edc119a3fd9 --- /dev/null +++ b/src/ert/storage/migration/to9.py @@ -0,0 +1,43 @@ +import json +import os +from pathlib import Path + +import polars + +info = "Make response datasets have one column per response" + + +def _migrate_responses_to_one_col_per_response(path: Path) -> None: + for experiment in path.glob("experiments/*"): + ensembles = path.glob("ensembles/*") + + experiment_id = None + with open(experiment / "index.json", encoding="utf-8") as f: + exp_index = json.load(f) + experiment_id = exp_index["id"] + + for ens in ensembles: + with open(ens / "index.json", encoding="utf-8") as f: + ens_file = json.load(f) + if ens_file["experiment_id"] != experiment_id: + continue + + real_dirs = [*ens.glob("realization-*")] + + for real_dir in real_dirs: + for df_name, columns in [ + ("gen_data", ["report_step", "index"]), + ("summary", ["time"]), + ]: + if (real_dir / f"{df_name}.parquet").exists(): + df = polars.read_parquet(real_dir / f"{df_name}.parquet") + pivoted = df.pivot( + on="response_key", index=["realization", *columns] + ) + + os.remove(real_dir / f"{df_name}.parquet") + pivoted.write_parquet(real_dir / f"{df_name}.parquet") + + +def migrate(path: Path) -> None: + _migrate_responses_to_one_col_per_response(path) diff --git a/tests/ert/performance_tests/test_analysis.py b/tests/ert/performance_tests/test_analysis.py index 090ce65432e..3688ac4b2d4 100644 --- a/tests/ert/performance_tests/test_analysis.py +++ b/tests/ert/performance_tests/test_analysis.py @@ -139,7 +139,7 @@ def g(X): "index": range(len(Y[:, iens])), "values": Y[:, iens], } - ), + ).pivot(on="response_key", index=["report_step", "index"]), iens, ) diff --git a/tests/ert/performance_tests/test_dark_storage_performance.py b/tests/ert/performance_tests/test_dark_storage_performance.py index 4f5bb2e1026..49ffc71ab42 100644 --- a/tests/ert/performance_tests/test_dark_storage_performance.py +++ b/tests/ert/performance_tests/test_dark_storage_performance.py @@ -141,7 +141,7 @@ def test_direct_dark_performance( if ensemble_json.userdata["name"] == "default": ensemble_id_default = ensemble_id - benchmark(function, storage, ensemble_id_default, key, template_config) + function(storage, ensemble_id_default, key, template_config) @pytest.mark.parametrize( @@ -177,4 +177,4 @@ def test_direct_dark_performance_with_storage( if ensemble_json.userdata["name"] == "default": ensemble_id_default = ensemble_id - benchmark(function, storage, ensemble_id_default, key, template_config) + function(storage, ensemble_id_default, key, template_config) diff --git a/tests/ert/performance_tests/test_memory_usage.py b/tests/ert/performance_tests/test_memory_usage.py index 68a04ea8fe9..118267fb84d 100644 --- a/tests/ert/performance_tests/test_memory_usage.py +++ b/tests/ert/performance_tests/test_memory_usage.py @@ -89,16 +89,21 @@ def fill_storage_with_data(poly_template: Path, ert_config: ErtConfig) -> None: gendatas = [] gen_obs = ert_config.observations["gen_data"] for response_key, df in gen_obs.group_by("response_key"): - gendata_df = make_gen_data(df["index"].max() + 1) + gendata_df = make_gen_data(response_key[0], df["index"].max() + 1) gendata_df = gendata_df.insert_column( 0, polars.Series(np.full(len(gendata_df), response_key)).alias( "response_key" ), ) - gendatas.append(gendata_df) + gendatas.append((response_key, gendata_df)) - source.save_response("gen_data", polars.concat(gendatas), real) + gendatas.sort(key=lambda info: info[0]) + + wide_gendatas = polars.concat([df for _, df in gendatas]).pivot( + on="response_key", index=["report_step", "index"] + ) + source.save_response("gen_data", wide_gendatas, real) obs_time_list = ens_config.refcase.all_dates @@ -123,13 +128,15 @@ def fill_storage_with_data(poly_template: Path, ert_config: ErtConfig) -> None: ) -def make_gen_data(obs: int, min_val: float = 0, max_val: float = 5) -> polars.DataFrame: +def make_gen_data( + response_key: str, obs: int, min_val: float = 0, max_val: float = 5 +) -> polars.DataFrame: data = np.random.default_rng().uniform(min_val, max_val, obs) return polars.DataFrame( { "report_step": polars.Series(np.full(len(data), 0), dtype=polars.UInt16), "index": polars.Series(range(len(data)), dtype=polars.UInt16), - "values": data, + "values": polars.Series(data, dtype=polars.Float32), } ) @@ -148,9 +155,9 @@ def make_summary_data( "time": polars.Series( np.tile(dates, len(obs_keys)).tolist() ).dt.cast_time_unit("ms"), - "values": data, + "values": polars.Series(data, dtype=polars.Float32), } - ) + ).pivot(on="response_key", index="time") @pytest.mark.limit_memory("130 MB") diff --git a/tests/ert/unit_tests/analysis/test_es_update.py b/tests/ert/unit_tests/analysis/test_es_update.py index 4767948e9b6..39352ca02ee 100644 --- a/tests/ert/unit_tests/analysis/test_es_update.py +++ b/tests/ert/unit_tests/analysis/test_es_update.py @@ -415,7 +415,7 @@ def test_smoother_snapshot_alpha( "index": polars.Series(range(len(data)), dtype=polars.UInt16), "values": data, } - ), + ).pivot(on="response_key", index=["report_step", "index"]), iens, ) posterior_storage = storage.create_ensemble( @@ -719,7 +719,7 @@ def test_gen_data_obs_data_mismatch(storage, uniform_parameter): "index": polars.Series(range(len(data)), dtype=polars.UInt16), "values": polars.Series(data, dtype=polars.Float32), } - ), + ).pivot(on="response_key", index=["report_step", "index"]), iens, ) posterior_ens = storage.create_ensemble( @@ -782,7 +782,7 @@ def test_gen_data_missing(storage, uniform_parameter, obs): "index": polars.Series(range(len(data)), dtype=polars.UInt16), "values": polars.Series(data, dtype=polars.Float32), } - ), + ).pivot(on="response_key", index=["report_step", "index"]), iens, ) posterior_ens = storage.create_ensemble( @@ -876,7 +876,7 @@ def test_update_subset_parameters(storage, uniform_parameter, obs): "index": polars.Series(range(len(data)), dtype=polars.UInt16), "values": polars.Series(data, dtype=polars.Float32), } - ), + ).pivot(on="response_key", index=["report_step", "index"]), iens, ) posterior_ens = storage.create_ensemble( diff --git a/tests/ert/unit_tests/dark_storage/test_common.py b/tests/ert/unit_tests/dark_storage/test_common.py index d80adeaf819..ce7a99e7c04 100644 --- a/tests/ert/unit_tests/dark_storage/test_common.py +++ b/tests/ert/unit_tests/dark_storage/test_common.py @@ -95,7 +95,7 @@ def test_data_for_key_returns_empty_gen_data_config(tmp_path): "index": polars.Series([0], dtype=polars.UInt16), "values": polars.Series([0.0], dtype=polars.Float32), } - ), + ).pivot(on="response_key", index=["report_step", "index"]), 0, ) ensemble.refresh_ensemble_state() diff --git a/tests/ert/unit_tests/gui/tools/plot/test_plot_api.py b/tests/ert/unit_tests/gui/tools/plot/test_plot_api.py index 6b49632e98c..78fdf671a7b 100644 --- a/tests/ert/unit_tests/gui/tools/plot/test_plot_api.py +++ b/tests/ert/unit_tests/gui/tools/plot/test_plot_api.py @@ -250,7 +250,7 @@ def test_plot_api_big_summary_memory_usage( "time": dates_df, "values": values_df, } - ) + ).pivot(on=["response_key"], index="time") experiment = storage.create_experiment( parameters=[], @@ -396,7 +396,7 @@ def test_plot_api_handles_urlescape(api_and_storage): "values": [polars.Series([1.0], dtype=polars.Float32)], } ) - df = df.explode("values", "time") + df = df.explode("values", "time").pivot(on="response_key", index="time") ensemble.save_response( "summary", df, diff --git a/tests/ert/unit_tests/scenarios/test_summary_response.py b/tests/ert/unit_tests/scenarios/test_summary_response.py index 0c93300e9bc..82701ec315d 100644 --- a/tests/ert/unit_tests/scenarios/test_summary_response.py +++ b/tests/ert/unit_tests/scenarios/test_summary_response.py @@ -247,23 +247,20 @@ def test_reading_past_2263_is_ok(ert_config, storage, prior_ensemble): responses = prior_ensemble.load_responses("summary", (0, 1, 2)) assert np.isclose( - [-1.6038368, 0.06409992, 0.7408913], responses["values"].to_numpy() + [-1.6038368, 0.06409992, 0.7408913], responses["FOPR"].to_numpy() ).all() - assert responses[["realization", "response_key", "time"]].to_dicts() == [ + assert responses[["realization", "time"]].to_dicts() == [ { "realization": 0, - "response_key": "FOPR", "time": datetime(2500, 9, 10, 0, 0), }, { "realization": 1, - "response_key": "FOPR", "time": datetime(2500, 9, 10, 0, 0), }, { "realization": 2, - "response_key": "FOPR", "time": datetime(2500, 9, 10, 0, 0), }, ] diff --git a/tests/ert/unit_tests/storage/snapshots/test_storage_migration/test_that_storage_matches/gen_data b/tests/ert/unit_tests/storage/snapshots/test_storage_migration/test_that_storage_matches/gen_data index 1b734d95981..b37583a4601 100644 --- a/tests/ert/unit_tests/storage/snapshots/test_storage_migration/test_that_storage_matches/gen_data +++ b/tests/ert/unit_tests/storage/snapshots/test_storage_migration/test_that_storage_matches/gen_data @@ -1,21 +1,21 @@ -realization,response_key,report_step,index,values -0,GEN,1,0,0.0 -0,GEN,1,1,0.1 -1,GEN,1,0,0.0 -1,GEN,1,1,0.1 -2,GEN,1,0,0.0 -2,GEN,1,1,0.1 -3,GEN,1,0,0.0 -3,GEN,1,1,0.1 -4,GEN,1,0,0.0 -4,GEN,1,1,0.1 -5,GEN,1,0,0.0 -5,GEN,1,1,0.1 -6,GEN,1,0,0.0 -6,GEN,1,1,0.1 -7,GEN,1,0,0.0 -7,GEN,1,1,0.1 -8,GEN,1,0,0.0 -8,GEN,1,1,0.1 -9,GEN,1,0,0.0 -9,GEN,1,1,0.1 +realization,report_step,index,GEN +0,1,0,0.0 +0,1,1,0.1 +1,1,0,0.0 +1,1,1,0.1 +2,1,0,0.0 +2,1,1,0.1 +3,1,0,0.0 +3,1,1,0.1 +4,1,0,0.0 +4,1,1,0.1 +5,1,0,0.0 +5,1,1,0.1 +6,1,0,0.0 +6,1,1,0.1 +7,1,0,0.0 +7,1,1,0.1 +8,1,0,0.0 +8,1,1,0.1 +9,1,0,0.0 +9,1,1,0.1 diff --git a/tests/ert/unit_tests/storage/snapshots/test_storage_migration/test_that_storage_matches/summary_data b/tests/ert/unit_tests/storage/snapshots/test_storage_migration/test_that_storage_matches/summary_data index ddb87262c2b..92c4d6e806a 100644 --- a/tests/ert/unit_tests/storage/snapshots/test_storage_migration/test_that_storage_matches/summary_data +++ b/tests/ert/unit_tests/storage/snapshots/test_storage_migration/test_that_storage_matches/summary_data @@ -1,21 +1,21 @@ -time,response_key,realization,values -1996-01-02,FOPR,0,1.1 -1996-01-02,FOPR,1,1.1 -1996-01-02,FOPR,2,1.1 -1996-01-02,FOPR,3,1.1 -1996-01-02,FOPR,4,1.1 -1996-01-02,FOPR,5,1.1 -1996-01-02,FOPR,6,1.1 -1996-01-02,FOPR,7,1.1 -1996-01-02,FOPR,8,1.1 -1996-01-02,FOPR,9,1.1 -1996-01-03,FOPR,0,5.1512652e+16 -1996-01-03,FOPR,1,5.1512652e+16 -1996-01-03,FOPR,2,5.1512652e+16 -1996-01-03,FOPR,3,5.1512652e+16 -1996-01-03,FOPR,4,5.1512652e+16 -1996-01-03,FOPR,5,5.1512652e+16 -1996-01-03,FOPR,6,5.1512652e+16 -1996-01-03,FOPR,7,5.1512652e+16 -1996-01-03,FOPR,8,5.1512652e+16 -1996-01-03,FOPR,9,5.1512652e+16 +time,realization,FOPR +1996-01-02,0,1.1 +1996-01-02,1,1.1 +1996-01-02,2,1.1 +1996-01-02,3,1.1 +1996-01-02,4,1.1 +1996-01-02,5,1.1 +1996-01-02,6,1.1 +1996-01-02,7,1.1 +1996-01-02,8,1.1 +1996-01-02,9,1.1 +1996-01-03,0,5.1512652e+16 +1996-01-03,1,5.1512652e+16 +1996-01-03,2,5.1512652e+16 +1996-01-03,3,5.1512652e+16 +1996-01-03,4,5.1512652e+16 +1996-01-03,5,5.1512652e+16 +1996-01-03,6,5.1512652e+16 +1996-01-03,7,5.1512652e+16 +1996-01-03,8,5.1512652e+16 +1996-01-03,9,5.1512652e+16 diff --git a/tests/ert/unit_tests/storage/test_local_storage.py b/tests/ert/unit_tests/storage/test_local_storage.py index ef782f8f275..6b0059e0570 100644 --- a/tests/ert/unit_tests/storage/test_local_storage.py +++ b/tests/ert/unit_tests/storage/test_local_storage.py @@ -85,7 +85,9 @@ def test_that_loading_non_existing_ensemble_throws(tmp_path): def test_that_saving_empty_responses_fails_nicely(tmp_path): with open_storage(tmp_path, mode="w") as storage: - experiment = storage.create_experiment() + experiment = storage.create_experiment( + responses=[SummaryConfig(keys=["*"]), GenDataConfig(keys=["one", "two"])] + ) ensemble = storage.create_ensemble( experiment, ensemble_size=1, iteration=0, name="prior" ) @@ -93,25 +95,20 @@ def test_that_saving_empty_responses_fails_nicely(tmp_path): # Test for entirely empty dataset with pytest.raises( ValueError, - match="Dataset for response group 'RESPONSE' must contain a 'values' variable", + match="Dataset for response type 'summary' must contain values for at least one response key", ): - ensemble.save_response("RESPONSE", polars.DataFrame(), 0) + ensemble.save_response("summary", polars.DataFrame(), 0) - # Test for dataset with 'values' but no actual data + # Test for dataset with response value columns but no actual data empty_data = polars.DataFrame( - { - "response_key": [], - "report_step": [], - "index": [], - "values": [], - } + {"report_step": [], "index": [], "one": [], "two": []} ) with pytest.raises( ValueError, - match="Responses RESPONSE are empty. Cannot proceed with saving to storage.", + match="Responses gen_data are empty. Cannot proceed with saving to storage.", ): - ensemble.save_response("RESPONSE", empty_data, 0) + ensemble.save_response("gen_data", empty_data, 0) def test_that_saving_response_updates_configs(tmp_path): @@ -133,7 +130,7 @@ def test_that_saving_response_updates_configs(tmp_path): [0.0, 1.0, 2.0, 3.0, 4.0], dtype=polars.Float32 ), } - ) + ).pivot(on="response_key", index="time") mapping_before = experiment.response_key_to_response_type smry_config_before = experiment.response_configuration["summary"] @@ -314,7 +311,7 @@ def test_that_reader_storage_reads_most_recent_response_configs(tmp_path): [0.2, 0.2, 1.0, 1.1, 3.3, 3.3], dtype=polars.Float32 ), } - ) + ).pivot(on="response_key", index="time") ens.save_response("summary", smry_data, 0) assert read_smry_config.keys == ["*", "FOPR"] @@ -902,7 +899,7 @@ def save_summary(self, model_ensemble: Ensemble, data): model_ensemble.response_values[summary.name] = ds model_experiment = self.model[storage_experiment.id] - response_keys = set(ds["response_key"].unique()) + response_keys = ds.columns[2:] model_smry_config = next( config for config in model_experiment.responses if config.name == "summary" diff --git a/tests/ert/unit_tests/storage/test_storage_migration.py b/tests/ert/unit_tests/storage/test_storage_migration.py index 6a627107f5a..afe7b8fbacc 100644 --- a/tests/ert/unit_tests/storage/test_storage_migration.py +++ b/tests/ert/unit_tests/storage/test_storage_migration.py @@ -169,9 +169,9 @@ def test_that_storage_matches( tuple(ensemble.get_realization_list_with_responses("summary")), ) snapshot.assert_match( - summary_data.sort("time", "response_key", "realization") + summary_data.sort("time", "realization") .to_pandas() - .set_index(["time", "response_key", "realization"]) + .set_index(["time", "realization"]) .transform(np.sort) .to_csv(), "summary_data", @@ -188,9 +188,9 @@ def test_that_storage_matches( "gen_data", tuple(range(ensemble.ensemble_size)) ) snapshot.assert_match( - gen_data.sort(["realization", "response_key", "report_step", "index"]) + gen_data.sort(["realization", "report_step", "index"]) .to_pandas() - .set_index(["realization", "response_key", "report_step", "index"]) + .set_index(["realization", "report_step", "index"]) .to_csv(), "gen_data", ) diff --git a/tests/ert/unit_tests/test_load_forward_model.py b/tests/ert/unit_tests/test_load_forward_model.py index ba6a5eaa599..fab43b0d776 100644 --- a/tests/ert/unit_tests/test_load_forward_model.py +++ b/tests/ert/unit_tests/test_load_forward_model.py @@ -178,8 +178,8 @@ def test_load_forward_model_gen_data(setup_case): facade = LibresFacade(config) facade.load_from_forward_model(prior_ensemble, [True]) df = prior_ensemble.load_responses("gen_data", (0,)) - filter_cond = polars.col("report_step").eq(0), polars.col("values").is_not_nan() - assert df.filter(filter_cond)["values"].to_list() == [1.0, 3.0] + filter_cond = polars.col("report_step").eq(0), polars.col("RESPONSE").is_not_nan() + assert df.filter(filter_cond)["RESPONSE"].to_list() == [1.0, 3.0] def test_single_valued_gen_data_with_active_info_is_loaded(setup_case): @@ -200,7 +200,7 @@ def test_single_valued_gen_data_with_active_info_is_loaded(setup_case): facade = LibresFacade(config) facade.load_from_forward_model(prior_ensemble, [True]) df = prior_ensemble.load_responses("RESPONSE", (0,)) - assert df["values"].to_list() == [1.0] + assert df["RESPONSE"].to_list() == [1.0] def test_that_all_deactivated_values_are_loaded(setup_case): @@ -221,7 +221,7 @@ def test_that_all_deactivated_values_are_loaded(setup_case): facade = LibresFacade(config) facade.load_from_forward_model(prior_ensemble, [True]) response = prior_ensemble.load_responses("RESPONSE", (0,)) - assert np.isnan(response[0]["values"].to_list()) + assert np.isnan(response[0]["RESPONSE"].to_list()) assert len(response) == 1 @@ -264,8 +264,8 @@ def test_loading_gen_data_without_restart(storage, run_paths, run_args): facade = LibresFacade.from_config_file("config.ert") facade.load_from_forward_model(prior_ensemble, [True]) df = prior_ensemble.load_responses("RESPONSE", (0,)) - df_no_nans = df.filter(polars.col("values").is_not_nan()) - assert df_no_nans["values"].to_list() == [1.0, 3.0] + df_no_nans = df.filter(polars.col("RESPONSE").is_not_nan()) + assert df_no_nans["RESPONSE"].to_list() == [1.0, 3.0] @pytest.mark.usefixtures("copy_snake_oil_case_storage") @@ -335,5 +335,5 @@ def test_loading_from_any_available_iter(storage, run_paths, run_args, itr): ) facade.load_from_run_path(run_path_format, prior_ensemble, [0]) df = prior_ensemble.load_responses("RESPONSE", (0,)) - df_no_nans = df.filter(polars.col("values").is_not_nan()) - assert df_no_nans["values"].to_list() == [1.0, 3.0] + df_no_nans = df.filter(polars.col("RESPONSE").is_not_nan()) + assert df_no_nans["RESPONSE"].to_list() == [1.0, 3.0] diff --git a/tests/ert/unit_tests/test_summary_response.py b/tests/ert/unit_tests/test_summary_response.py index 606c6713811..46a980f4e16 100644 --- a/tests/ert/unit_tests/test_summary_response.py +++ b/tests/ert/unit_tests/test_summary_response.py @@ -57,7 +57,6 @@ def test_load_summary_response_restart_not_zero( facade.load_from_forward_model(ensemble, [True]) df = ensemble.load_responses("summary", (0,)) - df = df.pivot(on="response_key", values="values") df = df[df.columns[:17]] df = df.rename({"time": "Date", "realization": "Realization"}) diff --git a/tests/everest/test_api_snapshots.py b/tests/everest/test_api_snapshots.py index 4b2d1fcfac2..a389792c992 100644 --- a/tests/everest/test_api_snapshots.py +++ b/tests/everest/test_api_snapshots.py @@ -127,7 +127,7 @@ def test_api_summary_snapshot( [0.2, 0.2, 1.0, 1.1, 3.3, 3.3], dtype=polars.Float32 ), } - ) + ).pivot(on="response_key", index="time", sort_columns=True) for ens in experiment.ensembles: for real in range(ens.ensemble_size): ens.save_response("summary", smry_data.clone(), real)