From dd7925c48926cce52e2e0cadb3994ec11658cf45 Mon Sep 17 00:00:00 2001 From: Peter Verveer Date: Wed, 27 Nov 2024 07:50:50 +0000 Subject: [PATCH] Remove the everest load entry point --- docs/everest/cli.rst | 9 - src/everest/bin/everload_script.py | 197 ---------------- src/everest/bin/main.py | 5 - .../everest/unit/everest/bin/test_everload.py | 219 ------------------ 4 files changed, 430 deletions(-) delete mode 100755 src/everest/bin/everload_script.py delete mode 100644 tests/everest/unit/everest/bin/test_everload.py diff --git a/docs/everest/cli.rst b/docs/everest/cli.rst index c655169f72a..bdd699e1db5 100644 --- a/docs/everest/cli.rst +++ b/docs/everest/cli.rst @@ -124,15 +124,6 @@ The command above is equivalent to having the following export section defined i export: batches: [] -============== -Everest `load` -============== - -.. argparse:: - :module: everest.bin.everload_script - :func: _build_args_parser - :prog: everload_entry - ============== Everest `lint` diff --git a/src/everest/bin/everload_script.py b/src/everest/bin/everload_script.py deleted file mode 100755 index 4488eb48e35..00000000000 --- a/src/everest/bin/everload_script.py +++ /dev/null @@ -1,197 +0,0 @@ -#!/usr/bin/env python - -import argparse -import datetime -import logging -import os -import shutil -from functools import partial - -from ert import LibresFacade -from ert.storage import open_storage -from everest import MetaDataColumnNames as MDCN -from everest.config import EverestConfig -from everest.config.export_config import ExportConfig -from everest.export import export_data -from everest.strings import SIMULATION_DIR, STORAGE_DIR -from everest.util import version_info - - -def everload_entry(args=None) -> None: - parser = _build_args_parser() - options = parser.parse_args(args) - if options.debug: - logging.getLogger().setLevel(logging.DEBUG) - # Remove the null handler if set: - logging.getLogger().removeHandler(logging.NullHandler()) - logging.info(version_info()) - - config: EverestConfig = options.config_file - - if options.batches is not None: - batch_list = [int(item) for item in options.batches] - if config.export is None: - config.export = ExportConfig(batches=batch_list) - else: - config.export.batches = batch_list - - # The case must have run before - out_dir = config.output_dir - if not os.path.isdir(out_dir): - raise RuntimeError("This case was never run, cannot internalize data") - - # The simulation directory should be available - # At the moment we check only if the simulation folder exists. In the future - # we may consider carrying out some more thorough sanity check on the folder - # before proceding with the internalization - sim_dir = config.simulation_dir - if not os.path.isdir(sim_dir): - raise RuntimeError( - ( - "The simulation directory '{}' cannot be found, " - "cannot internalize data" - ).format(sim_dir) - ) - - # Warn the user and ask for confirmation - storage_path = config.storage_dir - backup_path = None - if not os.path.isdir(storage_path): - storage_path = None - elif not options.overwrite: - backup_path = storage_path + datetime.datetime.utcnow().strftime( - "__%Y-%m-%d_%H.%M.%S.%f" - ) - - if not options.silent and not user_confirms(sim_dir, storage_path, backup_path): - return - - reload_data(config, backup_path=backup_path) - - -def _build_args_parser() -> argparse.ArgumentParser: - """Build arg parser""" - arg_parser = argparse.ArgumentParser( - description="Load Eclipse data from an existing simulation folder", - usage="""everest load """, - ) - - def batch(batch_str, parser=arg_parser): - batch_str = "{}".format( - batch_str.strip() - ) # Because isnumeric only works on unicode strings in py27 - if not batch_str.isnumeric() or (batch_str[0] == "0" and len(batch_str) > 1): - parser.error("Invalid batch given: '{}'".format(batch_str)) - return int(batch_str) - - arg_parser.add_argument( - "config_file", - type=partial(EverestConfig.load_file_with_argparser, parser=arg_parser), - help="The path to the everest configuration file", - ) - arg_parser.add_argument( - "-s", - "--silent", - action="store_true", - help="Backup/overwrite current internal storage without asking", - ) - arg_parser.add_argument( - "--overwrite", - action="store_true", - help="Overwrite the internal storage instead of backing it up", - ) - arg_parser.add_argument( - "-b", - "--batches", - nargs="+", - type=batch, - help="List of batches to be internalized", - ) - arg_parser.add_argument( - "--debug", - action="store_true", - help="Display debug information in the terminal", - ) - - return arg_parser - - -def user_confirms(simulation_path, storage_path=None, backup_path=None) -> bool: - print("\n*************************************************************") - print("*** This operation can take several minutes or even hours ***") - print("*************************************************************\n") - print("The Everest internal storage will be populated using data from") - print(" {}".format(simulation_path)) - if storage_path is not None: - if backup_path is None: - print("WARNING: the current internal storage will be deleted") - else: - print("The current internal storage will be backed up in") - print(" {}".format(backup_path)) - while True: - text = input("Are you sure you want to proceed? (y/n) ") - if not text: - continue - if text[0] in ("n", "N"): - return False - if text[0] in ("y", "Y"): - return True - - -def reload_data(ever_config: EverestConfig, backup_path=None) -> None: - """Load data from a completed optimization into ert storage - - If @batch_ids are given, only the specified batches are internalized - If a @backup_path is specified, the current internal storage will be copied - to the given path instead of being deleted. - """ - # The ErtConfig constructor is picky, these sections can produce errors, but - # we don't need them for re-internalizing the data - - ever_config.forward_model = None - ever_config.install_jobs = None - ever_config.install_workflow_jobs = None - ever_config.install_data = None - ever_config.install_templates = None - - # load information about batches from previous run - df = export_data( - export_config=ever_config.export, - output_dir=ever_config.output_dir, - data_file=ever_config.model.data_file if ever_config.model else None, - export_ecl=False, - ) - groups = df.groupby(by=MDCN.BATCH) - - # backup or delete the previous internal storage - if backup_path: - shutil.move(ever_config.storage_dir, backup_path) - else: - shutil.rmtree(ever_config.storage_dir) - - ensemble_path = os.path.join(ever_config.output_dir, STORAGE_DIR) - run_path_format = os.path.join( - ever_config.simulation_dir, - "", - "geo_realization_", - SIMULATION_DIR, - ) - - # internalize one batch at a time - for batch_id, group in groups: - _internalize_batch(ensemble_path, run_path_format, batch_id, group) - - -def _internalize_batch( - ensemble_path: str, run_path_format: str, batch_id, batch_data -) -> None: - batch_size = batch_data.shape[0] - with open_storage(ensemble_path, "w") as storage: - experiment = next(storage.experiments) - ensemble = experiment.get_ensemble_by_name(f"batch_{batch_id}") - active_realizations = list(range(batch_size)) - LibresFacade.load_from_run_path(run_path_format, ensemble, active_realizations) - - -if __name__ == "__main__": - everload_entry() diff --git a/src/everest/bin/main.py b/src/everest/bin/main.py index 604d7567aed..20285e841cf 100644 --- a/src/everest/bin/main.py +++ b/src/everest/bin/main.py @@ -10,7 +10,6 @@ from everest.bin.everest_script import everest_entry from everest.bin.everexport_script import everexport_entry from everest.bin.everlint_script import lint_entry -from everest.bin.everload_script import everload_entry from everest.bin.kill_script import kill_entry from everest.bin.monitor_script import monitor_entry from everest.bin.visualization_script import visualization_entry @@ -132,10 +131,6 @@ def lint(self, args): """Validate a config file""" lint_entry(args) - def load(self, args): - """Load Eclipse data from an existing simulation folder""" - everload_entry(args) - def render(self, args): """Display the configuration data loaded from a config file""" config_dump_entry(args) diff --git a/tests/everest/unit/everest/bin/test_everload.py b/tests/everest/unit/everest/bin/test_everload.py deleted file mode 100644 index 17c09b2bc99..00000000000 --- a/tests/everest/unit/everest/bin/test_everload.py +++ /dev/null @@ -1,219 +0,0 @@ -import os -import shutil -from unittest.mock import patch - -import pandas as pd -import pytest -from tests.everest.utils import ( - capture_streams, - create_cached_mocked_test_case, -) - -from everest import MetaDataColumnNames as MDCN -from everest.bin.everload_script import everload_entry -from everest.config import EverestConfig -from everest.export import export_data -from everest.strings import STORAGE_DIR - -CONFIG_FILE = "mocked_multi_batch.yml" - -pytestmark = pytest.mark.xdist_group(name="starts_everest") - - -@pytest.fixture -def cache_dir(request, monkeypatch): - return create_cached_mocked_test_case(request, monkeypatch) - - -def get_config(cache_dir): - shutil.copytree( - cache_dir / "mocked_multi_batch_output", "mocked_multi_batch_output" - ) - config = EverestConfig.load_file(CONFIG_FILE) - simdir = config.simulation_dir - - # Assume there is already a storage - assert os.path.isdir(config.storage_dir) - - # Create the simulation folder - if not os.path.isdir(simdir): - os.makedirs(simdir) - - return config - - -def assert_internalize_calls(batch_ids, mocked_internalize): - for i, b_id in enumerate(batch_ids): - ensemble_path, run_path_format, bid, data = mocked_internalize.call_args_list[ - i - ].args - assert isinstance(ensemble_path, str) - assert isinstance(run_path_format, str) - assert isinstance(data, pd.DataFrame) - assert bid == b_id - - -def assertBackup(config: EverestConfig): - backupdir = [ - d for d in os.listdir(config.output_dir) if d.startswith(STORAGE_DIR + "__") - ] - assert backupdir != [] - - -@patch("everest.bin.everload_script._internalize_batch") -@pytest.mark.fails_on_macos_github_workflow -def test_everload_entry_run( - mocked_internalize, cache_dir, copy_mocked_test_data_to_tmp -): - """Test running everload on an optimization case""" - config = get_config(cache_dir) - everload_entry([CONFIG_FILE, "-s"]) - df = export_data( - export_config=config.export, - output_dir=config.output_dir, - data_file=config.model.data_file if config.model else None, - export_ecl=False, - ) - batch_ids = set(df[MDCN.BATCH]) - assert_internalize_calls(batch_ids, mocked_internalize) - assertBackup(config) - - -@patch("everest.bin.everload_script._internalize_batch") -@pytest.mark.fails_on_macos_github_workflow -def test_everload_entry_run_empty_batch_list(_, copy_mocked_test_data_to_tmp): - """Test running everload on an optimization case""" - with pytest.raises(SystemExit), capture_streams() as (_, err): - everload_entry([CONFIG_FILE, "-s", "-b"]) - assert ( - "error: argument -b/--batches: expected at least one argument" - in err.getvalue() - ) - - -@patch("everest.bin.everload_script._internalize_batch") -@pytest.mark.fails_on_macos_github_workflow -def test_everload_entry_missing_folders( - mocked_internalize, cache_dir, copy_mocked_test_data_to_tmp -): - """Test running everload when output folders are missing""" - config = get_config(cache_dir) - shutil.rmtree(config.simulation_dir) - with pytest.raises(RuntimeError, match="simulation"): - everload_entry([CONFIG_FILE, "-s"]) - shutil.rmtree(config.output_dir) - with pytest.raises(RuntimeError, match="never run"): - everload_entry([CONFIG_FILE, "-s"]) - mocked_internalize.assert_not_called() - - -@patch("everest.bin.everload_script._internalize_batch") -@pytest.mark.fails_on_macos_github_workflow -def test_everload_entry_batches( - mocked_internalize, cache_dir, copy_mocked_test_data_to_tmp -): - """Test running everload with a selection of batches""" - config = get_config(cache_dir) - # pick every second batch (assume there are at least 2) - df = export_data( - export_config=config.export, - output_dir=config.output_dir, - data_file=config.model.data_file if config.model else None, - export_ecl=False, - ) - batch_ids = list(set(df[MDCN.BATCH])) - assert len(batch_ids) > 1 - batch_ids = batch_ids[::2] - - everload_entry([CONFIG_FILE, "-s", "-b"] + [str(b) for b in batch_ids]) - - assert_internalize_calls(batch_ids, mocked_internalize) - assertBackup(config) - - -@patch("everest.bin.everload_script._internalize_batch") -@pytest.mark.fails_on_macos_github_workflow -def test_everload_entry_invalid_batches( - mocked_internalize, copy_mocked_test_data_to_tmp -): - """Test running everload with no or wrong batches""" - with pytest.raises(SystemExit), capture_streams() as (_, err): - everload_entry([CONFIG_FILE, "-s", "-b", "-2", "5412"]) - assert "error: Invalid batch given: '-2'" in err.getvalue() - - with pytest.raises(SystemExit), capture_streams() as (_, err): - everload_entry([CONFIG_FILE, "-s", "-b", "0123"]) - assert "error: Invalid batch given: '0123'" in err.getvalue() - - mocked_internalize.assert_not_called() - - -@patch("everest.bin.everload_script._internalize_batch") -@pytest.mark.fails_on_macos_github_workflow -def test_everload_entry_overwrite( - mocked_internalize, cache_dir, copy_mocked_test_data_to_tmp -): - """Test running everload with the --overwrite flag""" - config = get_config(cache_dir) - everload_entry([CONFIG_FILE, "-s", "--overwrite"]) - - df = export_data( - export_config=config.export, - output_dir=config.output_dir, - data_file=config.model.data_file if config.model else None, - export_ecl=False, - ) - batch_ids = set(df[MDCN.BATCH]) - assert_internalize_calls(batch_ids, mocked_internalize) - - # Note that, as we are mocking the entire ert related part, the - # internalization does not take place, so no new storage dir is created - backupdir = [d for d in os.listdir(config.output_dir) if d.startswith(STORAGE_DIR)] - assert backupdir == [] - - -@patch("everest.bin.everload_script._internalize_batch") -@pytest.mark.fails_on_macos_github_workflow -def test_everload_entry_not_silent( - mocked_internalize, cache_dir, copy_mocked_test_data_to_tmp -): - """Test running everload without the -s flag""" - config = get_config(cache_dir) - - no = lambda _: "n" # pylint:disable=unnecessary-lambda-assignment - yes = lambda _: "y" # pylint:disable=unnecessary-lambda-assignment - - with capture_streams() as (stdout, _): - with patch("everest.bin.everload_script.input", side_effect=no): - everload_entry([CONFIG_FILE]) - assert "backed up" in stdout.getvalue() - mocked_internalize.assert_not_called() - - with capture_streams() as (stdout, _): - with patch("everest.bin.everload_script.input", side_effect=no): - everload_entry([CONFIG_FILE, "--overwrite"]) - assert "WARNING" in stdout.getvalue() - mocked_internalize.assert_not_called() - - with capture_streams() as (stdout, _): - with patch("everest.bin.everload_script.input", side_effect=yes): - everload_entry([CONFIG_FILE]) - assert len(stdout.getvalue()) > 0 - df = export_data( - export_config=config.export, - output_dir=config.output_dir, - data_file=config.model.data_file if config.model else None, - export_ecl=False, - ) - batch_ids = set(df[MDCN.BATCH]) - assert_internalize_calls(batch_ids, mocked_internalize) - - df = export_data( - export_config=config.export, - output_dir=config.output_dir, - data_file=config.model.data_file if config.model else None, - export_ecl=False, - ) - batch_ids = set(df[MDCN.BATCH]) - assert_internalize_calls(batch_ids, mocked_internalize) - assertBackup(config)