Skip to content

Commit

Permalink
Fix a bug when preprocessed data use a subfolder
Browse files Browse the repository at this point in the history
Solves #282, when the subsequent FMU job
is run on preprocessed data.
  • Loading branch information
jcrivenaes committed Dec 22, 2022
1 parent 57dd0a1 commit d6f6cb2
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 3 deletions.
4 changes: 4 additions & 0 deletions src/fmu/dataio/_filedata_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,10 @@ def _get_path_generic(self, mode="realization", allow_forcefolder=True, info="")

if mode == "preprocessed":
outroot = outroot / "preprocessed"
if self.dataio.forcefolder:
raise ValueError(
"Cannot use 'forcefolder' option with preprocessed data"
)

if mode != "preprocessed":
if self.dataio.is_observation:
Expand Down
6 changes: 5 additions & 1 deletion src/fmu/dataio/_objectdata_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -575,7 +575,11 @@ def _derive_from_existing(self):

# derive the additional attributes needed later e.g. in Filedata provider:
relpath = Path(self.meta_existing["file"]["relative_path"])
self.efolder = relpath.parent.name
if self.dataio.subfolder:
self.efolder = relpath.parent.parent.name
else:
self.efolder = relpath.parent.name

self.classname = self.meta_existing["class"]
self.extension = relpath.suffix
self.fmt = self.meta_existing["data"]["format"]
Expand Down
18 changes: 16 additions & 2 deletions src/fmu/dataio/dataio.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ class ExportData:
reuse_metadata_rule: This input is None or a string describing rule for reusing
metadata. Default is None, but if the input is a file string or object with
already valid metdata, then it is assumed to be "preprocessed", which
already valid metadata, then it is assumed to be "preprocessed", which
merges the metadata after predefined rules.
runpath: TODO! Optional and deprecated. The relative location of the current run
Expand Down Expand Up @@ -718,7 +718,11 @@ def _establish_pwd_rootpath(self):
logger.info("rootpath: %s", str(self._rootpath))

def _check_obj_if_file(self, obj: Any) -> Any:
"""When obj is file-like, it must be checked + assume preprocessed."""
"""When obj is file-like, it must be checked + assume preprocessed.
In addition, if preprocessed, derive the subfolder if present and subfolder is
not set already.
"""

if isinstance(obj, (str, Path)):
if isinstance(obj, str):
Expand All @@ -728,6 +732,16 @@ def _check_obj_if_file(self, obj: Any) -> Any:
if not self.reuse_metadata_rule:
self.reuse_metadata_rule = "preprocessed"

# detect if object is on a subfolder relative to /preprocessed/xxxx
for ipar in range(3):
foldername = obj.parents[ipar].stem
if foldername == "preprocessed" and ipar == 2:
if not self.subfolder:
self.subfolder = obj.parents[0].stem
logger.info(
"Subfolder is auto-derived from preprocessed file path: %s",
self.subfolder,
)
return obj

# ==================================================================================
Expand Down
90 changes: 90 additions & 0 deletions tests/test_units/test_prerealization_surfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import os
from pathlib import Path

import pytest
from conftest import inside_rms

import fmu.dataio.dataio as dataio
Expand Down Expand Up @@ -140,3 +141,92 @@ def _run_case_fmu(fmurun_w_casemetadata, rmsglobalconfig, surfacepath):
_run_case_fmu(fmurun_w_casemetadata, rmsglobalconfig, mysurf)

logger.info("Preprocessed surface is %s", mysurf)


def test_regsurf_preprocessed_observation_subfolder(
    fmurun_w_casemetadata, rmssetup, rmsglobalconfig, regsurf
):
    """Two-stage export of a preprocessed observation surface kept in a subfolder.

    Stage one exports the surface with ``fmu_context="preprocessed"`` and
    ``subfolder="mysub"``. Stage two passes the exported file path to an
    ``fmu_context="case"`` export and checks the resulting relative path:

    * without an explicit ``subfolder`` the path shall contain ``mysub``,
      i.e. the subfolder name of the preprocessed file;
    * with an explicit ``subfolder`` the path shall contain that name instead.
    """

    @inside_rms
    def _export_data_from_rms(rmssetup, rmsglobalconfig, regsurf):
        """Stage one: export the surface as preprocessed data from the RMS setup."""
        logger.info("Active folder is %s", rmssetup)
        os.chdir(rmssetup)

        exporter = dataio.ExportData(
            config=rmsglobalconfig,  # settings come from the global config
            fmu_context="preprocessed",
            name="preprocessedmap",
            is_observation=True,
            timedata=[[20240802, "moni"], [20200909, "base"]],
            subfolder="mysub",
        )

        meta = exporter.generate_metadata(regsurf)
        logger.debug("\n%s", utils.prettyprint_dict(meta))

        expected_path = (
            "share/preprocessed/maps/mysub/preprocessedmap--20240802_20200909.gri"
        )
        assert meta["file"]["relative_path"] == expected_path

        return exporter.export(regsurf)

    def _run_case_fmu(fmurun_w_casemetadata, rmsglobalconfig, surfacepath, subf=None):
        """Stage two: re-export the preprocessed file in an FMU case context.

        ``subf`` is an optional subfolder name passed on to ``generate_metadata``;
        when it is None no ``subfolder`` argument is given at all.
        """
        os.chdir(fmurun_w_casemetadata)
        logger.info("Active folder is %s", fmurun_w_casemetadata)

        exporter = dataio.ExportData(
            config=rmsglobalconfig,  # settings come from the global config
            fmu_context="case",
            name="pre_v3",
            is_observation=True,
        )

        if subf is None:
            # No override: the subfolder of the preprocessed file is expected.
            meta = exporter.generate_metadata(surfacepath)
            expected_path = (
                "share/observations/maps/mysub/pre_v3--20240802_20200909.gri"
            )
        else:
            # Explicit override of the subfolder name.
            meta = exporter.generate_metadata(surfacepath, subfolder=subf)
            expected_path = (
                f"share/observations/maps/{subf}/pre_v3--20240802_20200909.gri"
            )

        assert meta["file"]["relative_path"] == expected_path
        assert "merged" in meta["tracklog"][-1]["event"]

    # Run the two-stage process: once with the default subfolder, once overridden.
    exported_surface = _export_data_from_rms(rmssetup, rmsglobalconfig, regsurf)
    _run_case_fmu(fmurun_w_casemetadata, rmsglobalconfig, exported_surface)
    _run_case_fmu(
        fmurun_w_casemetadata, rmsglobalconfig, exported_surface, subf="xxxx"
    )


@inside_rms
def test_preprocessed_with_forcefolder_shall_fail(rmssetup, rmsglobalconfig, regsurf):
    """Generating metadata shall raise ValueError for preprocessed + forcefolder.

    An ``ExportData`` instance is created with ``fmu_context="preprocessed"``
    and a ``forcefolder`` value; the subsequent ``generate_metadata`` call is
    expected to fail with the message matched below.
    """
    logger.info("Active folder is %s", rmssetup)
    os.chdir(rmssetup)

    expected_message = "Cannot use 'forcefolder' option with preprocessed data"

    exporter = dataio.ExportData(
        config=rmsglobalconfig,  # settings come from the global config
        fmu_context="preprocessed",
        name="some",
        is_observation=True,
        timedata=[[20240802, "moni"], [20200909, "base"]],
        forcefolder="/tmp",
    )

    with pytest.raises(ValueError, match=expected_message):
        exporter.generate_metadata(regsurf)

0 comments on commit d6f6cb2

Please sign in to comment.