From d7a937ba852266b881140c615ab09bd4206fc31f Mon Sep 17 00:00:00 2001 From: Per Olav Eide Svendsen <35033325+perolavsvendsen@users.noreply.github.com> Date: Wed, 15 Mar 2023 09:21:49 +0100 Subject: [PATCH] map classification from ssdl.access_level (#324) * require access.ssdl.access_level to be "internal", "asset" or "restricted" * allow "asset", but change to "restricted" * mirror access.ssdl.access_level to access.classification --- .../examples/aggregated_surface_depth.yml | 1 + .../0.8.0/examples/polygons_field_outline.yml | 1 + .../0.8.0/examples/polygons_field_region.yml | 1 + .../examples/preprocessed_surface_depth.yml | 1 + .../0.8.0/examples/surface_depth.yml | 1 + .../0.8.0/examples/surface_fluid_contact.yml | 1 + .../examples/surface_seismic_amplitude.yml | 1 + .../0.8.0/examples/table_inplace.yml | 1 + .../definitions/0.8.0/schema/fmu_results.json | 14 +- src/fmu/dataio/_metadata.py | 66 +++++++- tests/test_schema/test_schema_logic.py | 24 ++- tests/test_units/test_metadata_class.py | 144 +++++++++++++++++- 12 files changed, 246 insertions(+), 10 deletions(-) diff --git a/schema/definitions/0.8.0/examples/aggregated_surface_depth.yml b/schema/definitions/0.8.0/examples/aggregated_surface_depth.yml index 860b33718..71659384d 100644 --- a/schema/definitions/0.8.0/examples/aggregated_surface_depth.yml +++ b/schema/definitions/0.8.0/examples/aggregated_surface_depth.yml @@ -170,6 +170,7 @@ access: ssdl: access_level: internal rep_include: true + classification: internal masterdata: smda: diff --git a/schema/definitions/0.8.0/examples/polygons_field_outline.yml b/schema/definitions/0.8.0/examples/polygons_field_outline.yml index 4dd0cc0c7..3b14f2d41 100644 --- a/schema/definitions/0.8.0/examples/polygons_field_outline.yml +++ b/schema/definitions/0.8.0/examples/polygons_field_outline.yml @@ -123,6 +123,7 @@ access: ssdl: access_level: internal rep_include: true + classification: internal masterdata: smda: diff --git 
a/schema/definitions/0.8.0/examples/polygons_field_region.yml b/schema/definitions/0.8.0/examples/polygons_field_region.yml index c9e3439a7..57d3f205d 100644 --- a/schema/definitions/0.8.0/examples/polygons_field_region.yml +++ b/schema/definitions/0.8.0/examples/polygons_field_region.yml @@ -100,6 +100,7 @@ access: ssdl: access_level: internal rep_include: true + classification: internal masterdata: smda: diff --git a/schema/definitions/0.8.0/examples/preprocessed_surface_depth.yml b/schema/definitions/0.8.0/examples/preprocessed_surface_depth.yml index 4aa5b41d6..daec3ec5d 100644 --- a/schema/definitions/0.8.0/examples/preprocessed_surface_depth.yml +++ b/schema/definitions/0.8.0/examples/preprocessed_surface_depth.yml @@ -145,6 +145,7 @@ access: ssdl: access_level: internal rep_include: true + classification: internal masterdata: smda: diff --git a/schema/definitions/0.8.0/examples/surface_depth.yml b/schema/definitions/0.8.0/examples/surface_depth.yml index a0e060f40..ab0619741 100644 --- a/schema/definitions/0.8.0/examples/surface_depth.yml +++ b/schema/definitions/0.8.0/examples/surface_depth.yml @@ -161,6 +161,7 @@ access: ssdl: access_level: internal rep_include: true + classification: internal masterdata: smda: diff --git a/schema/definitions/0.8.0/examples/surface_fluid_contact.yml b/schema/definitions/0.8.0/examples/surface_fluid_contact.yml index b1e75e32f..b286b52ec 100644 --- a/schema/definitions/0.8.0/examples/surface_fluid_contact.yml +++ b/schema/definitions/0.8.0/examples/surface_fluid_contact.yml @@ -167,6 +167,7 @@ access: ssdl: access_level: internal rep_include: true + classification: internal masterdata: smda: diff --git a/schema/definitions/0.8.0/examples/surface_seismic_amplitude.yml b/schema/definitions/0.8.0/examples/surface_seismic_amplitude.yml index 4c5238f99..388a2dfdb 100644 --- a/schema/definitions/0.8.0/examples/surface_seismic_amplitude.yml +++ b/schema/definitions/0.8.0/examples/surface_seismic_amplitude.yml @@ -177,6 +177,7 @@ 
access: ssdl: access_level: internal rep_include: true + classification: internal masterdata: smda: diff --git a/schema/definitions/0.8.0/examples/table_inplace.yml b/schema/definitions/0.8.0/examples/table_inplace.yml index 03768ce2f..bab1784ee 100644 --- a/schema/definitions/0.8.0/examples/table_inplace.yml +++ b/schema/definitions/0.8.0/examples/table_inplace.yml @@ -133,6 +133,7 @@ access: ssdl: access_level: asset rep_include: false + classification: internal masterdata: smda: diff --git a/schema/definitions/0.8.0/schema/fmu_results.json b/schema/definitions/0.8.0/schema/fmu_results.json index 81ed09381..0767b067d 100644 --- a/schema/definitions/0.8.0/schema/fmu_results.json +++ b/schema/definitions/0.8.0/schema/fmu_results.json @@ -779,10 +779,11 @@ ], "properties": { "access_level": { - "$comment": "Who should SSDL be allowed to share this further with?", + "$comment": "'asset' is legacy, but will be allowed in a transition", "type": "string", "enum": [ "internal", + "restricted", "asset" ] }, @@ -795,6 +796,17 @@ ] } } + }, + "classification": { + "type": "string", + "enum": [ + "internal", + "restricted" + ], + "examples": [ + "internal", + "restricted" + ] } } }, diff --git a/src/fmu/dataio/_metadata.py b/src/fmu/dataio/_metadata.py index ef0bb16f6..c02ff068b 100644 --- a/src/fmu/dataio/_metadata.py +++ b/src/fmu/dataio/_metadata.py @@ -78,9 +78,20 @@ def generate_meta_access(config: dict) -> Optional[dict]: The "ssdl" field can come from the config, or be explicitly given through the "access_ssdl" input argument. If the access_ssdl input argument is present, - its contents shall take presedence. + its contents shall take precedence. If no input, and no config, revert to the + following defaults: + access.ssdl.access_level: "internal" (we explicitly elevate to "restricted") + access.ssdl.rep_include: False (we explicitly flag to be included in REP) + + The access.ssdl.access_level field shall be "internal" or "restricted". 
We still + allow for the legacy input argument "asset", however we issue warning and change it + to "restricted". + + The access.classification will in the future be the only information classification + field. For now, we simply mirror it from ssdl.access_level to avoid API change. """ + if not config: warn("The config is empty or missing", UserWarning) return None @@ -88,7 +99,7 @@ def generate_meta_access(config: dict) -> Optional[dict]: if config and "access" not in config: raise ConfigurationError("The config misses the 'access' section") - a_cfg = config["access"] + a_cfg = config["access"] # shortform if "asset" not in a_cfg: # asset shall be present if config is used @@ -100,9 +111,54 @@ def generate_meta_access(config: dict) -> Optional[dict]: # if there is a config, the 'asset' tag shall be present a_meta["asset"] = a_cfg["asset"] - # ssdl - if "ssdl" in a_cfg and a_cfg["ssdl"]: - a_meta["ssdl"] = a_cfg["ssdl"] + # ------------------------------------ + # classification & ssdl.access_level and ssdl.rep_include + # ------------------------------------ + + # The information from the input argument "access_ssdl" has previously + # been inserted into the config. Meaning: The fact that it sits in the config + # at this stage, does not necessarily mean that the user actually has it in his + # config on the FMU side. It may come from user arguments. 
+ # See dataio._update_globalconfig_from_settings + + # First set defaults + a_meta["ssdl"] = {"access_level": "internal", "rep_include": False} + + # Then overwrite from config (which may also actually come from user arguments) + if "ssdl" in a_cfg and "access_level" in a_cfg["ssdl"]: + a_meta["ssdl"]["access_level"] = a_cfg["ssdl"]["access_level"] + + if "ssdl" in a_cfg and "rep_include" in a_cfg["ssdl"]: + a_meta["ssdl"]["rep_include"] = a_cfg["ssdl"]["rep_include"] + + # check validity + _valid_ssdl_access_levels = ["internal", "restricted", "asset"] + _ssdl_access_level = a_meta["ssdl"]["access_level"] + if _ssdl_access_level not in _valid_ssdl_access_levels: + raise ConfigurationError( + f"Illegal value for access.ssdl.access_level: {_ssdl_access_level} " + f"Valid values are: {_valid_ssdl_access_levels}" + ) + + _ssdl_rep_include = a_meta["ssdl"]["rep_include"] + if not isinstance(_ssdl_rep_include, bool): + raise ConfigurationError( + f"Illegal value for access.ssdl.rep_include: {_ssdl_rep_include}" + "access.ssdl.rep_include must be a boolean (True/False)." + ) + + # if "asset", change to "restricted" and give warning + if a_meta["ssdl"]["access_level"] == "asset": + warn( + "The value 'asset' for access.ssdl.access_level is deprecated. 
" + "Please use 'restricted' in input arguments or global variables to silence " + " this warning.", + UserWarning, + ) + a_meta["ssdl"]["access_level"] = "restricted" + + # mirror access.ssdl.access_level to access.classification + a_meta["classification"] = a_meta["ssdl"]["access_level"] # mirror return a_meta diff --git a/tests/test_schema/test_schema_logic.py b/tests/test_schema/test_schema_logic.py index 747de7170..fcf250d9a 100644 --- a/tests/test_schema/test_schema_logic.py +++ b/tests/test_schema/test_schema_logic.py @@ -114,7 +114,7 @@ def test_schema_080_logic_case(schema_080, metadata_examples): def test_schema_080_logic_fmu_block_aggr_real(schema_080, metadata_examples): """Test that fmu.realization and fmu.aggregation are not allowed at the same time""" - metadata = metadata_examples["surface_depth.yml"] + metadata = deepcopy(metadata_examples["surface_depth.yml"]) # check that assumptions for the test is true assert "realization" in metadata["fmu"] assert "aggregation" not in metadata["fmu"] @@ -325,3 +325,25 @@ def test_schema_080_data_time(schema_080, metadata_examples): _example["data"]["time"] = testvalue with pytest.raises(jsonschema.exceptions.ValidationError): jsonschema.validate(instance=_example, schema=schema_080) + + +def test_schema_logic_classification(schema_080, metadata_examples): + """Test the classification of individual files.""" + + # fetch example + example = deepcopy(metadata_examples["surface_depth.yml"]) + + # assert validation with no changes + jsonschema.validate(instance=example, schema=schema_080) + + # assert "internal" and "restricted" validates + example["access"]["classification"] = "internal" + jsonschema.validate(instance=example, schema=schema_080) + + example["access"]["classification"] = "restricted" + jsonschema.validate(instance=example, schema=schema_080) + + # assert erroneous value does not validate + example["access"]["classification"] = "open" + with pytest.raises(jsonschema.exceptions.ValidationError): + 
jsonschema.validate(instance=example, schema=schema_080) diff --git a/tests/test_units/test_metadata_class.py b/tests/test_units/test_metadata_class.py index e937cd79c..ef72ce130 100644 --- a/tests/test_units/test_metadata_class.py +++ b/tests/test_units/test_metadata_class.py @@ -119,25 +119,163 @@ def test_metadata_populate_access_ok_config(edataobj2): assert mymeta.meta_access == { "asset": {"name": "Drogon"}, "ssdl": {"access_level": "internal", "rep_include": True}, + "classification": "internal", } -def test_metadata_populate_change_access_ok(globalconfig1): +def test_metadata_populate_from_argument(globalconfig1): """Testing the access part, now with ok config and a change in access.""" + # test assumptions + assert globalconfig1["access"]["ssdl"]["access_level"] == "internal" + edata = dio.ExportData( config=globalconfig1, - access_ssdl={"access_level": "paranoid", "rep_include": False}, + access_ssdl={"access_level": "restricted", "rep_include": True}, ) mymeta = _MetaData("dummy", edata) mymeta._populate_meta_access() assert mymeta.meta_access == { "asset": {"name": "Test"}, - "ssdl": {"access_level": "paranoid", "rep_include": False}, + "ssdl": {"access_level": "restricted", "rep_include": True}, + "classification": "restricted", # mirroring ssdl.access_level } +def test_metadata_populate_partial_access_ssdl(globalconfig1): + """Test what happens if ssdl_access argument is partial.""" + + # test assumptions + assert globalconfig1["access"]["ssdl"]["access_level"] == "internal" + assert globalconfig1["access"]["ssdl"]["rep_include"] is False + + # rep_include only, but in config + edata = dio.ExportData(config=globalconfig1, access_ssdl={"rep_include": True}) + mymeta = _MetaData("dummy", edata) + mymeta._populate_meta_access() + assert mymeta.meta_access["ssdl"]["rep_include"] is True + assert mymeta.meta_access["ssdl"]["access_level"] == "internal" # default + assert mymeta.meta_access["classification"] == "internal" # default + + # access_level only, 
but in config + edata = dio.ExportData( + config=globalconfig1, access_ssdl={"access_level": "restricted"} + ) + mymeta = _MetaData("dummy", edata) + mymeta._populate_meta_access() + assert mymeta.meta_access["ssdl"]["rep_include"] is False # default + assert mymeta.meta_access["ssdl"]["access_level"] == "restricted" + assert mymeta.meta_access["classification"] == "restricted" + + +def test_metadata_populate_wrong_config(globalconfig1): + """Test error in access_ssdl in config.""" + + # test assumptions + _config = deepcopy(globalconfig1) + _config["access"]["ssdl"]["access_level"] = "wrong" + + edata = dio.ExportData( + config=_config, + ) + mymeta = _MetaData("dummy", edata) + with pytest.raises(ConfigurationError, match="Illegal value for access"): + mymeta._populate_meta_access() + + +def test_metadata_populate_wrong_argument(globalconfig1): + """Test error in access_ssdl in arguments.""" + + edata = dio.ExportData(config=globalconfig1, access_ssdl={"access_level": "wrong"}) + mymeta = _MetaData("dummy", edata) + with pytest.raises(ConfigurationError, match="Illegal value for access"): + mymeta._populate_meta_access() + + +def test_metadata_access_correct_input(globalconfig1): + """Test giving correct input.""" + # Input is "restricted" and False - correct use, shall work + edata = dio.ExportData( + config=globalconfig1, + access_ssdl={"access_level": "restricted", "rep_include": False}, + ) + mymeta = _MetaData("dummy", edata) + mymeta._populate_meta_access() + assert mymeta.meta_access["ssdl"]["rep_include"] is False + assert mymeta.meta_access["ssdl"]["access_level"] == "restricted" + assert mymeta.meta_access["classification"] == "restricted" + + # Input is "internal" and True - correct use, shall work + edata = dio.ExportData( + config=globalconfig1, + access_ssdl={"access_level": "internal", "rep_include": True}, + ) + mymeta = _MetaData("dummy", edata) + mymeta._populate_meta_access() + assert mymeta.meta_access["ssdl"]["rep_include"] is True + assert 
mymeta.meta_access["ssdl"]["access_level"] == "internal" + assert mymeta.meta_access["classification"] == "internal" + + +def test_metadata_access_deprecated_input(globalconfig1): + """Test giving deprecated input.""" + # Input is "asset". Is deprecated, shall work with warning. + # Output shall be "restricted". + edata = dio.ExportData(config=globalconfig1, access_ssdl={"access_level": "asset"}) + mymeta = _MetaData("dummy", edata) + with pytest.warns(match="The value 'asset' for access.ssdl.access_level is deprec"): + mymeta._populate_meta_access() + assert mymeta.meta_access["ssdl"]["access_level"] == "restricted" + assert mymeta.meta_access["classification"] == "restricted" + + +def test_metadata_access_illegal_input(globalconfig1): + """Test giving illegal input.""" + + # Input is "secret". Not allowed, shall fail. + edata = dio.ExportData(config=globalconfig1, access_ssdl={"access_level": "secret"}) + mymeta = _MetaData("dummy", edata) + with pytest.raises(ConfigurationError, match="Illegal value for access"): + mymeta._populate_meta_access() + + # Input is "open". Not allowed, shall fail. 
+ edata = dio.ExportData(config=globalconfig1, access_ssdl={"access_level": "open"}) + mymeta = _MetaData("dummy", edata) + with pytest.raises(ConfigurationError, match="Illegal value for access"): + mymeta._populate_meta_access() + + +def test_metadata_access_no_input(globalconfig1): + """Test not giving any input arguments.""" + + # No input, revert to config + configcopy = deepcopy(globalconfig1) + configcopy["access"]["ssdl"]["access_level"] = "restricted" + configcopy["access"]["ssdl"]["rep_include"] = True + edata = dio.ExportData(config=configcopy) + mymeta = _MetaData("dummy", edata) + mymeta._populate_meta_access() + assert mymeta.meta_access["ssdl"]["rep_include"] is True + assert mymeta.meta_access["ssdl"]["access_level"] == "restricted" + assert mymeta.meta_access["classification"] == "restricted" # mirrored + + # No input, no config, shall default to "internal" and False + configcopy = deepcopy(globalconfig1) + del configcopy["access"]["ssdl"]["access_level"] + del configcopy["access"]["ssdl"]["rep_include"] + edata = dio.ExportData(config=globalconfig1) + mymeta = _MetaData("dummy", edata) + mymeta._populate_meta_access() + assert mymeta.meta_access["ssdl"]["rep_include"] is False # default + assert mymeta.meta_access["ssdl"]["access_level"] == "internal" # default + assert mymeta.meta_access["classification"] == "internal" # mirrored + + +def test_metadata_access_rep_include(globalconfig1): + """Test the input of the rep_include field.""" + + # -------------------------------------------------------------------------------------- # The GENERATE method # --------------------------------------------------------------------------------------