Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update copy_attribute plugin to include copying history attribute #2055

Merged
merged 5 commits into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 88 additions & 18 deletions improver/utilities/copy_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# See LICENSE in the root of the repository for full licensing details.
from typing import List, Union

from dateutil import parser as dparser
from iris.cube import Cube, CubeList

from improver import BasePlugin
Expand Down Expand Up @@ -31,38 +32,107 @@ def __init__(self, attributes: List = [], aux_coord: List = []):
self.attributes = attributes
self.aux_coord = aux_coord

def process(self, *cubes: Union[Cube, CubeList]) -> Union[Cube, CubeList]:
@staticmethod
def get_most_recent_history(datelist: list) -> list:
"""
Gets the most recent history attribute from the list of provided dates.

Args:
datelist:
A list of dates to find the most recent calue from.

Returns:
The most recent history attribute.
"""
prev_time = None

for date in datelist:
new_time = dparser.parse(date, fuzzy=True)
if not prev_time:
prev_time = new_time
str_time = date
elif new_time > prev_time:
prev_time = new_time
str_time = date

return str_time

def find_common_attributes(self, cubes: CubeList, attributes: List) -> dict:
"""
Find the common attribute values between the cubes. If the attribute is history, the most recent
value will be returned.

Args:
cubes:
A list of template cubes to extract common attributes from.
attributes:
A list of attributes to be copied.
Returns:
A dictionary of common attributes.
Raises:
ValueError: If the attribute is not found in any of the template cubes
ValueError: If the attribute has different values in the provided template cubes.
"""
common_attributes = {}
for attribute in attributes:
attribute_value = [
cube.attributes.get(attribute)
for cube in cubes
if cube.attributes.get(attribute) is not None
]
if attribute == "history":
# We expect the history attribute to differ between cubes, so we will only keep the most recent one
common_attributes[attribute] = self.get_most_recent_history(
attribute_value
)
elif len(attribute_value) == 0:
raise ValueError(
f"Attribute {attribute} not found in any of the template cubes"
)
elif any(attr != attribute_value[0] for attr in attribute_value):
raise ValueError(
f"Attribute {attribute} has different values in the provided template cubes"
)
else:
common_attributes[attribute] = attribute_value[0]

return common_attributes

def process(self, *cubes: Union[Cube, CubeList]) -> Cube:
"""
Copy attribute or auxilary coordinate values from template_cube to cube,
overwriting any existing values.
overwriting any existing values. If the history attribute is present in
the list of requested attributes, the most recent value will be used. If an
auxilary coordinate needs to be copied then all template cubes must have the
auxilary coordinate present.

Operation is performed in-place on provided inputs.

Args:
cubes:
Source cube(s) to be updated. Final cube provided represents the template_cube.
List of cubes. First cube provided represents the cube to be updated. All
other cubes are treated as template cubes.

Returns:
Updated cube(s).
A cube with attributes copied from the template cubes

"""
cubes_proc = as_cubelist(*cubes)
if len(cubes_proc) < 2:
raise RuntimeError(
f"At least two cubes are required for this operation, got {len(cubes_proc)}"
)
template_cube = cubes_proc.pop()

for cube in cubes_proc:
new_attributes = {k: template_cube.attributes[k] for k in self.attributes}
amend_attributes(cube, new_attributes)
for coord in self.aux_coord:
# If coordinate is already present in the cube, remove it
if cube.coords(coord):
cube.remove_coord(coord)
cube.add_aux_coord(
template_cube.coord(coord),
data_dims=template_cube.coord_dims(coord=coord),
)
cube = cubes_proc.pop(0)
template_cubes = cubes_proc
new_attributes = self.find_common_attributes(template_cubes, self.attributes)
amend_attributes(cube, new_attributes)

return cubes_proc if len(cubes_proc) > 1 else cubes_proc[0]
for coord in self.aux_coord:
# If coordinate is already present in the cube, remove it
if cube.coords(coord):
cube.remove_coord(coord)
cube.add_aux_coord(
template_cubes[0].coord(coord),
data_dims=template_cubes[0].coord_dims(coord=coord),
)
return cube
140 changes: 111 additions & 29 deletions improver_tests/utilities/copy_metadata/test_CopyMetadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import pytest
from iris.coords import AuxCoord
from iris.cube import Cube, CubeList
from iris.cube import Cube

from improver.utilities.copy_metadata import CopyMetadata

Expand All @@ -17,45 +17,92 @@ class HaltExecution(Exception):

@patch("improver.utilities.copy_metadata.as_cubelist")
def test_as_cubelist_called(mock_as_cubelist):
"""Test that the as_cubelist function is called."""
mock_as_cubelist.side_effect = HaltExecution
try:
CopyMetadata(["attribA", "attribB"])(
sentinel.cube0, sentinel.cube1, sentinel.template_cube
sentinel.cube0, sentinel.template_cube1, sentinel.template_cube2
)
except HaltExecution:
pass
mock_as_cubelist.assert_called_once_with(
sentinel.cube0, sentinel.cube1, sentinel.template_cube
sentinel.cube0, sentinel.template_cube1, sentinel.template_cube2
)


def test_copy_attributes_multi_input():
@pytest.mark.parametrize("history", [False, True])
def test_copy_attributes_multi_input(history):
"""
Test the copy_attributes function for multiple input cubes.
Test the copy_attributes function for multiple input template cubes.

Demonstrates copying attributes from the template cube to the input
cubes and also demonstrates the attributes on the templates cube that
aren't specified in the attributes list are indeed ignored.

Also demonstrates that the most recent history attribute is copied correctly if
present on multiple template cubes.

Note how we are verifying the object IDs, since CubeAttributes is an
in-place operation.
"""
attributes = ["attribA", "attribB"]
cube0 = Cube([0], attributes={"attribA": "valueA", "attribB": "valueB"})
cube1 = Cube([0], attributes={"attribA": "valueAA", "attribB": "valueBB"})
template_cube = Cube(
[0], attributes={"attribA": "tempA", "attribB": "tempB", "attribC": "tempC"}
[0],
attributes={
"attribA": "tempA",
"attribB": "tempB",
"attribC": "tempC",
"history": "2024-11-25T00:00:00Z",
},
)
template_cube_2 = Cube(
[0],
attributes={
"attribA": "tempA",
"attribC": "tempC",
"history": "2024-11-25T01:43:15Z",
},
)
if history:
attributes.append("history")

plugin = CopyMetadata(attributes)
result = plugin.process(cube0, template_cube, template_cube_2)
assert isinstance(result, Cube)
assert result.attributes["attribA"] == "tempA"
assert result.attributes["attribB"] == "tempB"
assert "attribC" not in result.attributes
assert result == cube0 # Checks cube has been altered in-place
if history:
assert result.attributes["history"] == "2024-11-25T01:43:15Z"
else:
assert "history" not in result.attributes


def test_copy_attributes_one_history_attribute():
"""Test that the history attribute is copied correctly if only one template cube has a history attribute."""
attributes = ["attribA", "attribB", "history"]
cube0 = Cube([0], attributes={"attribA": "valueA", "attribB": "valueB"})
template_cube = Cube(
[0],
attributes={
"attribA": "tempA",
"attribB": "tempB",
"attribC": "tempC",
"history": "2024-11-25T00:00:00Z",
},
)
template_cube_2 = Cube([0], attributes={"attribA": "tempA", "attribC": "tempC"})

plugin = CopyMetadata(attributes)
result = plugin.process(cube0, cube1, template_cube)
assert type(result) is CubeList
for res in result:
assert res.attributes["attribA"] == "tempA"
assert res.attributes["attribB"] == "tempB"
assert "attribC" not in res.attributes
assert id(result[0]) == id(cube0)
assert id(result[1]) == id(cube1)
result = plugin.process(cube0, template_cube_2, template_cube)
assert isinstance(result, Cube)
assert result.attributes["attribA"] == "tempA"
assert result.attributes["attribB"] == "tempB"
assert "attribC" not in result.attributes
assert result == cube0 # Checks cube has been altered in-place
assert result.attributes["history"] == "2024-11-25T00:00:00Z"


def test_copy_attributes_single_input():
Expand All @@ -72,12 +119,12 @@ def test_copy_attributes_single_input():

plugin = CopyMetadata(attributes)
result = plugin.process(cube0, template_cube)
assert type(result) is Cube
assert isinstance(result, Cube)
assert result.attributes["attribA"] == "tempA"
assert result.attributes["attribB"] == "tempB"
assert result.attributes["attribD"] == "valueD"
assert "attribC" not in result.attributes
assert id(result) == id(cube0)
assert result == cube0 # Checks cube has been altered in-place


@pytest.mark.parametrize("cubelist", [True, False])
Expand All @@ -101,21 +148,56 @@ def test_auxiliary_coord_modification(cubelist):

cube = Cube(data, aux_coords_and_dims=[(dummy_aux_coord_0, 0)])
# Create the cube with the auxiliary coordinates
template_cube = Cube(
template_cubes = Cube(
data,
aux_coords_and_dims=[(dummy_aux_coord_0_temp, 0), (dummy_aux_coord_1_temp, 0)],
)

cubes = cube
if cubelist:
cubes = [cube, cube]

template_cubes = [template_cubes, template_cubes]
plugin = CopyMetadata(aux_coord=auxiliary_coord)
result = plugin.process(cubes, template_cube)
if cubelist:
for res in result:
assert res.coord("dummy_0 status_flag") == dummy_aux_coord_0_temp
assert res.coord("dummy_1 status_flag") == dummy_aux_coord_1_temp
else:
assert result.coord("dummy_0 status_flag") == dummy_aux_coord_0_temp
assert result.coord("dummy_1 status_flag") == dummy_aux_coord_1_temp
result = plugin.process(cube, template_cubes)
assert result.coord("dummy_0 status_flag") == dummy_aux_coord_0_temp
assert result.coord("dummy_1 status_flag") == dummy_aux_coord_1_temp


def test_copy_attributes_multi_input_mismatching_attributes():
"""Test that an error is raised if the template cubes have mismatching attribute values."""
attributes = ["attribA", "attribB"]
cube0 = Cube([0], attributes={"attribA": "valueA", "attribB": "valueB"})
template_cube = Cube(
[0], attributes={"attribA": "tempA", "attribB": "tempB", "attribC": "tempC"}
)
template_cube_2 = Cube([0], attributes={"attribA": "temp2A", "attribC": "tempC"})

plugin = CopyMetadata(attributes)
with pytest.raises(
ValueError,
match="Attribute attribA has different values in the provided template cubes",
):
plugin.process(cube0, template_cube_2, template_cube)


def test_copy_attributes_multi_input_missing_attributes():
"""Test that an error is raised if a requested attribute is not present on any of the template cubes."""
attributes = ["attribA", "attribB"]
cube0 = Cube([0], attributes={"attribA": "valueA", "attribB": "valueB"})
template_cube = Cube([0], attributes={"attribB": "tempB", "attribC": "tempC"})
template_cube_2 = Cube([0], attributes={"attribC": "tempC"})
plugin = CopyMetadata(attributes)
with pytest.raises(
ValueError, match="Attribute attribA not found in any of the template cubes"
):
plugin.process(cube0, template_cube_2, template_cube)


def test_copy_attributes_missing_inputs():
"""Test that an error is raised if the number of input cubes is less than 2."""
attributes = ["attribA", "attribB"]
cube0 = Cube([0])

plugin = CopyMetadata(attributes)
with pytest.raises(
RuntimeError, match="At least two cubes are required for this operation, got 1"
):
plugin.process(cube0)