Skip to content

Commit

Permalink
Add aggregation CLI (#1952)
Browse files Browse the repository at this point in the history
* add aggregation functionality

* Handle case where std_dev is undefined

* all dimensions to be a string

* update cli and acceptance tests

* Undo testing change

* formatting

* reverse testing change

* update

* change parameter name

* fix docstring

* Check dimension exists

* Refactor to use collapse-realizations cli

* Remove old file

* Handle nans

* fix bug

* Remove check

* simplify code

* Check that realization is a dimension in cli

* fix docstring

* fix docstring

* Add test for invalid method error

* Add acceptance test for when new name not specified

* formatting

* update docstring

* update docstring

* update docstring

* Use pytest

* simplify code

* formatting

* update checksum

* Remove argument.

---------

Co-authored-by: Belinda Trotta <[email protected]>
Co-authored-by: Gavin Evans <[email protected]>
  • Loading branch information
3 people authored Oct 13, 2023
1 parent fdc96a4 commit 2520f58
Show file tree
Hide file tree
Showing 5 changed files with 267 additions and 14 deletions.
71 changes: 71 additions & 0 deletions improver/cli/collapse_realizations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# -----------------------------------------------------------------------------
# (C) British Crown copyright. The Met Office.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""Script to collapse the realizations dimension of a cube."""


from improver import cli


@cli.clizefy
@cli.with_output
def process(
cube: cli.inputcube, *, method: str = "mean", new_name: str = None,
):
"""Collapse the realization dimension of a cube.
Args:
cube (iris.cube.Cube):
Cube to be collapsed.
method (str):
One of "sum", "mean", "median", "std_dev", "min", "max".
new_name (str):
New name for output cube; if None use iris default.
Returns:
iris.cube.Cube:
Collapsed cube. Dimensions are the same as input cube,
without realization dimension.
Raises:
ValueError: if realization is not a dimension coordinate.
"""

from improver.utilities.cube_manipulation import collapse_realizations

if not (cube.coords("realization", dim_coords=True)):
raise ValueError("realization must be a dimension coordinate.")

collapsed_cube = collapse_realizations(cube, method=method)
if new_name:
collapsed_cube.rename(new_name)

return collapsed_cube
34 changes: 30 additions & 4 deletions improver/utilities/cube_manipulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,18 +81,44 @@ def collapsed(cube: Cube, *args: Any, **kwargs: Any) -> Cube:
return new_cube


def collapse_realizations(cube: Cube) -> Cube:
def collapse_realizations(cube: Cube, method="mean") -> Cube:
"""Collapses the realization coord of a cube and strips the coord from the cube.
Args:
cube:
Input cube
Cube to be aggregated.
method:
One of "sum", "mean", "median", "std_dev", "min", "max";
default is "mean".
Returns:
Cube with realization coord collapsed and removed.
"""
returned_cube = collapsed(cube, "realization", iris.analysis.MEAN)

aggregator_dict = {
"sum": iris.analysis.SUM,
"mean": iris.analysis.MEAN,
"median": iris.analysis.MEDIAN,
"std_dev": iris.analysis.STD_DEV,
"min": iris.analysis.MIN,
"max": iris.analysis.MAX,
}

aggregator = aggregator_dict.get(method)
if aggregator is None:
raise ValueError(f"method must be one of {list(aggregator_dict.keys())}")

returned_cube = collapsed(cube, "realization", aggregator)
returned_cube.remove_coord("realization")

if (
(method == "std_dev")
and (len(cube.coord("realization").points) == 1)
and (np.ma.is_masked(returned_cube.data))
):
# Standard deviation is undefined. Iris masks the entire output,
# but we also set the underlying data to np.nan here.
returned_cube.data.data[:] = np.nan

return returned_cube


Expand Down
4 changes: 4 additions & 0 deletions improver_tests/acceptance/SHA256SUMS
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,10 @@ d0053b55f2abaeeefa3d5d58df597bbaf5b07a2bfbb614c1ff404fb695700d2c ./cloud-conden
5788b1228fbd7cce6c92a2ff6448c4f14d78d868bb45743a816f728b70c6b47d ./cloud-top-temperature/temperature_on_pressure_levels.nc
d350e329f47d18c819f75a91a0168dff0c8cce404df2e3992e41c4b78925699b ./cloud-top-temperature/with_id_attr/kgo.nc
2c4064d3d42eedcffda85872b00fabb42b23f71ecb007e5be0b046ef93b0e9fc ./cloud-top-temperature/without_id_attr/kgo.nc
966663c18804e6332f74682686d207ddd80e5c30dbf7f3ace785e1df44c9c673 ./collapse-realizations/input.nc
e4250f25f7e770d00d863e6d5d3e8921c79e3609b68566637bda0f5ba0148b12 ./collapse-realizations/input_no_realization.nc
c21806e891dafce6fd42ea1bd420a3d6197b676f6f7a1c2bcec1c805fcba8d59 ./collapse-realizations/kgo_mean.nc
2ca7400809cb8f306acc8803bb4142ed5bb754931f120229d67b01bf00c130a7 ./collapse-realizations/kgo_no_rename.nc
1ea1bcc88472251ac904f609e2cd195d25e61941bc3ac8d535952a872bd84b6c ./combine/accum/20180101T0100Z-PT0001H-rainfall_accumulation.nc
acd51f2a3ec308acfdc5591e86b904eb1a74835f41197d2f83513dc436c00d60 ./combine/accum/20180101T0200Z-PT0002H-rainfall_accumulation.nc
2e53d150fe22608970e967e56fb07c558948e0c2bf279f4e0f677b2d07a54cf0 ./combine/accum/20180101T0300Z-PT0003H-rainfall_accumulation.nc
Expand Down
103 changes: 103 additions & 0 deletions improver_tests/acceptance/test_collapse_realizations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-
# -----------------------------------------------------------------------------
# (C) British Crown copyright. The Met Office.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""Tests for the collapse-realizations CLI"""


import pytest

from . import acceptance as acc

pytestmark = [pytest.mark.acc, acc.skip_if_kgo_missing]
CLI = acc.cli_name_with_dashes(__file__)
run_cli = acc.run_cli(CLI)


def test_basic(tmp_path):
"""
Test mean aggregation.
"""

kgo_dir = acc.kgo_root() / "collapse-realizations"
kgo_path = kgo_dir / "kgo_mean.nc"
input_path = kgo_dir / "input.nc"
output_path = tmp_path / "output.nc"
args = [
input_path,
"--method",
"mean",
"--new-name",
"ensemble_mean_of_air_temperature",
"--output",
output_path,
]
run_cli(args)
acc.compare(output_path, kgo_path)


def test_no_rename(tmp_path):
"""
Test mean aggregation with new-name unspecified.
"""

kgo_dir = acc.kgo_root() / "collapse-realizations"
kgo_path = kgo_dir / "kgo_no_rename.nc"
input_path = kgo_dir / "input.nc"
output_path = tmp_path / "output.nc"
args = [
input_path,
"--method",
"mean",
"--output",
output_path,
]
run_cli(args)
acc.compare(output_path, kgo_path)


def test_no_realization_coord(tmp_path):
"""
Test that an error is raised if there is no realization dimension.
"""

kgo_dir = acc.kgo_root() / "collapse-realizations"
input_path = kgo_dir / "input_no_realization.nc"
output_path = tmp_path / "output.nc"
args = [
input_path,
"--method",
"mean",
"--new-name",
"ensemble_mean_of_air_temperature",
"--output",
output_path,
]
with pytest.raises(ValueError):
run_cli(args)
Original file line number Diff line number Diff line change
Expand Up @@ -29,21 +29,70 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""
Unit tests for the function collapsed.
Unit tests for the function collapse_realizations.
"""

import iris
import numpy as np
import pytest
from iris.exceptions import CoordinateCollapseError

from improver.synthetic_data.set_up_test_cubes import set_up_variable_cube
from improver.utilities.cube_manipulation import collapse_realizations


def test_basic():
"""Test that a collapsed cube is returned with no realization coord"""
data = np.full((3, 3, 3), fill_value=281.0, dtype=np.float32)
cube = set_up_variable_cube(data, realizations=[0, 1, 2])
result = collapse_realizations(cube)
assert "realization" not in [
coord.name() for coord in result.dim_coords + result.aux_coords
]
assert (result.data == np.full((3, 3), fill_value=281.0, dtype=np.float32)).all()
@pytest.fixture
def temperature_cube():
data = 281 * np.random.random_sample((3, 3, 3)).astype(np.float32)
return set_up_variable_cube(data, realizations=[0, 1, 2])


def test_basic(temperature_cube):
"""Test that a collapsed cube is returned with no realization coord."""
result = collapse_realizations(temperature_cube)
assert "realization" not in [coord.name() for coord in result.coords()]
expected = temperature_cube.collapsed(["realization"], iris.analysis.MEAN)
np.testing.assert_allclose(result.data, expected.data)


def test_invalid_dimension(temperature_cube):
"""Test that an error is raised if realization dimension
does not exist."""
sub_cube = temperature_cube.extract(iris.Constraint(realization=0))
with pytest.raises(CoordinateCollapseError):
collapse_realizations(sub_cube, "mean")


def test_different_aggregators(temperature_cube):
"""Test aggregators other than mean."""
aggregator_dict = {
"sum": iris.analysis.SUM,
"median": iris.analysis.MEDIAN,
"std_dev": iris.analysis.STD_DEV,
"min": iris.analysis.MIN,
"max": iris.analysis.MAX,
}
for key, value in aggregator_dict.items():
result = collapse_realizations(temperature_cube, key)
expected = temperature_cube.collapsed(["realization"], value)
np.testing.assert_allclose(result.data, expected.data)


def test_invalid_aggregators(temperature_cube):
"""Test that an error is raised if aggregator is not
one of the allowed types."""

msg = "method must be one of"
with pytest.raises(ValueError, match=msg):
collapse_realizations(temperature_cube, method="product")


def test_1d_std_dev():
"""Test that when std_dev is calculated over a dimension of size 1,
output is all masked and underlying value is np.nan.
"""
data = 281 * np.random.random_sample((1, 3, 3)).astype(np.float32)
cube_1d = set_up_variable_cube(data, realizations=[0])
result = collapse_realizations(cube_1d, "std_dev")
assert np.all(np.ma.getmask(result.data))
assert np.all(np.isnan(result.data.data))

0 comments on commit 2520f58

Please sign in to comment.