From 2520f58bf9a183f8a955283e7ce93339feddf4ef Mon Sep 17 00:00:00 2001 From: Belinda Trotta <73675905+btrotta-bom@users.noreply.github.com> Date: Fri, 13 Oct 2023 20:01:02 +1100 Subject: [PATCH] Add aggregation CLI (#1952) * add aggregation functionality * Handle case where std_dev is undefined * all dimensions to be a string * update cli and acceptance tests * Undo testing change * formatting * reverse testing change * update * change parameter name * fix docstring * Check dimension exists * Refactor to use collapse-realizations cli * Remove old file * Handle nans * fix bug * Remove check * simplify code * Check that realization is a dimension in cli * fix docstring * fix docstring * Add test for invalid method error * Add acceptance test for when new name not specified * formatting * update docstring * update docstring * update docstring * Use pytest * simplify code * formatting * update checksum * Remove argument. --------- Co-authored-by: Belinda Trotta Co-authored-by: Gavin Evans --- improver/cli/collapse_realizations.py | 71 ++++++++++++ improver/utilities/cube_manipulation.py | 34 +++++- improver_tests/acceptance/SHA256SUMS | 4 + .../acceptance/test_collapse_realizations.py | 103 ++++++++++++++++++ .../test_collapse_realizations.py | 69 ++++++++++-- 5 files changed, 267 insertions(+), 14 deletions(-) create mode 100644 improver/cli/collapse_realizations.py create mode 100644 improver_tests/acceptance/test_collapse_realizations.py diff --git a/improver/cli/collapse_realizations.py b/improver/cli/collapse_realizations.py new file mode 100644 index 0000000000..0c3f1e8c47 --- /dev/null +++ b/improver/cli/collapse_realizations.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# ----------------------------------------------------------------------------- +# (C) British Crown copyright. The Met Office. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +"""Script to collapse the realizations dimension of a cube.""" + + +from improver import cli + + +@cli.clizefy +@cli.with_output +def process( + cube: cli.inputcube, *, method: str = "mean", new_name: str = None, +): + """Collapse the realization dimension of a cube. + + Args: + cube (iris.cube.Cube): + Cube to be collapsed. + method (str): + One of "sum", "mean", "median", "std_dev", "min", "max". + new_name (str): + New name for output cube; if None use iris default. + + Returns: + iris.cube.Cube: + Collapsed cube. Dimensions are the same as input cube, + without realization dimension. + + Raises: + ValueError: if realization is not a dimension coordinate. + """ + + from improver.utilities.cube_manipulation import collapse_realizations + + if not (cube.coords("realization", dim_coords=True)): + raise ValueError("realization must be a dimension coordinate.") + + collapsed_cube = collapse_realizations(cube, method=method) + if new_name: + collapsed_cube.rename(new_name) + + return collapsed_cube diff --git a/improver/utilities/cube_manipulation.py b/improver/utilities/cube_manipulation.py index b14d0c928e..089166f828 100644 --- a/improver/utilities/cube_manipulation.py +++ b/improver/utilities/cube_manipulation.py @@ -81,18 +81,44 @@ def collapsed(cube: Cube, *args: Any, **kwargs: Any) -> Cube: return new_cube -def collapse_realizations(cube: Cube) -> Cube: +def collapse_realizations(cube: Cube, method="mean") -> Cube: """Collapses the realization coord of a cube and strips the coord from the cube. Args: cube: - Input cube - + Cube to be aggregated. + method: + One of "sum", "mean", "median", "std_dev", "min", "max"; + default is "mean". Returns: Cube with realization coord collapsed and removed. """ - returned_cube = collapsed(cube, "realization", iris.analysis.MEAN) + + aggregator_dict = { + "sum": iris.analysis.SUM, + "mean": iris.analysis.MEAN, + "median": iris.analysis.MEDIAN, + "std_dev": iris.analysis.STD_DEV, + "min": iris.analysis.MIN, + "max": iris.analysis.MAX, + } + + aggregator = aggregator_dict.get(method) + if aggregator is None: + raise ValueError(f"method must be one of {list(aggregator_dict.keys())}") + + returned_cube = collapsed(cube, "realization", aggregator) returned_cube.remove_coord("realization") + + if ( + (method == "std_dev") + and (len(cube.coord("realization").points) == 1) + and (np.ma.is_masked(returned_cube.data)) + ): + # Standard deviation is undefined. Iris masks the entire output, + # but we also set the underlying data to np.nan here. + returned_cube.data.data[:] = np.nan + return returned_cube diff --git a/improver_tests/acceptance/SHA256SUMS b/improver_tests/acceptance/SHA256SUMS index 8516326944..28127741c1 100644 --- a/improver_tests/acceptance/SHA256SUMS +++ b/improver_tests/acceptance/SHA256SUMS @@ -149,6 +149,10 @@ d0053b55f2abaeeefa3d5d58df597bbaf5b07a2bfbb614c1ff404fb695700d2c ./cloud-conden 5788b1228fbd7cce6c92a2ff6448c4f14d78d868bb45743a816f728b70c6b47d ./cloud-top-temperature/temperature_on_pressure_levels.nc d350e329f47d18c819f75a91a0168dff0c8cce404df2e3992e41c4b78925699b ./cloud-top-temperature/with_id_attr/kgo.nc 2c4064d3d42eedcffda85872b00fabb42b23f71ecb007e5be0b046ef93b0e9fc ./cloud-top-temperature/without_id_attr/kgo.nc +966663c18804e6332f74682686d207ddd80e5c30dbf7f3ace785e1df44c9c673 ./collapse-realizations/input.nc +e4250f25f7e770d00d863e6d5d3e8921c79e3609b68566637bda0f5ba0148b12 ./collapse-realizations/input_no_realization.nc +c21806e891dafce6fd42ea1bd420a3d6197b676f6f7a1c2bcec1c805fcba8d59 ./collapse-realizations/kgo_mean.nc +2ca7400809cb8f306acc8803bb4142ed5bb754931f120229d67b01bf00c130a7 ./collapse-realizations/kgo_no_rename.nc 1ea1bcc88472251ac904f609e2cd195d25e61941bc3ac8d535952a872bd84b6c ./combine/accum/20180101T0100Z-PT0001H-rainfall_accumulation.nc acd51f2a3ec308acfdc5591e86b904eb1a74835f41197d2f83513dc436c00d60 ./combine/accum/20180101T0200Z-PT0002H-rainfall_accumulation.nc 2e53d150fe22608970e967e56fb07c558948e0c2bf279f4e0f677b2d07a54cf0 ./combine/accum/20180101T0300Z-PT0003H-rainfall_accumulation.nc diff --git a/improver_tests/acceptance/test_collapse_realizations.py b/improver_tests/acceptance/test_collapse_realizations.py new file mode 100644 index 0000000000..8eadf31300 --- /dev/null +++ b/improver_tests/acceptance/test_collapse_realizations.py @@ -0,0 +1,103 @@ +# -*- coding: utf-8 -*- +# ----------------------------------------------------------------------------- +# (C) British Crown copyright. The Met Office. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +"""Tests for the collapse-realizations CLI""" + + +import pytest + +from . import acceptance as acc + +pytestmark = [pytest.mark.acc, acc.skip_if_kgo_missing] +CLI = acc.cli_name_with_dashes(__file__) +run_cli = acc.run_cli(CLI) + + +def test_basic(tmp_path): + """ + Test mean aggregation. + """ + + kgo_dir = acc.kgo_root() / "collapse-realizations" + kgo_path = kgo_dir / "kgo_mean.nc" + input_path = kgo_dir / "input.nc" + output_path = tmp_path / "output.nc" + args = [ + input_path, + "--method", + "mean", + "--new-name", + "ensemble_mean_of_air_temperature", + "--output", + output_path, + ] + run_cli(args) + acc.compare(output_path, kgo_path) + + +def test_no_rename(tmp_path): + """ + Test mean aggregation with new-name unspecified. + """ + + kgo_dir = acc.kgo_root() / "collapse-realizations" + kgo_path = kgo_dir / "kgo_no_rename.nc" + input_path = kgo_dir / "input.nc" + output_path = tmp_path / "output.nc" + args = [ + input_path, + "--method", + "mean", + "--output", + output_path, + ] + run_cli(args) + acc.compare(output_path, kgo_path) + + +def test_no_realization_coord(tmp_path): + """ + Test that an error is raised if there is no realization dimension. + """ + + kgo_dir = acc.kgo_root() / "collapse-realizations" + input_path = kgo_dir / "input_no_realization.nc" + output_path = tmp_path / "output.nc" + args = [ + input_path, + "--method", + "mean", + "--new-name", + "ensemble_mean_of_air_temperature", + "--output", + output_path, + ] + with pytest.raises(ValueError): + run_cli(args) diff --git a/improver_tests/utilities/cube_manipulation/test_collapse_realizations.py b/improver_tests/utilities/cube_manipulation/test_collapse_realizations.py index f62e0a182d..4bfd542e68 100644 --- a/improver_tests/utilities/cube_manipulation/test_collapse_realizations.py +++ b/improver_tests/utilities/cube_manipulation/test_collapse_realizations.py @@ -29,21 +29,70 @@ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. """ -Unit tests for the function collapsed. +Unit tests for the function collapse_realizations. """ +import iris import numpy as np +import pytest +from iris.exceptions import CoordinateCollapseError from improver.synthetic_data.set_up_test_cubes import set_up_variable_cube from improver.utilities.cube_manipulation import collapse_realizations -def test_basic(): - """Test that a collapsed cube is returned with no realization coord""" - data = np.full((3, 3, 3), fill_value=281.0, dtype=np.float32) - cube = set_up_variable_cube(data, realizations=[0, 1, 2]) - result = collapse_realizations(cube) - assert "realization" not in [ - coord.name() for coord in result.dim_coords + result.aux_coords - ] - assert (result.data == np.full((3, 3), fill_value=281.0, dtype=np.float32)).all() +@pytest.fixture +def temperature_cube(): + data = 281 * np.random.random_sample((3, 3, 3)).astype(np.float32) + return set_up_variable_cube(data, realizations=[0, 1, 2]) + + +def test_basic(temperature_cube): + """Test that a collapsed cube is returned with no realization coord.""" + result = collapse_realizations(temperature_cube) + assert "realization" not in [coord.name() for coord in result.coords()] + expected = temperature_cube.collapsed(["realization"], iris.analysis.MEAN) + np.testing.assert_allclose(result.data, expected.data) + + +def test_invalid_dimension(temperature_cube): + """Test that an error is raised if realization dimension + does not exist.""" + sub_cube = temperature_cube.extract(iris.Constraint(realization=0)) + with pytest.raises(CoordinateCollapseError): + collapse_realizations(sub_cube, "mean") + + +def test_different_aggregators(temperature_cube): + """Test aggregators other than mean.""" + aggregator_dict = { + "sum": iris.analysis.SUM, + "median": iris.analysis.MEDIAN, + "std_dev": iris.analysis.STD_DEV, + "min": iris.analysis.MIN, + "max": iris.analysis.MAX, + } + for key, value in aggregator_dict.items(): + result = collapse_realizations(temperature_cube, key) + expected = temperature_cube.collapsed(["realization"], value) + np.testing.assert_allclose(result.data, expected.data) + + +def test_invalid_aggregators(temperature_cube): + """Test that an error is raised if aggregator is not + one of the allowed types.""" + + msg = "method must be one of" + with pytest.raises(ValueError, match=msg): + collapse_realizations(temperature_cube, method="product") + + +def test_1d_std_dev(): + """Test that when std_dev is calculated over a dimension of size 1, + output is all masked and underlying value is np.nan. + """ + data = 281 * np.random.random_sample((1, 3, 3)).astype(np.float32) + cube_1d = set_up_variable_cube(data, realizations=[0]) + result = collapse_realizations(cube_1d, "std_dev") + assert np.all(np.ma.getmask(result.data)) + assert np.all(np.isnan(result.data.data))