From f34055896264bf3664e57b070fa3217e25045e8b Mon Sep 17 00:00:00 2001 From: "benjamin.ayliffe" Date: Wed, 11 Dec 2024 11:17:29 +0000 Subject: [PATCH] Backport release changes for wx modal derivation into master. These changes are primarily aimed at including the cloud component from wet symbols in dry day symbol derivation. --- improver/categorical/modal_code.py | 48 +++++++++- improver/categorical/utilities.py | 19 ++++ .../acceptance/test_weather_symbol_modes.py | 4 +- .../categorical/decision_tree/__init__.py | 37 +++++--- .../decision_tree/test_ModalFromGroupings.py | 95 ++++++++++++++++--- 5 files changed, 172 insertions(+), 31 deletions(-) diff --git a/improver/categorical/modal_code.py b/improver/categorical/modal_code.py index d13a36232b..9427bf9a1e 100644 --- a/improver/categorical/modal_code.py +++ b/improver/categorical/modal_code.py @@ -23,7 +23,7 @@ from improver.utilities.cube_manipulation import MergeCubes from ..metadata.forecast_times import forecast_period_coord -from .utilities import day_night_map +from .utilities import day_night_map, dry_map class BaseModalCategory(BasePlugin): @@ -358,6 +358,12 @@ class ModalFromGroupings(BaseModalCategory): Where there are different categories available for night and day, the modal code returned is always a day code, regardless of the times covered by the input files. + + If a location is to return a dry code after consideration of the various + weightings, the wet codes for that location are converted into the best + matching dry cloud code and these are included in determining the resulting + dry code. The wet bias has no impact on the weight of these converted wet + codes, but the day weighting still applies. """ # Day length set to aid testing. @@ -420,6 +426,8 @@ def __init__( constructing the categories. 
""" super().__init__(decision_tree) + self.dry_map = dry_map(self.decision_tree) + self.broad_categories = broad_categories self.wet_categories = wet_categories self.intensity_categories = intensity_categories @@ -744,6 +752,35 @@ def _set_blended_times(cube: Cube, result: Cube) -> None: ) result.replace_coord(new_coord) + def _get_dry_equivalents( + self, cube: Cube, dry_indices: np.ndarray, time_axis + ) -> Cube: + """ + Returns a cube with only dry codes in which all wet codes have + been replaced by their nearest dry cloud equivalent. For example a + shower code is replaced with a partly cloudy code, a light rain code + is replaced with a cloud code, and a heavy rain code is replaced with + an overcast cloud code. + + Args: + cube: Weather code cube. + dry_indices: An array of bools which are true for locations where + the summary weather code will be dry. + + Returns: + cube: Wet codes converted to their dry equivalent for those points + that will receive a dry summary weather code. + """ + dry_cube = cube.copy() + for value, target in self.dry_map.items(): + dry_cube.data = np.where(cube.data == value, target, dry_cube.data) + + original = np.rollaxis(cube.data, time_axis) + dried = np.rollaxis(dry_cube.data, time_axis) + original[..., dry_indices] = dried[..., dry_indices] + + return cube + def process(self, cubes: CubeList) -> Cube: """Calculate the modal categorical code by grouping weather codes. @@ -767,14 +804,19 @@ def process(self, cubes: CubeList) -> Cube: if len(cube.coord("time").points) == 1: result = cube else: - original_cube = self._emphasise_day_period(cube.copy()) - cube = self._consolidate_intensity_categories(cube) cube = self._emphasise_day_period(cube) result = cube[0].copy() (time_axis,) = cube.coord_dims("time") wet_indices = self._find_wet_indices(cube, time_axis) + + # For dry locations convert the wet codes to their equivalent dry + # codes for use in determining the summary symbol. 
+ cube = self._get_dry_equivalents(cube, ~wet_indices, time_axis) + + original_cube = cube.copy() + cube = self._consolidate_intensity_categories(cube) result = self._find_most_significant_dry_code(cube, result, ~wet_indices) result = self._get_most_likely_following_grouping( diff --git a/improver/categorical/utilities.py b/improver/categorical/utilities.py index 72532a3b86..9b6393c127 100644 --- a/improver/categorical/utilities.py +++ b/improver/categorical/utilities.py @@ -576,3 +576,22 @@ def day_night_map(decision_tree: Dict[str, Dict[str, Union[str, List]]]) -> Dict for k, v in decision_tree.items() if "if_night" in v.keys() } + + +def dry_map(decision_tree: Dict[str, Dict[str, Union[str, List]]]) -> Dict: + """Returns a dict showing which dry values are linked to which wet values. + This is used to produce cloud contributions from wet codes when determining + a dry summary symbol. + + Args: + decision_tree: + Decision tree definition, provided as a dictionary. + + Returns: + dict showing which dry categories (values) are linked to which wet categories (keys) + """ + return { + v["leaf"]: v["dry_equivalent"] + for v in decision_tree.values() + if "dry_equivalent" in v.keys() + } diff --git a/improver_tests/acceptance/test_weather_symbol_modes.py b/improver_tests/acceptance/test_weather_symbol_modes.py index 2d3723ae9d..44290b0ad2 100644 --- a/improver_tests/acceptance/test_weather_symbol_modes.py +++ b/improver_tests/acceptance/test_weather_symbol_modes.py @@ -44,7 +44,7 @@ def test_expected(tmp_path, test_path): intensity_categories = ( acc.kgo_root() / "weather-symbol-modes" / "intensity_categories.json" ) - wxtree = acc.kgo_root() / "categorical-modes" / "wx_decision_tree.json" + wxtree = acc.kgo_root() / "weather-symbol-modes" / "wx_decision_tree.json" output_path = tmp_path / "output.nc" args = [ *input_paths, @@ -74,7 +74,7 @@ def test_no_input(tmp_path): intensity_categories = ( acc.kgo_root() / "weather-symbol-modes" / "intensity_categories.json" ) - 
wxtree = acc.kgo_root() / "categorical-modes" / "wx_decision_tree.json" + wxtree = acc.kgo_root() / "weather-symbol-modes" / "wx_decision_tree.json" output_path = tmp_path / "output.nc" args = [ "--decision-tree", diff --git a/improver_tests/categorical/decision_tree/__init__.py b/improver_tests/categorical/decision_tree/__init__.py index 769d37065c..b2c4af2232 100644 --- a/improver_tests/categorical/decision_tree/__init__.py +++ b/improver_tests/categorical/decision_tree/__init__.py @@ -488,56 +488,63 @@ def wxcode_decision_tree(accumulation: bool = False) -> Dict[str, Dict[str, Any] "Fog": {"leaf": 6, "group": "visibility"}, "Cloudy": {"leaf": 7}, "Overcast": {"leaf": 8}, - "Light_Shower_Night": {"leaf": 9}, + "Light_Shower_Night": {"leaf": 9, "dry_equivalent": 2}, "Light_Shower_Day": { "leaf": 10, "if_night": "Light_Shower_Night", "group": "rain", + "dry_equivalent": 3, }, - "Drizzle": {"leaf": 11, "group": "rain"}, - "Light_Rain": {"leaf": 12, "group": "rain"}, - "Heavy_Shower_Night": {"leaf": 13}, + "Drizzle": {"leaf": 11, "group": "rain", "dry_equivalent": 8}, + "Light_Rain": {"leaf": 12, "group": "rain", "dry_equivalent": 8}, + "Heavy_Shower_Night": {"leaf": 13, "dry_equivalent": 2}, "Heavy_Shower_Day": { "leaf": 14, "if_night": "Heavy_Shower_Night", "group": "rain", + "dry_equivalent": 3, }, - "Heavy_Rain": {"leaf": 15, "group": "rain"}, - "Sleet_Shower_Night": {"leaf": 16}, + "Heavy_Rain": {"leaf": 15, "group": "rain", "dry_equivalent": 8}, + "Sleet_Shower_Night": {"leaf": 16, "dry_equivalent": 2}, "Sleet_Shower_Day": { "leaf": 17, "if_night": "Sleet_Shower_Night", "group": "sleet", + "dry_equivalent": 3, }, - "Sleet": {"leaf": 18, "group": "sleet"}, - "Hail_Shower_Night": {"leaf": 19}, + "Sleet": {"leaf": 18, "group": "sleet", "dry_equivalent": 8}, + "Hail_Shower_Night": {"leaf": 19, "dry_equivalent": 2}, "Hail_Shower_Day": { "leaf": 20, "if_night": "Hail_Shower_Night", "group": "convection", + "dry_equivalent": 3, }, - "Hail": {"leaf": 21, "group": 
"convection"}, - "Light_Snow_Shower_Night": {"leaf": 22}, + "Hail": {"leaf": 21, "group": "convection", "dry_equivalent": 8}, + "Light_Snow_Shower_Night": {"leaf": 22, "dry_equivalent": 2}, "Light_Snow_Shower_Day": { "leaf": 23, "if_night": "Light_Snow_Shower_Night", "group": "snow", + "dry_equivalent": 3, }, - "Light_Snow": {"leaf": 24, "group": "snow"}, - "Heavy_Snow_Shower_Night": {"leaf": 25}, + "Light_Snow": {"leaf": 24, "group": "snow", "dry_equivalent": 8}, + "Heavy_Snow_Shower_Night": {"leaf": 25, "dry_equivalent": 2}, "Heavy_Snow_Shower_Day": { "leaf": 26, "if_night": "Heavy_Snow_Shower_Night", "group": "snow", + "dry_equivalent": 3, }, - "Heavy_Snow": {"leaf": 27, "group": "snow"}, - "Thunder_Shower_Night": {"leaf": 28}, + "Heavy_Snow": {"leaf": 27, "group": "snow", "dry_equivalent": 8}, + "Thunder_Shower_Night": {"leaf": 28, "dry_equivalent": 2}, "Thunder_Shower_Day": { "leaf": 29, "if_night": "Thunder_Shower_Night", "group": "convection", + "dry_equivalent": 3, }, - "Thunder": {"leaf": 30, "group": "convection"}, + "Thunder": {"leaf": 30, "group": "convection", "dry_equivalent": 8}, } if accumulation: diff --git a/improver_tests/categorical/decision_tree/test_ModalFromGroupings.py b/improver_tests/categorical/decision_tree/test_ModalFromGroupings.py index d975b429cc..4876f60cc5 100644 --- a/improver_tests/categorical/decision_tree/test_ModalFromGroupings.py +++ b/improver_tests/categorical/decision_tree/test_ModalFromGroupings.py @@ -36,6 +36,9 @@ "snow_shower": [26, 23], "snow": [27, 24], "thunder": [30, 29], + "cloud": [7, 8], + "sun": [3, 1], + "vis": [5, 6], } @@ -71,9 +74,10 @@ # significant dry code is selected (8). ([5, 5, 5, 5, 6, 6, 6, 6, 8, 8, 8, 8, 7, 7, 7, 7], 8), # An extreme edge case in which all the codes across time for a site - # are different. More dry symbols are present, so the most - # significant dry code is selected (8). - ([1, 3, 4, 5, 7, 8, 10, 17, 20, 23], 8), + # are different. 
More dry symbols are present, so we get a dry code. + # The wet symbols are translated to their cloud equivalents, all + # partly cloud in this case, so this symbol ends up dominating (3). + ([1, 3, 4, 5, 7, 8, 10, 17, 20, 23], 3), # Equal numbers of dry and wet symbols leads to a wet symbol being chosen. # Code 23 and 17 are both frozen precipitation, so are grouped together, # and the most significant of these is chosen based on the order of the codes @@ -91,9 +95,10 @@ # More dry codes than wet codes. Most common code (2, partly cloudy night) # should be converted to a day symbol. ([2, 2, 2, 0, 0, 2, 10, 10, 11, 12, 13], 3), - # More dry codes than wet codes. Most common code (0, clear night) + # More dry codes than wet codes. Wet code cloud equivalents are partly + # cloudy, so that comes to dominate in its day form (3). # should be converted to a day symbol. - ([0, 0, 0, 2, 2, 0, 10, 10, 11, 12, 13], 1), + ([0, 0, 0, 2, 2, 0, 10, 10, 11, 12, 13], 3), # Two locations with different modal dry codes. ([[3, 3, 3, 4, 5, 5], [3, 3, 4, 4, 4, 5]], [3, 4]), # Four locations with different modal dry codes. @@ -110,6 +115,17 @@ # should be selected i.e. a partly cloudy night code (2) becomes a partly # cloudy day code (3). ([0, 0, 0, 2, 2, 2, 7, 7], 3), + # A dry dominated set of codes, but one shower code is transformed to its + # daytime partly cloudy equivalent and considered in determining the dominant + # dry code, which as a result ends up as partly cloud (3). + ([1, 1, 1, 2, 2, 9], 3), + # Dry dominated, 3 sunshine codes, 1 overcast, and 2 light rain. The + # overcast cloud cover in the light rain codes is included in determining the + # dominant dry code, leading to an overcast symbol overall (8). + ([1, 1, 1, 8, 12, 12], 8), + # Dry dominated and after cloud equivalence (drizzle becomes overcast (8)). + # All codes are unique dry codes, so the most significant is selected (8). 
+ ([1, 3, 4, 5, 7, 11], 8), ), ) def test_expected_values(wxcode_series, expected): @@ -131,21 +147,25 @@ def test_expected_values(wxcode_series, expected): @pytest.mark.parametrize( "data, wet_bias, expected, reverse_wet_values, reverse_wet_keys", ( - # More dry codes (6) than wet codes (4), the most significant dry symbol - # is selected. - ([1, 3, 4, 5, 7, 8, 10, 10, 10, 10], 1, 8, False, False), # A wet bias of 2 means that at least 1/(1+2) * 10 = 3.33 codes must be wet # in order to produce a wet code. As 4 codes are wet, a wet code is produced. ([1, 3, 4, 5, 7, 8, 10, 10, 10, 10], 2, 10, False, False), # More dry codes (7) than wet codes (3),the most significant dry symbol - # is selected. - ([1, 3, 4, 5, 7, 8, 8, 10, 10, 10], 1, 8, False, False), + # is selected after cloud equivalence, which become partly cloudy (3). + ([1, 3, 4, 5, 7, 8, 8, 10, 10, 10], 1, 3, False, False), # A wet bias of 2 means that at least 1/(1+2) * 10 = 3.33 codes must be wet # in order to produce a wet code. As 3 codes are wet, a dry code is produced. - ([1, 3, 4, 5, 7, 8, 8, 10, 10, 10], 2, 8, False, False), + ([1, 3, 4, 5, 7, 8, 8, 10, 10, 10], 2, 3, False, False), # A wet bias of 3 means that at least 1/(1+3) * 10 = 2.5 codes must be wet # in order to produce a wet code. As 3 codes are wet, a wet code is produced. ([1, 3, 4, 5, 7, 8, 8, 10, 10, 10], 3, 10, False, False), + # A wet bias of 2 should have no impact on the chosen dry code if one is + # chosen. In this case cloudy conditions dominate the dry codes, and the + # cloud equivalents to the showers are partly cloudy. If the wet bias were + # multiplying up the wet code cloud equivalents we would expect (3) to + # be the resulting dry symbol (5x3), but instead we end up tied 3x3 and 3x7, + # so the more significant (7) code results. This is what we want. + ([7, 7, 7, 1, 3, 10, 10], 2, 7, False, False), # A wet bias of 2 means that at least 1/(1+2) * 10 = 3.33 codes must be wet # in order to produce a wet code. 
A tie between the wet codes with the # highest index selected. @@ -172,6 +192,12 @@ def test_expected_values(wxcode_series, expected): False, False, ), + # Wet bias does not impact the contribution of wet code dry equivalents + # in determining the overall summary in dry dominated scenarios. Here the + # large wet bias does not lead to a partly cloudy summary code, once the + # shower code is dried it contributes only a single partly cloudy + # code which is insufficient to change the chosen cloudy (7) summary. + ([1, 3, 4, 7, 7, 9], 3, 7, False, False), ), ) def test_expected_values_wet_bias( @@ -211,6 +237,12 @@ def test_expected_values_wet_bias( # For a day length of 9 and a day weighting of 2, the number of clear day codes # doubles with one more shower symbol giving 6 dry codes, and 5 wet codes. ([10, 10, 10, 10, 1, 1, 1, 1, 1], 1, 2, 3, 5, 9, 1), + # Dry with one wet symbol changed to a cloud equivalent (3). This falls in the + # day weighting period, meaning we end up with 5x1 and 5x3, such that the more + # significant weather code (3) will be chosen. This demonstrates that the day + # weighting multiplication of the wet codes does impact the chosen dry code + # when these codes fall in the period of enhanced day weighting. + ([1, 1, 1, 10, 8, 1, 3, 3, 3], 1, 2, 3, 5, 9, 3), # Selecting a different period results in 6 dry codes and 6 wet codes, # so the resulting code is wet. ([10, 10, 10, 10, 10, 1, 1, 1, 1], 1, 2, 4, 7, 9, 10), @@ -279,6 +311,21 @@ def test_expected_values_day_weighting( @pytest.mark.parametrize( "data, ignore_intensity, expected, reverse_intensity_dict", ( + # Dry dominated. Rain contributes overcast conditions. Cloud therefore + # becomes the dry code. The cloud group contains 2 overcast codes and + # 1 cloudy code, so overcast (8) is chosen. + ([1, 1, 1, 7, 12, 12], True, 8, False), + # Dry dominated. Showers contribute partly cloudy conditions meaning + # the sunny/partly cloudy group provides the summary code. 
Of this + # there are 2 sunny and 2 partly cloud codes, with the latter chosen (3) + # as more significant. + ([1, 1, 10, 14, 8, 8, 8], True, 3, False), + # Dry. Partly cloudy and sunny are grouped by the intensity + # categorisation allowing them to dominate over the overcast + # codes. Within that group the sunny codes are the more common, + # so a sunny code (1) is returned rather than partly cloudy (3) + # or overcast (8). + ([1, 1, 1, 3, 3, 8, 8, 8, 8], True, 1, False), # All precipitation is frozen. Sleet shower is the modal code. ([23, 23, 23, 26, 17, 17, 17, 17], False, 17, False), # When snow shower codes are grouped, light snow shower is chosen as it @@ -303,6 +350,10 @@ def test_expected_values_day_weighting( [1, 26], False, ), + # Demonstrate that the visibility category allows low vis to dominate. + ([5, 5, 6, 6, 1, 1, 1], True, 5, False), + # As above but reversed intensity order to yield fog instead of mist. + ([5, 5, 6, 6, 1, 1, 1], True, 6, True), ), ) def test_expected_values_ignore_intensity( @@ -377,6 +428,28 @@ def test_expected_values_ignore_intensity( True, [26, 23], ), + # The day emphasis and dry equivalent codes for wet codes in that period + # conspire to give an overcast (8) code to summarise the day. We include + # emphasis of the dried codes as these still fall in the period that we + # want to emphasise. + ([1, 1, 12, 12, 8, 1, 1, 1], 1, 2, 2, 6, 8, True, 8), + # Day emphasis and drying (dry dominated) are such that we end up with + # 6, 6, 8, 8, 8, 8, 3, 3, 1, 1, 1, 5. The intensity consolidation + # groups the partly cloudy (3) and sunny (1) codes together making this + # the dominant group. Of this group sunny codes dominate and this becomes + # the summary code (1). + ([6, 6, 12, 12, 3, 1, 1, 5], 1, 2, 2, 6, 8, True, 1), + # As above but without the intensity consolidation. The sunny and partly + # sunny remain ungrouped allowing the dried rain, which has become + # overcast codes to dominate. 
We get an overcast (8) summary. + ([6, 6, 12, 12, 3, 1, 1, 5], 1, 2, 2, 6, 8, False, 8), + # Looking again at the same case but with both a wet bias and a day + # emphasis of 2 we now get a wet dominated day. The codes are + # 6, 6, 12, 12, 12, 12, 3, 3, 1, 1, 1, 5, but wet codes count twice + # in determining the dominant conditions. 8 wet vs 8 dry results in a wet + # summary code. Intensity consolidation amongst the dry codes is + # irrelevant as is the cloud equivalence. + ([6, 6, 12, 12, 3, 1, 1, 5], 2, 2, 2, 6, 8, True, 12), ), ) def test_expected_values_interactions(