From df91968f9da3f2d545a8939057cf8a575dc92a3e Mon Sep 17 00:00:00 2001
From: "Alyssa J. Sockol" <49569746+ajsockol@users.noreply.github.com>
Date: Thu, 25 Jul 2024 14:55:41 -0500
Subject: [PATCH] Adding in new function (#845)

* Adding convert 2d to 1d function

* Resolving commits from pull request

* Adding unit tests for convert_2d_to_1d

* Updating unit tests

* adding updates

* pushing after installing precommit

* adding fix so my pr will work

* adding function to init file
---
 act/utils/__init__.py          |   1 +
 act/utils/data_utils.py        | 152 +++++++++++++++++++++++++++++++++
 tests/qc/test_arm_qc.py        |   2 +-
 tests/utils/test_data_utils.py |  61 +++++++++++++
 4 files changed, 215 insertions(+), 1 deletion(-)

diff --git a/act/utils/__init__.py b/act/utils/__init__.py
index 0fa1a75764..2f31e9755a 100644
--- a/act/utils/__init__.py
+++ b/act/utils/__init__.py
@@ -33,6 +33,7 @@
             'arm_site_location_search',
             'DatastreamParserARM',
             'calculate_percentages',
+            'convert_2d_to_1d',
         ],
         'datetime_utils': [
             'dates_between',
diff --git a/act/utils/data_utils.py b/act/utils/data_utils.py
index cebb7a5fea..d8c98d10b2 100644
--- a/act/utils/data_utils.py
+++ b/act/utils/data_utils.py
@@ -1410,3 +1410,155 @@ def calculate_percentages(ds, fields, time=None, time_slice=None, threshold=None
         percentages[i] = j
     ds_percent.close()
     return percentages
+
+
+def _get_1d_name_suffixes(ds, parse, num_values, use_dim_value_in_name, dim_labels):
+    """
+    Build the suffix appended to a variable name for each index along the
+    parse dimension when a 2D variable is split into 1D variables.
+    """
+    if dim_labels is not None:
+        if len(dim_labels) < num_values:
+            raise ValueError(
+                f'dim_labels has {len(dim_labels)} entries but dimension '
+                f'{parse!r} has {num_values} values.'
+            )
+        return list(dim_labels[:num_values])
+
+    if use_dim_value_in_name:
+        # Units are optional on the dimension, fall back to the bare value.
+        units = str(ds[parse].attrs.get('units', ''))
+        return ['_'.join([parse, str(value) + units]) for value in ds[parse].values]
+
+    return ['_'.join([parse, str(i)]) for i in range(num_values)]
+
+
+def convert_2d_to_1d(
+    ds,
+    parse=None,
+    variables=None,
+    keep_name_if_one=False,
+    use_dim_value_in_name=False,
+    dim_labels=None,
+):
+    """
+    Function to convert a single 2D variable into multiple 1D
+    variables using the second dimension in the new variable name.
+
+    Parameters
+    ----------
+    ds : xarray.Dataset
+        Object containing 2D variable to be converted
+    parse : str or None
+        Coordinate dimension name to parse along. If set to None the first
+        dimension of the object not named 'time' is used.
+    variables : str or list of str
+        Variable name or names to parse. If not provided will attempt to
+        parse all two dimensional variables with the parse coordinate
+        dimension.
+    keep_name_if_one : boolean
+        Option to not modify the variable name if the coordinate dimension
+        has only one value. Essentially converting a 2D (i.e. (100, 1))
+        variable into a 1D variable (i.e. (100,)).
+    use_dim_value_in_name : boolean
+        Option to use value from the coordinate dimension in new variable
+        name instead of indexing number. The units of the dimension are
+        appended to the value when the dimension has a 'units' attribute.
+    dim_labels : str or list of str
+        Custom labels to append to the end of the variable names instead of
+        the dimension name and index. One label is needed for each value
+        along the parse dimension.
+
+    Returns
+    -------
+    new_ds : xarray.Dataset
+        A new object copied from input object with the multi-dimensional
+        variable split into multiple single-dimensional variables.
+
+    Raises
+    ------
+    KeyError
+        If parse is not a dimension or variable of the object.
+    ValueError
+        If parse is None and the object has no dimension other than 'time',
+        or if fewer dim_labels are given than values in the parse dimension.
+
+    Example
+    -------
+    # This will get the name of the coordinate dimension so it does not need to
+    # be hard coded.
+    >>> parse_dim = [dim for dim in ds.dims if dim != 'time'][0]
+
+    # Now use the parse_dim name to parse the variable and return new object.
+    >>> new_ds = convert_2d_to_1d(ds, parse=parse_dim)
+
+    """
+    # If no parse dimension name given assume it is the first one not equal to
+    # 'time'. Dimension order is used, not a set, so the guess is repeatable.
+    if parse is None:
+        non_time_dims = [dim for dim in ds.dims if dim != 'time']
+        if not non_time_dims:
+            raise ValueError("No dimension other than 'time' available to parse along.")
+        parse = non_time_dims[0]
+    elif parse not in ds.dims and parse not in ds.variables:
+        raise KeyError(f'Parse dimension {parse!r} not found in the Dataset.')
+
+    new_ds = ds.copy()
+
+    if isinstance(variables, str):
+        variables = [variables]
+    elif variables is None:
+        variables = list(new_ds.variables)
+
+    if isinstance(dim_labels, str):
+        dim_labels = [dim_labels]
+
+    # With keep_name_if_one a dimension of size one is squeezed out and the
+    # variable keeps its name instead of being split.
+    min_size_to_split = 2 if keep_name_if_one else 1
+
+    suffixes = None
+    for var in variables:
+        if var == parse or parse not in new_ds[var].dims:
+            continue
+
+        num_values = new_ds.sizes[parse]
+        if num_values < min_size_to_split:
+            # Keep the same name but remove the dimension equal to size 1
+            new_ds[var] = new_ds[var].squeeze(dim=parse)
+            continue
+
+        # The suffixes depend only on the parse dimension so build them once.
+        if suffixes is None:
+            suffixes = _get_1d_name_suffixes(
+                ds, parse, num_values, use_dim_value_in_name, dim_labels
+            )
+
+        # Look up the QC variable linked through ancillary_variables once per
+        # variable so the link can be pointed at the matching split QC variable.
+        qc_var_name = None
+        if 'ancillary_variables' in new_ds[var].attrs:
+            try:
+                qc_var_name = ds.qcfilter.check_for_ancillary_qc(var, add_if_missing=False)
+            except KeyError:
+                qc_var_name = None
+
+        for i, suffix in enumerate(suffixes):
+            # Copy only the extracted slice, not the full 2D variable.
+            new_var = new_ds[var].isel(indexers={parse: i}).copy()
+            if qc_var_name is not None:
+                # Replace whole names only so a QC name that is a prefix of
+                # another ancillary variable name does not corrupt that name.
+                ancillary = new_var.attrs['ancillary_variables']
+                is_str = isinstance(ancillary, str)
+                names = ancillary.split() if is_str else list(ancillary)
+                new_qc_name = '_'.join([qc_var_name, suffix])
+                names = [new_qc_name if name == qc_var_name else name for name in names]
+                new_var.attrs['ancillary_variables'] = ' '.join(names) if is_str else names
+
+            new_ds['_'.join([var, suffix])] = new_var
+
+        # Remove the old 2D variable after extracting
+        del new_ds[var]
+
+    return new_ds
diff --git a/tests/qc/test_arm_qc.py b/tests/qc/test_arm_qc.py
index e118648706..ec623c51b2 100644
--- a/tests/qc/test_arm_qc.py
+++ b/tests/qc/test_arm_qc.py
@@ -12,7 +12,7 @@ def test_scalar_dqr():
     # DQR webservice does go down, so ensure it
     # properly runs first before testing
     try:
-        ds = add_dqr_to_qc(ds)
+        ds = add_dqr_to_qc(ds, assessment='Reprocessed,Suspect,Incorrect')
         ran = True
     except ValueError:
         ran = False
diff --git a/tests/utils/test_data_utils.py b/tests/utils/test_data_utils.py
index f92fe14457..0a626e135f 100644
--- a/tests/utils/test_data_utils.py
+++ b/tests/utils/test_data_utils.py
@@ -10,6 +10,7 @@
 
 import act
 from act.utils.data_utils import DatastreamParserARM as DatastreamParser
+from act.utils.data_utils import convert_2d_to_1d
 
 spec = importlib.util.find_spec('pyart')
 if spec is not None:
@@ -557,3 +558,63 @@ def test_calculate_percentages():
         assert np.round(percentages["chloride"], 3) == 0.915
     if not record:
         pytest.fail("Expected a warning for using all times.")
+
+
+def test_convert_2d_to_1d():
+    # Create a sample dataset
+    data = np.array([[1, 2], [3, 4], [5, 6]])
+    ds = xr.Dataset(
+        {'var': (('time', 'level'), data)}, coords={'time': [0, 1, 2], 'level': [10, 20]}
+    )
+    ds['level'].attrs['units'] = 'm'
+
+    # Run the function
+    result = convert_2d_to_1d(ds, parse='level')
+
+    # Check the results
+    assert 'var_level_0' in result
+    assert 'var_level_1' in result
+    np.testing.assert_array_equal(result['var_level_0'].values, [1, 3, 5])
+    np.testing.assert_array_equal(result['var_level_1'].values, [2, 4, 6])
+
+    # Run the function with use_dim_value_in_name=True
+    result = convert_2d_to_1d(ds, parse='level', use_dim_value_in_name=True)
+
+    # Check the results
+    assert 'var_level_10m' in result
+    assert 'var_level_20m' in result
+    np.testing.assert_array_equal(result['var_level_10m'].values, [1, 3, 5])
+    np.testing.assert_array_equal(result['var_level_20m'].values, [2, 4, 6])
+
+    # Run the function with custom labels
+    result = convert_2d_to_1d(ds, parse='level', dim_labels=['low', 'high'])
+
+    # Check the results
+    assert 'var_low' in result
+    assert 'var_high' in result
+    np.testing.assert_array_equal(result['var_low'].values, [1, 3, 5])
+    np.testing.assert_array_equal(result['var_high'].values, [2, 4, 6])
+
+    # Create a sample dataset
+    data = np.array([[1], [3], [5]])
+    ds = xr.Dataset({'var': (('time', 'level'), data)}, coords={'time': [0, 1, 2], 'level': [10]})
+
+    # Run the function with keep_name_if_one=True
+    result = convert_2d_to_1d(ds, parse='level', keep_name_if_one=True)
+
+    # Check the results
+    assert 'var' in result
+    np.testing.assert_array_equal(result['var'].values, [1, 3, 5])
+
+    # A parse dimension without units uses only the value in the new name
+    ds = xr.Dataset(
+        {'var': (('time', 'level'), np.array([[1, 2], [3, 4]]))},
+        coords={'time': [0, 1], 'level': [10, 20]},
+    )
+    result = convert_2d_to_1d(ds, use_dim_value_in_name=True)
+    assert 'var_level_10' in result
+    assert 'var_level_20' in result
+
+    # Too few custom labels raises instead of failing part way through
+    with pytest.raises(ValueError):
+        convert_2d_to_1d(ds, dim_labels='low')