Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding in new function #845

Merged
merged 13 commits into from
Jul 25, 2024
111 changes: 111 additions & 0 deletions act/utils/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1410,3 +1410,114 @@ def calculate_percentages(ds, fields, time=None, time_slice=None, threshold=None
percentages[i] = j
ds_percent.close()
return percentages


def convert_2d_to_1d(
    ds,
    parse=None,
    variables=None,
    keep_name_if_one=False,
    use_dim_value_in_name=False,
    dim_labels=None,
):
    """
    Function to convert 2D variables into multiple 1D variables, one per
    index along the parse dimension, using that dimension in the new
    variable names.

    Parameters
    ----------
    ds: xarray.Dataset
        Object containing 2D variable to be converted
    parse: str or None
        Coordinate dimension name to parse along. If set to None will
        guess the parse dimension is the first dimension of the dataset
        that is not named 'time'. If no such dimension exists there is
        nothing to split and an unchanged copy is returned.
    variables: str or list of str
        Variable name or names to parse. If not provided will attempt to
        parse all variables having the parse coordinate dimension.
    keep_name_if_one: boolean
        Option to not modify the variable name if the coordinate dimension
        has only one value. Essentially converting a 2D (i.e. (100, 1))
        variable into a 1D variable (i.e. (100)).
    use_dim_value_in_name: boolean
        Option to use value from the coordinate dimension in new variable
        name instead of indexing number. Will use the value prepended
        to the units of the dimension. If the dimension has no 'units'
        attribute only the value is used.
    dim_labels: str or list of str
        Allows for use of custom label to append to end of variable names.
        Labels are looked up by index along the parse dimension, so one
        label is needed per value of that dimension. Takes precedence
        over use_dim_value_in_name.

    Returns
    -------
    A new object copied from input object with the multi-dimensional
    variable split into multiple single-dimensional variables.

    Example
    -------
    # This will get the name of the coordinate dimension so it does not need to
    # be hard coded.
    >>> parse_dim = (list(set(list(ds.dims)) - set(['time'])))[0]

    # Now use the parse_dim name to parse the variable and return new object.
    >>> new_ds = convert_2d_to_1d(ds, parse=parse_dim)

    """
    new_ds = ds.copy()

    # If no parse dimension name given assume it is the first one not equal
    # to 'time'. Iterating in the dataset's own order keeps the guess
    # deterministic when more than one non-time dimension exists.
    if parse is None:
        candidates = [dim for dim in ds.dims if dim != 'time']
        if not candidates:
            # Only a time dimension: no variable can be split.
            return new_ds
        parse = candidates[0]

    if isinstance(variables, str):
        variables = [variables]
    elif variables is None:
        variables = list(new_ds.variables)

    if isinstance(dim_labels, str):
        dim_labels = [dim_labels]

    # Smallest size of the parse dimension for which variables are split and
    # renamed. With keep_name_if_one a dimension of size one is squeezed
    # away instead and the variable keeps its name.
    min_size_to_split = 2 if keep_name_if_one else 1

    parse_coord = ds[parse]
    parse_values = parse_coord.values
    # A missing units attribute must not abort the conversion.
    parse_units = parse_coord.attrs.get('units', '')

    for var in variables:
        # Never split the parse coordinate itself and leave variables that
        # do not have the parse dimension untouched.
        if var == parse or parse not in new_ds[var].dims:
            continue

        parse_size = new_ds.sizes[parse]
        if parse_size < min_size_to_split:
            # Keep the same name but remove the dimension equal to size 1
            new_ds[var] = new_ds[var].squeeze(dim=parse)
            continue

        for i in range(parse_size):
            if dim_labels is not None:
                new_var_name = '_'.join([var, dim_labels[i]])
            elif use_dim_value_in_name:
                level = str(parse_values[i]) + parse_units
                new_var_name = '_'.join([var, parse, level])
            else:
                new_var_name = '_'.join([var, parse, str(i)])

            # Copy only the extracted slice: the new variable stays
            # independent of the input data without duplicating the whole
            # 2D array for every index.
            new_ds[new_var_name] = new_ds[var].isel(indexers={parse: i}).copy()

            # Point the ancillary_variables attribute at the renamed QC
            # variable. Variables without the attribute are left alone.
            try:
                ancillary_variables = new_ds[new_var_name].attrs['ancillary_variables']
                current_qc_var_name = ds.qcfilter.check_for_ancillary_qc(
                    var, add_if_missing=False
                )
                if current_qc_var_name is not None:
                    ancillary_variables = ancillary_variables.replace(
                        current_qc_var_name, 'qc_' + new_var_name
                    )
                new_ds[new_var_name].attrs['ancillary_variables'] = ancillary_variables
            except KeyError:
                pass

        # Remove the old 2D variable after extracting
        del new_ds[var]

    return new_ds
2 changes: 1 addition & 1 deletion tests/qc/test_arm_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def test_scalar_dqr():
# DQR webservice does go down, so ensure it
# properly runs first before testing
try:
ds = add_dqr_to_qc(ds)
ds = add_dqr_to_qc(ds, assessment='Reprocessed,Suspect,Incorrect')
ran = True
except ValueError:
ran = False
Expand Down
48 changes: 48 additions & 0 deletions tests/utils/test_data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import act
from act.utils.data_utils import DatastreamParserARM as DatastreamParser
from act.utils.data_utils import convert_2d_to_1d

spec = importlib.util.find_spec('pyart')
if spec is not None:
Expand Down Expand Up @@ -557,3 +558,50 @@ def test_calculate_percentages():
assert np.round(percentages["chloride"], 3) == 0.915
if not record:
pytest.fail("Expected a warning for using all times.")


def test_convert_2d_to_1d():
    """Check naming, values and clean-up for every naming mode of convert_2d_to_1d."""
    # Create a sample dataset
    data = np.array([[1, 2], [3, 4], [5, 6]])
    ds = xr.Dataset(
        {'var': (('time', 'level'), data)}, coords={'time': [0, 1, 2], 'level': [10, 20]}
    )
    ds['level'].attrs['units'] = 'm'

    # Run the function
    result = convert_2d_to_1d(ds, parse='level')

    # Check the results
    assert 'var_level_0' in result
    assert 'var_level_1' in result
    np.testing.assert_array_equal(result['var_level_0'].values, [1, 3, 5])
    np.testing.assert_array_equal(result['var_level_1'].values, [2, 4, 6])
    # The original 2D variable is dropped from the result but the input
    # dataset is left untouched.
    assert 'var' not in result
    assert 'var' in ds
    assert result['var_level_0'].dims == ('time',)

    # Run the function with a single variable name given as a string
    result = convert_2d_to_1d(ds, parse='level', variables='var')

    # Check the results
    assert 'var' not in result
    np.testing.assert_array_equal(result['var_level_0'].values, [1, 3, 5])
    np.testing.assert_array_equal(result['var_level_1'].values, [2, 4, 6])

    # Run the function with use_dim_value_in_name=True
    result = convert_2d_to_1d(ds, parse='level', use_dim_value_in_name=True)

    # Check the results
    assert 'var_level_10m' in result
    assert 'var_level_20m' in result
    assert 'var' not in result
    np.testing.assert_array_equal(result['var_level_10m'].values, [1, 3, 5])
    np.testing.assert_array_equal(result['var_level_20m'].values, [2, 4, 6])

    # Run the function with custom labels
    result = convert_2d_to_1d(ds, parse='level', dim_labels=['low', 'high'])

    # Check the results
    assert 'var_low' in result
    assert 'var_high' in result
    assert 'var' not in result
    np.testing.assert_array_equal(result['var_low'].values, [1, 3, 5])
    np.testing.assert_array_equal(result['var_high'].values, [2, 4, 6])

    # Create a sample dataset
    data = np.array([[1], [3], [5]])
    ds = xr.Dataset({'var': (('time', 'level'), data)}, coords={'time': [0, 1, 2], 'level': [10]})

    # Run the function with keep_name_if_one=True
    result = convert_2d_to_1d(ds, parse='level', keep_name_if_one=True)

    # Check the results: same name, but the size-one dimension is gone
    assert 'var' in result
    assert result['var'].dims == ('time',)
    np.testing.assert_array_equal(result['var'].values, [1, 3, 5])

    # Run the function with a single custom label given as a string
    result = convert_2d_to_1d(ds, parse='level', dim_labels='surface')

    # Check the results
    assert 'var_surface' in result
    assert 'var' not in result
    np.testing.assert_array_equal(result['var_surface'].values, [1, 3, 5])