Skip to content

Commit

Permalink
Adding in new function (ARM-DOE#845)
Browse files (browse the repository at this point in the history)
* Adding convert 2d to 1d function

* Resolving commits from pull request

* Adding unit tests for convert_2d_to_1d

* Updating unit tests

* adding updates

* pushing after installing precommit

* adding fix so my pr will work

* adding function to init file
  • Loading branch information
ajsockol authored Jul 25, 2024
1 parent fe18180 commit df91968
Show file tree
Hide file tree
Showing 4 changed files with 161 additions and 1 deletion.
1 change: 1 addition & 0 deletions act/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
'arm_site_location_search',
'DatastreamParserARM',
'calculate_percentages',
'convert_2d_to_1d',
],
'datetime_utils': [
'dates_between',
Expand Down
111 changes: 111 additions & 0 deletions act/utils/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1410,3 +1410,114 @@ def calculate_percentages(ds, fields, time=None, time_slice=None, threshold=None
percentages[i] = j
ds_percent.close()
return percentages


def convert_2d_to_1d(
    ds,
    parse=None,
    variables=None,
    keep_name_if_one=False,
    use_dim_value_in_name=False,
    dim_labels=None,
):
    """
    Function to convert a single 2D variable into multiple 1D
    variables using the second dimension in the new variable name.

    Parameters
    ----------
    ds : xarray.Dataset
        Object containing 2D variable to be converted
    parse : str or None
        Coordinate dimension name to parse along. If set to None the first
        dimension of the dataset that is not named 'time' is used.
    variables : str or list of str
        Variable name or names to parse. If not provided will attempt to
        parse all variables that have the parse coordinate dimension.
    keep_name_if_one : boolean
        Option to not modify the variable name if the coordinate dimension
        has only one value. Essentially converting a 2D (i.e. (100, 1))
        variable into a 1D (i.e. (100,)) variable.
    use_dim_value_in_name : boolean
        Option to use value from the coordinate dimension in new variable
        name instead of indexing number. The value is followed by the
        'units' attribute of the coordinate; if the coordinate has no
        'units' attribute only the value is used.
    dim_labels : str or list of str
        Custom labels appended to the end of the variable names, one per
        index along the parse dimension. When given, this takes precedence
        over use_dim_value_in_name.

    Returns
    -------
    new_ds : xarray.Dataset
        A new object copied from input object with the multi-dimensional
        variable split into multiple single-dimensional variables. The
        input object is not modified.

    Raises
    ------
    IndexError
        If parse is None and the dataset has no dimension other than
        'time', or if dim_labels has fewer entries than the parse
        dimension has values.

    Example
    -------
    # This will get the name of the coordinate dimension so it does not need to
    # be hard coded.
    >>> parse_dim = (list(set(list(ds.dims)) - set(['time'])))[0]
    # Now use the parse_dim name to parse the variable and return new object.
    >>> new_ds = convert_2d_to_1d(ds, parse=parse_dim)

    """
    # If no parse dimension name given assume it is the first one not equal
    # to 'time'. Dimension order is used rather than a set so the guess is
    # the same on every run when several non-time dimensions exist.
    if parse is None:
        parse = [dim for dim in ds.dims if dim != 'time'][0]

    new_ds = ds.copy()

    if isinstance(variables, str):
        variables = [variables]
    elif variables is None:
        variables = list(new_ds.variables)

    if isinstance(dim_labels, str):
        dim_labels = [dim_labels]

    # Smallest size of the parse dimension for which variables are split and
    # renamed. With keep_name_if_one a size-one dimension is squeezed instead.
    min_size_to_split = 2 if keep_name_if_one else 1

    parse_coord = ds[parse]
    parse_values = parse_coord.values
    # A coordinate without a 'units' attribute contributes no suffix instead
    # of aborting the conversion with a KeyError.
    parse_units = parse_coord.attrs.get('units', '')

    for var in variables:
        # Only variables that actually carry the parse dimension are touched,
        # and the coordinate variable itself is left alone.
        if var == parse or parse not in new_ds[var].dims:
            continue

        num_values = new_ds.sizes[parse]
        if num_values < min_size_to_split:
            # Keep the same name but remove the dimension equal to size 1
            new_ds[var] = new_ds[var].squeeze(dim=parse)
            continue

        for i in range(num_values):
            if dim_labels is not None:
                new_var_name = '_'.join([var, dim_labels[i]])
            elif use_dim_value_in_name:
                level = str(parse_values[i]) + parse_units
                new_var_name = '_'.join([var, parse, level])
            else:
                new_var_name = '_'.join([var, parse, str(i)])

            # Slice first, then copy, so each new variable owns only its own
            # 1D data instead of holding a view into a full 2D copy.
            new_ds[new_var_name] = new_ds[var].isel(indexers={parse: i}).copy()

            # Best effort: point ancillary_variables at the renamed QC
            # variable. Variables without the attribute are left unchanged.
            try:
                ancillary_variables = new_ds[new_var_name].attrs['ancillary_variables']
                current_qc_var_name = ds.qcfilter.check_for_ancillary_qc(
                    var, add_if_missing=False
                )
                if current_qc_var_name is not None:
                    ancillary_variables = ancillary_variables.replace(
                        current_qc_var_name, 'qc_' + new_var_name
                    )
                new_ds[new_var_name].attrs['ancillary_variables'] = ancillary_variables
            except KeyError:
                pass

        # Remove the old 2D variable after extracting
        del new_ds[var]

    return new_ds
2 changes: 1 addition & 1 deletion tests/qc/test_arm_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def test_scalar_dqr():
# DQR webservice does go down, so ensure it
# properly runs first before testing
try:
ds = add_dqr_to_qc(ds)
ds = add_dqr_to_qc(ds, assessment='Reprocessed,Suspect,Incorrect')
ran = True
except ValueError:
ran = False
Expand Down
48 changes: 48 additions & 0 deletions tests/utils/test_data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import act
from act.utils.data_utils import DatastreamParserARM as DatastreamParser
from act.utils.data_utils import convert_2d_to_1d

spec = importlib.util.find_spec('pyart')
if spec is not None:
Expand Down Expand Up @@ -557,3 +558,50 @@ def test_calculate_percentages():
assert np.round(percentages["chloride"], 3) == 0.915
if not record:
pytest.fail("Expected a warning for using all times.")


def test_convert_2d_to_1d():
    """Check the naming modes of convert_2d_to_1d and the size-one squeeze."""
    # Three time steps by two levels. The level coordinate carries units so
    # the value-based naming mode has something to append.
    values_2d = np.array([[1, 2], [3, 4], [5, 6]])
    two_level_ds = xr.Dataset(
        {'var': (('time', 'level'), values_2d)},
        coords={'time': [0, 1, 2], 'level': [10, 20]},
    )
    two_level_ds['level'].attrs['units'] = 'm'

    first_column = [1, 3, 5]
    second_column = [2, 4, 6]

    # Each case pairs extra keyword arguments with the two variable names the
    # split is expected to produce: index naming, value naming, custom labels.
    cases = [
        ({}, ('var_level_0', 'var_level_1')),
        ({'use_dim_value_in_name': True}, ('var_level_10m', 'var_level_20m')),
        ({'dim_labels': ['low', 'high']}, ('var_low', 'var_high')),
    ]
    for extra_kwargs, (first_name, second_name) in cases:
        split = convert_2d_to_1d(two_level_ds, parse='level', **extra_kwargs)

        assert first_name in split
        assert second_name in split
        np.testing.assert_array_equal(split[first_name].values, first_column)
        np.testing.assert_array_equal(split[second_name].values, second_column)

    # A dataset whose level dimension has a single value.
    one_level_ds = xr.Dataset(
        {'var': (('time', 'level'), np.array([[1], [3], [5]]))},
        coords={'time': [0, 1, 2], 'level': [10]},
    )

    # With keep_name_if_one=True the variable keeps its name and loses the
    # size-one dimension.
    squeezed = convert_2d_to_1d(one_level_ds, parse='level', keep_name_if_one=True)

    assert 'var' in squeezed
    np.testing.assert_array_equal(squeezed['var'].values, first_column)

0 comments on commit df91968

Please sign in to comment.