Skip to content

Commit

Permalink
Adding convert 2d to 1d function
Browse files Browse the repository at this point in the history
  • Loading branch information
ajsockol authored Jul 17, 2024
1 parent 7d6dce3 commit 0803405
Showing 1 changed file with 109 additions and 0 deletions.
109 changes: 109 additions & 0 deletions act/utils/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1410,3 +1410,112 @@ def calculate_percentages(ds, fields, time=None, time_slice=None, threshold=None
percentages[i] = j
ds_percent.close()
return percentages


def convert_2d_to_1d(ds_object, parse=None, variables=None, keep_name_if_one=False,
                     use_dim_value_in_name=False, dim_labels=None):
    """
    Split 2D variables into multiple 1D variables along one dimension.

    Every selected variable that has the ``parse`` dimension is replaced by
    one new variable per index along that dimension. The original variable
    is removed from the returned object. The input object is not modified.

    New variable names are built as follows (first matching rule wins):

    * ``dim_labels`` given: ``<var>_<dim_labels[i]>``
    * ``use_dim_value_in_name`` True: ``<var>_<parse>_<value><units>``
    * otherwise: ``<var>_<parse>_<i>``

    Parameters
    ----------
    ds_object: xarray.Dataset
        Object containing the 2D variable(s) to be converted.
    parse: str or None
        Name of the dimension to split along. If None, the first dimension
        of ``ds_object`` that is not named 'time' is used.
    variables: str or list of str or None
        Variable name or names to split. If None, every variable in the
        object that has the ``parse`` dimension is processed.
    keep_name_if_one: boolean
        If True and the ``parse`` dimension has only one value, the
        variable keeps its name and the dimension is squeezed out,
        e.g. a (100, 1) variable becomes a (100,) variable.
    use_dim_value_in_name: boolean
        Use the coordinate value followed by the coordinate's 'units'
        attribute in the new variable name instead of the index number.
        If the coordinate has no 'units' attribute only the value is used.
    dim_labels: str or list of str or None
        Custom labels appended to the variable name, one per index along
        the ``parse`` dimension. Takes precedence over
        ``use_dim_value_in_name``.

    Returns
    -------
    new_ds_object: xarray.Dataset
        A copy of the input object with each processed variable split into
        multiple variables that no longer have the ``parse`` dimension.

    Raises
    ------
    IndexError
        If ``parse`` is None and the object has no dimension other than
        'time', or if ``dim_labels`` has fewer entries than the size of the
        ``parse`` dimension.

    Example
    -------
    # This will get the name of the coordinate dimension so it does not need
    # to be hard coded.
    >>> parse_dim = [dim for dim in ds_object.dims if dim != 'time'][0]
    # Now use the parse_dim name to parse the variable and return new object.
    >>> new_ds_object = convert_2d_to_1d(ds_object, parse=parse_dim)
    """

    # If no parse dimension name is given assume it is the first one not
    # equal to 'time'. Dataset order is used (rather than a set) so the
    # guess is the same on every run when several candidates exist.
    if parse is None:
        parse = [dim for dim in ds_object.dims if dim != 'time'][0]

    new_ds_object = ds_object.copy()

    if isinstance(variables, str):
        variables = [variables]
    elif variables is None:
        # Snapshot the names now; variables are added/removed in the loop.
        variables = list(new_ds_object.variables)

    if isinstance(dim_labels, str):
        dim_labels = [dim_labels]

    # Minimum size of the parse dimension for which variables are split and
    # renamed. With keep_name_if_one a size-one dimension is squeezed instead.
    min_size_to_split = 2 if keep_name_if_one else 1

    parse_values = ds_object[parse].values
    # A coordinate without units simply contributes no suffix.
    parse_units = ds_object[parse].attrs.get('units', '')

    for var in variables:
        if var == parse:
            continue

        # Only variables that actually have the parse dimension are touched.
        if parse not in new_ds_object[var].dims:
            continue

        if len(new_ds_object[parse]) < min_size_to_split:
            # Keep the same name but remove the dimension equal to size 1
            new_ds_object[var] = new_ds_object[var].squeeze(dim=parse)
            continue

        for i in range(new_ds_object.sizes[parse]):
            if dim_labels is not None:
                new_var_name = '_'.join([var, dim_labels[i]])
            elif use_dim_value_in_name:
                level = str(parse_values[i]) + parse_units
                new_var_name = '_'.join([var, parse, level])
            else:
                new_var_name = '_'.join([var, parse, str(i)])

            # Slice first, then copy: the new variable owns its data but
            # only the 1D slice is duplicated, not the whole 2D array.
            new_ds_object[new_var_name] = (
                new_ds_object[var].isel(indexers={parse: i}).copy()
            )

            # Point the new variable at the matching split QC variable name.
            # NOTE(review): assumes 'ancillary_variables' is a string and
            # that the QC variable follows the 'qc_<var>' naming so its own
            # split pieces receive these names - confirm against callers.
            try:
                ancillary_variables = new_ds_object[new_var_name].attrs['ancillary_variables']
                current_qc_var_name = ds_object.qcfilter.check_for_ancillary_qc(
                    var, add_if_missing=False)
                if current_qc_var_name is not None:
                    ancillary_variables = ancillary_variables.replace(
                        current_qc_var_name, 'qc_' + new_var_name)
                new_ds_object[new_var_name].attrs['ancillary_variables'] = ancillary_variables
            except KeyError:
                # No ancillary variables recorded; nothing to update.
                pass

        # Remove the old 2D variable after extracting
        del new_ds_object[var]

    return new_ds_object

0 comments on commit 0803405

Please sign in to comment.