Adding convert 2d to 1d function

ARM-DOE · Jul 17, 2024 · 0803405 · 0803405
1 parent 7d6dce3
commit 0803405
Showing 1 changed file with 109 additions and 0 deletions.
diff --git a/act/utils/data_utils.py b/act/utils/data_utils.py
@@ -1410,3 +1410,112 @@ def calculate_percentages(ds, fields, time=None, time_slice=None, threshold=None
         percentages[i] = j
     ds_percent.close()
     return percentages
+
+
+def convert_2d_to_1d(ds_object, parse=None, variables=None, keep_name_if_one=False,
+                     use_dim_value_in_name=False, dim_labels=None):
+    """
+    Convert 2D variables into multiple 1D variables, one for each index
+    along the parse dimension, and remove the original 2D variables.
+
+    Parameters
+    ----------
+    ds_object: xarray.Dataset
+        Object containing 2D variable to be converted
+    parse: str or None
+        Coordinate dimension name to parse along. If set to None will use
+        the first dimension not named 'time'. If there is no such
+        dimension an unmodified copy of the object is returned.
+    variables: str or list of str
+        Variable name or names to parse. If not provided will attempt to
+        parse all two dimensional variables with the parse coordinate
+        dimension.
+    keep_name_if_one: boolean
+        Option to not modify the variable name if the coordinate dimension
+        has only one value. Essentially converting a 2D (i.e. (100,1))
+        variable into a 1D variable (i.e. (100)).
+    use_dim_value_in_name: boolean
+        Option to use value from the coordinate dimension in new variable
+        name instead of indexing number. Will use the value prepended to
+        the units of the dimension, or the value alone if it has no units.
+    dim_labels: str or list of str
+        Custom labels appended to the variable name in place of the parse
+        name and index. Needs one label per value of the parse dimension
+        and takes precedence over use_dim_value_in_name.
+
+    Returns
+    -------
+    new_ds_object: xarray.Dataset
+        A new object copied from input object with the multi-dimensional
+        variable split into multiple single-dimensional variables named
+        <variable>_<parse>_<index> unless one of the options above is used.
+
+    Example
+    -------
+    # This will get the name of the coordinate dimension so it does not need to
+    # be hard coded.
+    >>> parse_dim = (list(set(list(ds_object.dims)) - set(['time'])))[0]
+
+    # Now use the parse_dim name to parse the variable and return new object.
+    >>> new_ds_object = convert_2d_to_1d(ds_object, parse=parse_dim)
+
+    """
+    new_ds_object = ds_object.copy()
+
+    # If no parse dimension name given use the first one not equal to 'time'
+    if parse is None:
+        parse = next((dim for dim in ds_object.dims if dim != 'time'), None)
+        if parse is None:
+            # Only a time dimension exists so there is nothing to split.
+            return new_ds_object
+
+    if isinstance(variables, str):
+        variables = [variables]
+    elif variables is None:
+        variables = list(new_ds_object.variables)
+
+    if isinstance(dim_labels, str):
+        dim_labels = [dim_labels]
+
+    # A dimension of size one is squeezed, not split, when keeping the name.
+    min_size = 2 if keep_name_if_one else 1
+
+    parse_values = ds_object[parse].values
+    # Missing units should not stop the conversion, so default to no suffix.
+    parse_units = str(ds_object[parse].attrs.get('units', ''))
+    parse_size = ds_object.sizes.get(parse, 0)
+
+    for var in variables:
+        if var == parse or parse not in new_ds_object[var].dims:
+            continue
+
+        if parse_size < min_size:
+            # Keep the same name but remove the dimension equal to size 1
+            new_ds_object[var] = new_ds_object[var].squeeze(dim=parse)
+            continue
+
+        for i in range(parse_size):
+            if dim_labels is not None:
+                new_var_name = '_'.join([var, str(dim_labels[i])])
+            elif use_dim_value_in_name:
+                new_var_name = '_'.join([var, parse, str(parse_values[i]) + parse_units])
+            else:
+                new_var_name = '_'.join([var, parse, str(i)])
+            # Copy only the slice instead of the full 2D array on every pass
+            new_ds_object[new_var_name] = new_ds_object[var].isel(indexers={parse: i}).copy()
+            try:
+                ancillary_variables = new_ds_object[new_var_name].attrs['ancillary_variables']
+                current_qc_var_name = ds_object.qcfilter.check_for_ancillary_qc(
+                    var, add_if_missing=False)
+                if current_qc_var_name is not None:
+                    # Rename the QC reference to follow the new variable name
+                    ancillary_variables = ancillary_variables.replace(
+                        current_qc_var_name, 'qc_' + new_var_name)
+                    new_ds_object[new_var_name].attrs['ancillary_variables'] = ancillary_variables
+            except KeyError:
+                pass
+
+        # Remove the old 2D variable after extracting
+        del new_ds_object[var]
+
+    return new_ds_object