diff --git a/act/utils/data_utils.py b/act/utils/data_utils.py index cebb7a5fea..8a98bab1d3 100644 --- a/act/utils/data_utils.py +++ b/act/utils/data_utils.py @@ -1410,3 +1410,112 @@ def calculate_percentages(ds, fields, time=None, time_slice=None, threshold=None percentages[i] = j ds_percent.close() return percentages + + +def convert_2d_to_1d(ds_object, parse=None, variables=None, keep_name_if_one=False, + use_dim_value_in_name=False, dim_labels=None): + """ + Function to convert a single 2D variable into multiple 1D + variables using the second dimension in the new variable name. + + ... + + + Parameters + ---------- + ds_object: xarray.dataset + Object containing 2D variable to be converted + parse: str or None + Coordinate dimension name to parse along. If set to None will + guess the non-time dimension is the parse dimension. + variables: str or list of str + Variable name or names to parse. If not provided will attempt to + parse all two dimensional variables with the parse coordinate + dimension. + keep_name_if_one: boolean + Option to not modify the variable name if the coordinate dimension + has only one value. Essentially converting a 2D (i.e. (100,1) + variable into a 1D variable (i.e. (100)). + use_dim_value_in_name: boolean + Option to use value from the coordinate dimension in new variable + name instead of indexing number. Will use the value prepended + to the units of the dimension. + dim_labels: str or list of str + Allows for use of custom label to append to end of variable names + + + Returns + ------- + A new object copied from input object with the multi-dimensional + variable split into multiple single-dimensional variables. + + + Example + ------- + # This will get the name of the coordinate dimension so it does not need to + # be hard coded. + >>> parse_dim = (list(set(list(ds_object.dims)) - set(['time'])))[0] + + # Now use the parse_dim name to parse the variable and return new object. + >>> new_ds_object = convert_2d_to_1d(ds_object, parse=parse_dim) + + """ + + # If no parse dimension name given assume it is the one not equal to 'time' + if parse is None: + parse = (list(set(list(ds_object.dims)) - set(['time'])))[0] + + new_ds_object = ds_object.copy() + + if variables is not None and isinstance(variables, str): + variables = [variables] + + if variables is None: + variables = list(new_ds_object.variables) + + if dim_labels is not None and isinstance(dim_labels, (str, )): + dim_labels = [dim_labels] + + # Check if we want to keep the names the same if the second dimension + # is of size one. + num_dims = 1 + if keep_name_if_one: + num_dims = 2 + + parse_values = ds_object[parse].values + for var in variables: + if var == parse: + continue + # Check if the parse dimension is in the dimension tuple + if parse in new_ds_object[var].dims: + if len((new_ds_object[parse])) >= num_dims: + for i in range(0, new_ds_object.sizes[parse]): + if (dim_labels is not None): + new_var_name = '_'.join([var, dim_labels[i]]) + elif use_dim_value_in_name: + level = str(parse_values[i]) + ds_object[parse].attrs['units'] + new_var_name = '_'.join([var, parse, level]) + else: + new_var_name = '_'.join([var, parse, str(i)]) + new_var = new_ds_object[var].copy() + new_ds_object[new_var_name] = new_var.isel(indexers={parse: i}) + + try: + ancillary_variables = new_ds_object[new_var_name].attrs['ancillary_variables'] + current_qc_var_name = ds_object.qcfilter.check_for_ancillary_qc( + var, add_if_missing=False) + if current_qc_var_name is not None: + ancillary_variables = ancillary_variables.replace( + current_qc_var_name, 'qc_' + new_var_name) + new_ds_object[new_var_name].attrs['ancillary_variables'] = ancillary_variables + except KeyError: + pass + + # Remove the old 2D variable after extracting + del new_ds_object[var] + + else: + # Keep the same name but remove the dimension equal to size 1 + new_ds_object[var] = new_ds_object[var].squeeze(dim=parse) + + return new_ds_object