Commit
Showing 7 changed files with 363 additions and 97 deletions.
This file was deleted.
This file was deleted.
@@ -0,0 +1,10 @@
from dataclasses import dataclass


@dataclass
class BoundingBox:
    """A latitude-longitude bounding box."""

    lat_min: float
    lat_max: float
    long_min: float
    long_max: float
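Since BoundingBox is a plain dataclass, constructing one is direct. A minimal sketch for orientation (the coordinate values here are arbitrary assumptions, not taken from the commit):

from gz21_ocean_momentum.new.common.bounding_box import BoundingBox

# Arbitrary illustrative coordinates.
bbox = BoundingBox(lat_min=-40.0, lat_max=40.0, long_min=-100.0, long_max=-20.0)
print(bbox)  # the dataclass decorator provides a readable repr for free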
@@ -0,0 +1,39 @@
import gz21_ocean_momentum.new.data.step as step
from gz21_ocean_momentum.new.common.bounding_box import BoundingBox

import configargparse

DESCRIPTION = "Read data from the CM2.6 dataset and apply coarse graining. \
Stores the resulting forcings dataset to a Zarr store in the given output \
directory."

p = configargparse.ArgParser(description=DESCRIPTION)
p.add("--config-file", is_config_file=True, help="config file path")
p.add("--out-dir", type=str, required=True)
p.add("--lat-min", type=float, required=True)
p.add("--lat-max", type=float, required=True)
p.add("--long-min", type=float, required=True)
p.add("--long-max", type=float, required=True)
p.add("--cyclize", action="store_true", help="global data; make cyclic along longitude")
p.add("--ntimes", type=int, required=True, help="number of days (TODO)")
p.add("--co2-increase", action="store_true", help="use 1%% annual CO2 increase CM2.6 dataset. By default, uses control (no increase)")
p.add("--factor", type=int, required=True, help="resolution degradation factor")

options = p.parse_args()

# form bounding box from input arguments
bounding_box = BoundingBox(
    options.lat_min, options.lat_max,
    options.long_min, options.long_max)

CATALOG_URL = "https://raw.githubusercontent.com/\
pangeo-data/pangeo-datastore/\
master/\
intake-catalogs/master.yaml"

# download the (public) CM2.6 data via the Pangeo intake catalog
surface_fields, grid = step.download_cm2_6(CATALOG_URL, options.co2_increase)
# coarse-grain the surface velocities and compute the sub-grid forcings
forcings = step.preprocess_and_compute_forcings(
    grid, surface_fields, bounding_box, options.ntimes, options.cyclize,
    options.factor, "usurf", "vsurf")

forcings.to_zarr(options.out_dir)
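For reference, a hypothetical invocation of this script might look as follows; the script name and all option values here are illustrative assumptions, not taken from the commit:

python cli.py --out-dir /tmp/forcings \
    --lat-min=-40 --lat-max=40 --long-min=-100 --long-max=-20 \
    --ntimes 100 --factor 4 --co2-increase

The same options may instead be placed in a config file and passed via --config-file, since configargparse merges config-file values with command-line flags.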
@@ -0,0 +1,198 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Routines for coarsening a dataset."""

import logging

import numpy as np
import xarray as xr
from scipy.ndimage import gaussian_filter


def eddy_forcing(
    u_v_dataset: xr.Dataset,
    grid_data: xr.Dataset,
    scale: int,
    nan_or_zero: str = "zero",
) -> xr.Dataset:
    """
    Compute the sub-grid forcing terms using mean coarse-graining.

    Parameters
    ----------
    u_v_dataset : xarray Dataset
        High-resolution velocity field.
    grid_data : xarray Dataset
        High-resolution grid details.
    scale : int
        Coarse-graining factor (TODO).
    nan_or_zero : str, optional
        Either 'nan' or 'zero'. Determines whether NaN values in the initial
        surface velocities are kept, or replaced by zeros before applying the
        procedure. In the latter case, zeros remaining after the procedure are
        replaced by NaNs for consistency. The default is 'zero'.

    Returns
    -------
    forcing : xarray Dataset
        Dataset containing the low-resolution velocity field and forcing.

    TODO: this function also edits and extends u_v_dataset, which used to be
    returned as well, but callers silently ignored that value.
    """
    # Replace NaN values with zeros.
    if nan_or_zero == "zero":
        u_v_dataset = u_v_dataset.fillna(0.0)

    # Interpolate temperature
    # interp_coords = dict(xt_ocean=u_v_dataset.coords['xu_ocean'],
    #                      yt_ocean=u_v_dataset.coords['yu_ocean'])
    # u_v_dataset['temp'] = u_v_dataset['surface_temperature'].interp(
    #     interp_coords)

    scale_filter = scale / 2
    # High-resolution advection terms
    adv = advections(u_v_dataset, grid_data)
    # Filtered advections
    filtered_adv = spatial_filter_dataset(adv, grid_data, scale_filter)
    # Filtered u, v fields
    u_v_filtered = spatial_filter_dataset(u_v_dataset, grid_data, scale_filter)
    # Advection term computed from the filtered velocity field
    adv_filtered = advections(u_v_filtered, grid_data)
    # Forcing: advection of the filtered field minus the filtered advection
    forcing = adv_filtered - filtered_adv
    forcing = forcing.rename({"adv_x": "S_x", "adv_y": "S_y"})
    # Merge filtered u, v and forcing terms
    forcing = forcing.merge(u_v_filtered)
    # TODO logging
    # logging.debug(forcing)

    # Coarsen
    forcing_coarse = forcing.coarsen(
        {"xu_ocean": int(scale_filter), "yu_ocean": int(scale_filter)},
        boundary="trim",
    )
    forcing_coarse = forcing_coarse.mean()

    if nan_or_zero == "zero":
        # Replace zeros with NaNs for consistency
        forcing_coarse = forcing_coarse.where(forcing_coarse["usurf"] != 0)
    u_v_dataset = u_v_dataset.merge(adv)
    filtered_adv = filtered_adv.rename({"adv_x": "f_adv_x", "adv_y": "f_adv_y"})
    adv_filtered = adv_filtered.rename({"adv_x": "adv_f_x", "adv_y": "adv_f_y"})
    u_v_filtered = u_v_filtered.rename({"usurf": "f_usurf", "vsurf": "f_vsurf"})
    u_v_dataset = xr.merge(
        (
            u_v_dataset,
            u_v_filtered,
            adv,
            filtered_adv,
            adv_filtered,
            forcing[["S_x", "S_y"]],
        )
    )
    return forcing_coarse
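For orientation, here is a minimal synthetic-data sketch of calling eddy_forcing; it is editorial, not part of the commit, and the grid sizes, spacings, and scale value are arbitrary assumptions:

import numpy as np
import xarray as xr

# Arbitrary synthetic grid: 5 time steps on a 100x100 mesh of ~10 km cells.
nt, ny, nx = 5, 100, 100
coords = {
    "time": np.arange(nt),
    "yu_ocean": np.arange(ny, dtype=float),
    "xu_ocean": np.arange(nx, dtype=float),
}
rng = np.random.default_rng(0)
u_v = xr.Dataset(
    {
        "usurf": (("time", "yu_ocean", "xu_ocean"), rng.standard_normal((nt, ny, nx))),
        "vsurf": (("time", "yu_ocean", "xu_ocean"), rng.standard_normal((nt, ny, nx))),
    },
    coords=coords,
)
grid = xr.Dataset(
    {
        "dxu": (("yu_ocean", "xu_ocean"), np.full((ny, nx), 1e4)),
        "dyu": (("yu_ocean", "xu_ocean"), np.full((ny, nx), 1e4)),
    },
    coords={k: coords[k] for k in ("yu_ocean", "xu_ocean")},
)

# scale=4 gives a filter scale of 2 and coarsens each spatial dim by 2.
forcing_coarse = eddy_forcing(u_v, grid, scale=4)
print(sorted(forcing_coarse.data_vars))  # ['S_x', 'S_y', 'usurf', 'vsurf']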

def advections(u_v_field: xr.Dataset, grid_data: xr.Dataset):
    """
    Compute advection terms corresponding to the passed velocity field.

    Parameters
    ----------
    u_v_field : xarray Dataset
        Velocity field; must contain the variables "usurf" and "vsurf".
    grid_data : xarray Dataset
        Grid data; must contain the variables "dxu" and "dyu".

    Returns
    -------
    result : xarray Dataset
        Advection components, under the variable names "adv_x" and "adv_y".
    """
    dxu = grid_data["dxu"]
    dyu = grid_data["dyu"]
    # Finite-difference gradients of the velocity field
    gradient_x = u_v_field.diff(dim="xu_ocean") / dxu
    gradient_y = u_v_field.diff(dim="yu_ocean") / dyu
    # Interpolate the gradients back onto the original coordinates
    interp_coords = {
        "xu_ocean": u_v_field.coords["xu_ocean"],
        "yu_ocean": u_v_field.coords["yu_ocean"],
    }
    gradient_x = gradient_x.interp(interp_coords)
    gradient_y = gradient_y.interp(interp_coords)
    u, v = u_v_field["usurf"], u_v_field["vsurf"]
    adv_x = u * gradient_x["usurf"] + v * gradient_y["usurf"]
    adv_y = u * gradient_x["vsurf"] + v * gradient_y["vsurf"]
    result = xr.Dataset({"adv_x": adv_x, "adv_y": adv_y})
    # TODO check if we can simply prevent the previous operation from adding
    # chunks
    # result = result.chunk(dict(xu_ocean=-1, yu_ocean=-1))
    return result


def spatial_filter_dataset(
    dataset: xr.Dataset, grid_info: xr.Dataset, sigma: float
) -> xr.Dataset:
    """
    Apply spatial filtering to the dataset across the spatial dimensions.

    Parameters
    ----------
    dataset : xarray Dataset
        Dataset to filter. The first dimension must be time, followed by the
        spatial dimensions.
    grid_info : xarray Dataset
        Grid data; must include the variables "dxu" and "dyu".
    sigma : float
        Scale of the filtering, in the same units as the grid (often meters).

    Returns
    -------
    filt_dataset : xarray Dataset
        Filtered dataset.
    """
    # Cell areas, rescaled for numerical convenience
    area_u = grid_info["dxu"] * grid_info["dyu"] / 1e8
    # Area-weight the data before filtering
    dataset = dataset * area_u
    # Normalisation term, so that if the quantity we filter is constant
    # over the domain, the filtered quantity is constant with the same value
    norm = xr.apply_ufunc(
        lambda x: gaussian_filter(x, (sigma, sigma), mode="constant"),
        area_u,
        dask="parallelized",
        output_dtypes=[float],
    )
    filtered = xr.apply_ufunc(
        lambda x: spatial_filter(x, sigma),
        dataset,
        dask="parallelized",
        output_dtypes=[float],
    )
    return filtered / norm
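The point of the area weighting and the norm term is that filtering a constant field returns the same constant. A quick editorial check, on an assumed synthetic grid (sizes and spacings are illustrative):

import numpy as np
import xarray as xr

nt, ny, nx = 2, 32, 32
coords = {
    "time": np.arange(nt),
    "yu_ocean": np.arange(ny, dtype=float),
    "xu_ocean": np.arange(nx, dtype=float),
}
grid = xr.Dataset(
    {
        "dxu": (("yu_ocean", "xu_ocean"), np.full((ny, nx), 1e4)),
        "dyu": (("yu_ocean", "xu_ocean"), np.full((ny, nx), 1e4)),
    },
    coords={k: coords[k] for k in ("yu_ocean", "xu_ocean")},
)
const = xr.Dataset(
    {"usurf": (("time", "yu_ocean", "xu_ocean"), np.ones((nt, ny, nx)))},
    coords=coords,
)

filt = spatial_filter_dataset(const, grid, sigma=3.0)
# A constant field filters to (approximately) the same constant.
print(float(filt["usurf"].min()), float(filt["usurf"].max()))  # both ≈ 1.0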

def spatial_filter(data: np.ndarray, sigma: float):
    """
    Apply a Gaussian filter to spatial data.

    Applies a scipy Gaussian filter along all dimensions except the first,
    which corresponds to time.

    Parameters
    ----------
    data : ndarray
        Data to filter.
    sigma : float
        Unitless scale of the filter.

    Returns
    -------
    result : ndarray
        Filtered data.
    """
    result = np.zeros_like(data)
    # Filter each time slice independently.
    for t in range(data.shape[0]):
        data_t = data[t, ...]
        result_t = gaussian_filter(data_t, (sigma, sigma), mode="constant")
        result[t, ...] = result_t
    return result
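And a small standalone check of spatial_filter itself, on assumed random data; note that mode="constant" pads with zeros, so values near the boundary are damped:

import numpy as np

rng = np.random.default_rng(1)
data = rng.standard_normal((3, 64, 64))  # shape (time, y, x)
smoothed = spatial_filter(data, sigma=2.0)
# Each time slice is smoothed independently; smoothing reduces the variance.
print(round(float(data.var()), 3), round(float(smoothed.var()), 3))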