Skip to content

Commit

Permalink
Updated pyl4c.apps.resample.NestedGrid and pyl4c.utils.get_pft_array()
Browse files Browse the repository at this point in the history
NestedGrid now supports downscaling of AppEEARS netCDF4 granules.
  • Loading branch information
arthur-e committed Feb 28, 2023
1 parent 92d0251 commit 269be6b
Show file tree
Hide file tree
Showing 3 changed files with 226 additions and 62 deletions.
254 changes: 202 additions & 52 deletions pyl4c/apps/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,24 @@
with h5py.File("something.h5", "r") as hdf:
nested.downscale_hdf5_by_pft(hdf, "GPP/gpp_pft%d_mean")
The above example assumes that you have official SPL4CMDL HDF5 granules from
NSIDC or EarthData Search. It's also possible to downscale netCDF4 granules
from NASA AppEEARS, including granules that represent spatial subsets:
nc = netCDF4.Dataset('AppEEARS_granule.nc')
# Read the EASE-Grid 2.0 coordinate arrays, get bounds
xmin, xmax = min(nc['fakedim1'][:]), max(nc['fakedim1'][:])
ymin, ymax = min(nc['fakedim0'][:]), max(nc['fakedim0'][:])
AOI = (xmin, ymin, xmax, ymax)
# It may be necessary to provide a "shape" argument to ensure that
# the land-cover/ PFT data are forced to the shape of the netCDF4 data
nested = NestedGrid(
subset_bbox = AOI, shape = nc.variables['SOC_soc_mean'][0].shape)
result = nested.downscale_netcdf_by_pft(
nc, field = 'SOC_soc_pft%d_mean', dtype = np.float32)
All of the SMAP Level 4 Carbon (L4C) flux and soil organic carbon (SOC) state
variables are computed on a global, 1-km equal-area grid. We spatially average
the data to 9-km to meet operational constraints on data storage. However,
Expand All @@ -25,71 +43,100 @@
import tempfile
import h5py
import numpy as np
from affine import Affine
from osgeo import gdal
from scipy.ndimage import zoom
from cached_property import cached_property
from pyl4c.data.fixtures import EASE2_GRID_PARAMS
from pyl4c.utils import get_ease2_slice_idx, get_pft_array
from pyl4c.ease2 import ease2_coords
from pyl4c.utils import get_pft_array
from pyl4c.spatial import ease2_to_geotiff
from pyl4c.lib.cli import CommandLineInterface
from pyl4c.lib.cli import CommandLineInterface, ProgressBar

class NestedGrid(object):
'''
Represents a nested L4C data structure, where each grid cell has a mean
value for each sub-grid PFT class. This allows the reconstruction of a
finer scale grid by applying the PFT mean values to the subgrid.
finer scale grid by applying the PFT mean values to the subgrid. The
arguments `subset_id` and `subset_bbox` are mutually exclusive.
Parameters
----------
pft : tuple
PFT values to use in downscaling
subset_id : str or None
Name of a well-known spatial subset
'''
def __init__(self, pft = range(1, 9), subset_id = None):
def __init__(self, pft = range(1, 9), shape = None, subset_bbox = None):
self._offsets = (0, 0)
self._pft_codes = pft
self._shp_1km = EASE2_GRID_PARAMS['M01']['shape']
self._shp_9km = EASE2_GRID_PARAMS['M09']['shape']
self._slice_idx_1km = None
self._slice_idx_9km = None
self._subset_id = subset_id
if subset_id is not None:
# Get the slicing indices, shape of the downscaled subset
self._slice_idx_1km = get_ease2_slice_idx('M01', subset_id)
self._shp_1km = (
self._slice_idx_1km[1][1] - self._slice_idx_1km[1][0],
self._slice_idx_1km[0][1] - self._slice_idx_1km[0][0])
# Get the slicing indices, shape of the 9-km grid
self._slice_idx_9km = get_ease2_slice_idx('M09', subset_id)
self._shp_9km = (
self._slice_idx_9km[1][1] - self._slice_idx_9km[1][0],
self._slice_idx_9km[0][1] - self._slice_idx_9km[0][0])
# Set the 1-km slicing indices
xoff = self._slice_idx_1km[0][0]
yoff = self._slice_idx_1km[1][0]
self._offsets = (xoff, yoff)
self._subset_bbox = subset_bbox
self._ul_coords = (subset_bbox[0], subset_bbox[-1]) # Upper-left coordinates
self._lr_coords = (subset_bbox[-2], subset_bbox[1]) # Lower-right coordinates
# This is the "global" affine transformation
self._transform_1km = Affine.from_gdal(*EASE2_GRID_PARAMS['M01']['geotransform'])
self._transform_9km = Affine.from_gdal(*EASE2_GRID_PARAMS['M09']['geotransform'])
# This is the "output" (1-km) affine transformation
self._transform = self._transform_1km * self._transform_1km.scale(1)
if subset_bbox is not None:
# Need to calculate "local" transformation
gt = list(self._transform_9km.to_gdal())
gt[0] = self._ul_coords[0]
gt[3] = self._ul_coords[1]
transform_9km = Affine.from_gdal(*gt)
transform_1km = transform_9km * transform_9km.scale(1/9)
# Figure out row-column coordinates of upper-left corner and
# the 9-km extent
x0, y0 = list(map(int, ~self._transform_9km * self._ul_coords))
x1, y1 = list(map(int, ~transform_9km * self._lr_coords))
self._shp_9km = (y1, x1)
self._slice_idx_9km = [(y0, y0 + y1), (x0, x0 + x1)]
# Same for the 1-km extent
x2, y2 = list(map(int, ~self._transform_1km * self._ul_coords))
x3, y3 = list(map(int, ~transform_1km * self._lr_coords))
self._shp_1km = (y3, x3)
# Check that this is the expected shape; because of coordinate
# transformations with different libraries, we may need to
# fudge things a big
if shape is not None:
if self._shp_9km != shape:
print(f'WARNING: Expected shape {str(shape)} did not match actual shape {str(self._shp_9km)}; using expected shape')
# Adjust the width and height at the bottom-right corner
deltas = np.array(shape) - np.array(self._shp_9km)
self._shp_9km = shape
self._shp_1km = tuple(np.array(self._shp_9km) * 9)
self._slice_idx_9km = [
(y0, y0 + y1 + deltas[0]),
(x0, x0 + x1 + deltas[1])
]
self._slice_idx_1km = (np.array(self._slice_idx_9km) * 9).tolist()
# Update the output 1-km transformation
self._transform = transform_1km

@cached_property
def pft(self):
'The 1-km PFT map'
return get_pft_array(
'M01', self._subset_id).astype(np.uint8)[np.newaxis,...]

@property
def offsets(self):
'''
Returns the xoff, yoff offsets for a subset array.
Returns:
tuple (int, int)
'''
return self._offsets
'M01', slice_idx = self._slice_idx_1km).astype(np.uint8)[np.newaxis,...]

@property
def shape(self):
'Returns the 1-km array shape'
return self._shp_1km

def _downscale(self, downscaled, scale = 1, dtype = np.float32, nodata = -9999):
# Where the PFT map is in the valid range, return data, else NoData
return np.where(
np.logical_and(
np.in1d(self.pft.ravel(), self._pft_codes),
downscaled.ravel() != nodata),
np.multiply(downscaled.ravel(), scale), nodata)\
.reshape(self._shp_1km)\
.astype(dtype)

def pft_mask(self, p):
'''
An (M x N) mask for selecting pixels matching specified PFT class.
Expand All @@ -105,17 +152,29 @@ def pft_mask(self, p):
'''
return np.where(self.pft == p, 1, 0)

def downscale_hdf5_by_pft(
def downscale_hdf5_by_pft(*args, **kwargs):
'''
DEPRECATED. Use `NestedGrid.downscale_hdf_by_pft()` instead.
'''
self.downscale_hdf_by_pft(*args, **kwargs)

def downscale_hdf_by_pft(
self, hdf, field, scale = 1, dtype = np.float32, nodata = -9999):
'''
Resamples 9-km L4C data to 1-km scale by repeating the spatial mean
values for each PFT on the 1-km land-cover grid.
Will subset the *arrays if they are not the expected shape of the
subset at 9-km scale. NOTE: Use scale = 1e6 if a total flux
(e.g., total GPP) is required; 1e6 is the number of square meters in
a 1-km L4C pixel.
subset at 9-km scale. This function assumes that the grouped dataset,
`hdf`, is either an official SPL4CMDL HDF5 granule or a netCDF4 file
generated by NASA AppEEARS.
NOTE: Use scale = 1e6 if a total flux (e.g., total GPP) is required;
`1e6` is the number of square meters in a 1-km L4C pixel.
Parameters
----------
hdf : h5py.File
hdf : h5py.File or netCDF4.Dataset
field : str
Template for PFT-mean field in the HDF5 granule, e.g.,
"GPP/gpp_pft%d_mean"
Expand All @@ -133,30 +192,97 @@ def downscale_hdf5_by_pft(
opts = { # Options to zoom()
'order': 0, 'zoom': 9, 'mode': 'grid-constant', 'grid_mode': True
}
if self._subset_id is not None:
x_idx, y_idx = self._slice_idx_9km
xmin, xmax = x_idx
if self._subset_bbox is not None:
y_idx, x_idx = self._slice_idx_9km
ymin, ymax = y_idx
xmin, xmax = x_idx

downscaled = np.zeros(self._shp_1km)
downscaled = np.zeros(self._shp_1km, dtype = dtype)
for p in self._pft_codes:
# Allow for the possibility of either netCDF4 or h5py datasets
if hasattr(hdf, 'variables'):
source = hdf.variables[field % p]
else:
source = hdf[field % p]
# If needed, subset the 9-km arrays, then down-scale to 1-km
if self._subset_id is None:
arr = hdf[field % p][:]
if self._subset_bbox is None:
arr = source[:]
else:
arr = hdf[field % p][ymin:ymax, xmin:xmax]
arr = source[ymin:ymax, xmin:xmax]
# Resize, multiply by PFT mask, then add to output where != NoData
downscaled = np.add(
downscaled, np.multiply(self.pft_mask(p), zoom(arr, **opts)))
return self._downscale(downscaled, scale, dtype, nodata)

# Where the PFT map is in the valid range, return data, else NoData
return np.where(
np.logical_and(
np.in1d(self.pft.ravel(), self._pft_codes),
downscaled.ravel() != nodata),
np.multiply(downscaled.ravel(), scale), nodata)\
.reshape(self._shp_1km)\
.astype(dtype)
def downscale_netcdf_by_pft(
self, hdf, field, scale = 1, dtype = np.float32, nodata = -9999):
'''
Resamples 9-km L4C data to 1-km scale by repeating the spatial mean
values for each PFT on the 1-km land-cover grid.
This function assumes that the grouped dataset, `hdf`, is a netCDF4
dataset from a file granule generated by NASA AppEEARS. If the granule
is a spatial subset, that subset matches the bounds defined by the
`subset_bbox` to `NestedGrid`. It allows for the netCDF4 granule to
represent multiple time steps; i.e., arrays can be of the shape
`(T, M, N)` where `T` is one or more time steps.
NOTE: Use scale = 1e6 if a total flux (e.g., total GPP) is required;
`1e6` is the number of square meters in a 1-km L4C pixel.
Parameters
----------
hdf : netCDF4.Dataset
field : str
Template for PFT-mean field in the HDF5 granule, e.g.,
"GPP/gpp_pft%d_mean"
scale : int or float
dtype : numpy.dtype
nodata : int or float
Returns
-------
numpy.ndarray
'''
if nodata < 0:
assert dtype not in (np.uint0, np.uint8, np.uint16, np.uint32, np.uint64),\
"Can't use an unsigned data type with a negative NoData value"
opts = { # Options to zoom()
'order': 0, 'zoom': 9, 'mode': 'grid-constant', 'grid_mode': True
}
# Get the shape of the input 9-km arrays
shp = hdf.variables[field % self._pft_codes[0]].shape
# In NASA AppEEARS, it is possible to request multiple dates of data,
# in which case the resulting netCDF4 granule has 3D arrays where
# the first is the time axis
if len(shp) > 2:
if len(shp) > 3:
raise ValueError(f'Too many dimensions in "{field % self._pft_codes[0]}" array; expected 2 or 3')
# Assumes the first axis is a time axis
dates = shp[0]
else:
dates = 1

result = np.zeros((dates, *self._shp_1km), dtype = dtype)
with ProgressBar(dates, 'Downscaling...') as progress:
for t in range(0, dates):
downscaled = np.zeros(self._shp_1km, dtype = dtype)
for p in self._pft_codes:
# Allow for the possibility of either netCDF4 or h5py datasets
if hasattr(hdf, 'variables'):
source = hdf.variables[field % p]
else:
source = hdf[field % p]
if dates > 1:
arr = source[t,:]
else:
arr = source[:]
# Resize, multiply by PFT mask, then add to output where != NoData
downscaled = np.add(
downscaled, np.multiply(self.pft_mask(p), zoom(arr, **opts)))
result[t] = self._downscale(downscaled, scale, dtype, nodata)
progress.update(t)
return result


class CLI(CommandLineInterface):
Expand All @@ -165,6 +291,30 @@ class CLI(CommandLineInterface):
python resample.py run <hdf5_granule> --field="GPP/gpp_pft%d_mean"
--subset-id="CONUS"
Parameters
----------
output_path : str
The output file path
pft : list or tuple
The PFT codes to use in down-scaling; should probably be `range(1, 9)`
(Default)
field : str
A Python formatting string representing the SPL4CMDL HDF5 data field
name to be down-scaled, e.g., `"SOC/soc_pft%d_mean"` (Default) where
`"%d"` will be filled-in with the numeric PFT code
subset_id : str
The name of a well-known geographic subset, see:
`pyl4c.data.fixtures.SUBSETS_BBOX`
scale : int or float
A number to multiply pixels values against, e.g., `1e6` (1,000 square
kilometers) to convert (g C m-2 day-1) to (g C day-1)
nodata : int or float
The NoData value (Default: -9999)
dtype : str
The NumPy data type (Default: `"float32"`)
verbose : bool
True to print information about the progress
'''

def __init__(
Expand Down
Loading

0 comments on commit 269be6b

Please sign in to comment.