Skip to content

Commit

Permalink
Add tests for resolution-based chunking utilities
Browse files Browse the repository at this point in the history
  • Loading branch information
djhoese committed Sep 29, 2023
1 parent cb90a68 commit 37170e1
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 3 deletions.
36 changes: 36 additions & 0 deletions satpy/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,42 @@ def test_get_legacy_chunk_size():
assert get_legacy_chunk_size() == 2048


@pytest.mark.parametrize(
    ("shape", "chunk_dtype", "num_hr", "lr_mult", "scan_width", "exp_result"),
    [
        ((1000, 3200), np.float32, 40, 4, True, (160, -1)),  # 1km swath
        ((1000 // 5, 3200 // 5), np.float32, 40, 20, True, (160 // 5, -1)),  # 5km swath
        ((1000 * 4, 3200 * 4), np.float32, 40, 1, True, (160 * 4, -1)),  # 250m swath
        ((21696 // 2, 21696 // 2), np.float32, 226, 2, False, (1469, 1469)),  # 1km area (ABI chunk 226)
        ((21696 // 2, 21696 // 2), np.float64, 226, 2, False, (1017, 1017)),  # 1km area (64-bit)
        ((21696 // 3, 21696 // 3), np.float32, 226, 6, False, (1469 // 3, 1469 // 3)),  # 3km area
        ((21696, 21696), np.float32, 226, 1, False, (1469 * 2, 1469 * 2)),  # 500m area
        ((7, 1000 * 4, 3200 * 4), np.float32, 40, 1, True, (1, 160 * 4, -1)),  # 250m swath with bands
        ((1, 7, 1000, 3200), np.float32, 40, 1, True, ((1,), (7,), (1000,), (1198, 1198, 804))),  # lots of dimensions
    ],
)
def test_resolution_chunking(shape, chunk_dtype, num_hr, lr_mult, scan_width, exp_result):
    """Test chunks_by_resolution helper function.

    Each case calls ``chunks_by_resolution`` with dask's ``array.chunk-size``
    pinned to 32MiB and checks three things: the returned chunks equal
    ``exp_result``, every chunk size is a plain ``int``, and dask accepts the
    chunks when creating an array of the given shape and dtype.
    """
    import dask.config

    from satpy.utils import chunks_by_resolution

    # Pin the chunk size so the expected values do not depend on user config.
    with dask.config.set({"array.chunk-size": "32MiB"}):
        chunk_results = chunks_by_resolution(
            shape,
            chunk_dtype,
            num_hr,
            lr_mult,
            whole_scan_width=scan_width,
        )
    assert chunk_results == exp_result

    # Equality alone would let floats through (160.0 == 160), so check the
    # type of every chunk size. A dimension is either a single size or a
    # tuple of sizes; check all of them, not just the first.
    for dim_chunks in chunk_results:
        sizes = dim_chunks if isinstance(dim_chunks, tuple) else (dim_chunks,)
        assert all(isinstance(size, int) for size in sizes)

    # make sure the chunks are understandable by dask
    da.zeros(shape, dtype=chunk_dtype, chunks=chunk_results)


def test_convert_remote_files_to_fsspec_local_files():
"""Test conversion of remote files to fsspec objects.
Expand Down
8 changes: 5 additions & 3 deletions satpy/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,7 @@ def chunks_by_resolution(
num_high_res_elements: int,
low_res_multiplier: int,
whole_scan_width: bool = False,
) -> tuple[int, ...]:
) -> tuple[int | tuple[int, ...], ...]:
"""Compute dask chunk sizes based on data resolution.
First, chunks are computed for the highest resolution version of the data.
Expand Down Expand Up @@ -668,7 +668,9 @@ def chunks_by_resolution(
for non-category data. If this doesn't represent the final data
type of the data then the final size of chunks in memory will not
match the user's request via dask's ``array.chunk-size``
configuration.
configuration. Sometimes it is useful to keep this as a single
dtype for all reading functionality (ex. ``np.float32``) in order
to keep all read variable chunks the same size regardless of dtype.
num_high_res_elements: Smallest number of high (fine) resolution
elements that make up a single "unit" or chunk of data. This could
be a multiple or factor of the scan size for some instruments and/or
Expand Down Expand Up @@ -734,7 +736,7 @@ def _low_res_chunks_from_high_res(
aligned_chunk_size = np.round(chunk_size_for_high_res / num_high_res_elements) * num_high_res_elements
low_res_chunk_size = aligned_chunk_size / low_res_multiplier
# avoid getting 0 chunk size
return max(low_res_chunk_size, num_high_res_elements / low_res_multiplier)
return int(max(low_res_chunk_size, num_high_res_elements / low_res_multiplier))


def convert_remote_files_to_fsspec(filenames, storage_options=None):
Expand Down

0 comments on commit 37170e1

Please sign in to comment.