From 932731cfbed816b8c0ed135ee61ed0a148fa98fc Mon Sep 17 00:00:00 2001 From: Simon Pfreundschuh Date: Tue, 13 Aug 2024 16:46:03 -0600 Subject: [PATCH] Add ZR rain rates to OPERA. --- chimp/data/opera.py | 78 ++++++++++++++++++++++++++++- test/data/test_opera.py | 107 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 184 insertions(+), 1 deletion(-) create mode 100644 test/data/test_opera.py diff --git a/chimp/data/opera.py b/chimp/data/opera.py index 92604f8..f6af745 100644 --- a/chimp/data/opera.py +++ b/chimp/data/opera.py @@ -8,12 +8,13 @@ from datetime import datetime, timedelta from pathlib import Path import re -from typing import List, Optional +from typing import Dict, List, Optional, Tuple, Union from h5py import File import numpy as np import pandas as pd from pyproj import Transformer +import torch import xarray as xr from pansat import TimeRange, Product @@ -158,3 +159,78 @@ def process_file( OPERA_REFLECTIVITY = Opera("reflectivity", reflectivity) OPERA_SURFACE_PRECIP = Opera("surface_precip", surface_precip) + + +class OperaWPrecip(Opera): + """ + Specialization of the OPERA reference data that also includes surface precipitation + estimates. + """ + def __init__(self): + self.target_name = "reflectivity" + self.pansat_product = reflectivity + ReferenceDataset.__init__( + self, + "opera_w_precip", + scale=4, + targets=[RetrievalTarget("reflectivity")], + ) + + def load_sample( + self, + path: Path, + crop_size: int, + base_scale, + slices: Tuple[int, int, int, int], + rng: np.random.Generator, + rotate: Optional[float] = None, + flip: Optional[bool] = None, + quality_threshold: float = 0.8 + ) -> Dict[str, torch.Tensor]: + targets = super().load_sample( + path=path, + crop_size=crop_size, + base_scale=base_scale, + slices=slices, + rng=rng, + rotate=rotate, + flip=flip, + quality_threshold=quality_threshold + ) + refl = targets["reflectivity"] + no_precip = refl <= -29.99 + refl = 10 ** (refl / 10) + precip = (refl / 200.0) ** (1 / 1.6) + precip[no_precip] = 0.0 + targets["surface_precip_zr"] = precip + return targets + + def find_training_files( + self, + path: Union[Path, List[Path]], + times: Optional[np.ndarray] = None + ) -> Tuple[np.ndarray, List[Path]]: + """ + Find training data files. + + Args: + path: Path to the folder the training data for all input + and reference datasets. + times: Optional array containing valid reference data times for static + inputs. + + Return: + A tuple ``(times, paths)`` containing the times for which training + files are available and the paths pointing to the corresponding file. + """ + pattern = "*????????_??_??.nc" + training_files = sorted( + list((path / "opera").glob(pattern)) + if isinstance(path, Path) else + list(f for f in path if f in list(f.parent.glob("opera" + pattern))) + ) + times = np.array(list(map(get_date, training_files))) + return times, training_files + + +OPERA_W_PRECIP = OperaWPrecip() diff --git a/test/data/test_opera.py b/test/data/test_opera.py new file mode 100644 index 0000000..a998c5c --- /dev/null +++ b/test/data/test_opera.py @@ -0,0 +1,107 @@ +""" +Tests for the chimp.data.opera module. +""" + +from conftest import NEEDS_PANSAT_PASSWORD + +import numpy as np +import pytest +import xarray as xr + +from chimp.areas import NORDICS +from chimp.data.opera import ( + OPERA_REFLECTIVITY, + OPERA_W_PRECIP +) + + +@NEEDS_PANSAT_PASSWORD +def test_find_files_opera(): + """ + Ensure that OPERA files are found for a given time range. + """ + start_time = np.datetime64("2020-01-01T00:01:00") + end_time = np.datetime64("2020-01-01T00:57:00") + time_step = np.timedelta64(15, "m") + files = OPERA_REFLECTIVITY.find_files( + start_time, + end_time, + time_step + ) + assert len(files) == 1 + + local_files = OPERA_REFLECTIVITY.find_files( + start_time, + end_time, + time_step, + files[0].parent + ) + assert len(files) == 1 + + + +@pytest.mark.slow +@NEEDS_PANSAT_PASSWORD +def test_process_files_opera(tmp_path): + """ + Ensure that extraction of observations works as expected. + """ + start_time = np.datetime64("2020-01-01T01:00:00") + end_time = np.datetime64("2020-01-01T02:00:00") + time_step = np.timedelta64(1, "h") + files = OPERA_REFLECTIVITY.find_files( + start_time, + end_time, + time_step + ) + assert len(files) == 1 + OPERA_REFLECTIVITY.process_file( + files[0], + NORDICS, + tmp_path, + time_step=time_step + ) + + training_files = sorted(list((tmp_path / "opera_reflectivity").glob("*.nc"))) + assert len(training_files) == 24 + training_data = xr.load_dataset(training_files[0]) + assert np.isfinite(training_data.reflectivity.data).sum() > 100 + assert np.isfinite(training_data.qi.data).sum() > 100 + + +@pytest.mark.slow +@NEEDS_PANSAT_PASSWORD +def test_opera_w_precip(tmp_path): + """ + Ensure that extraction of observations works as expected. + """ + start_time = np.datetime64("2020-01-01T01:00:00") + end_time = np.datetime64("2020-01-01T02:00:00") + time_step = np.timedelta64(1, "h") + files = OPERA_W_PRECIP.find_files( + start_time, + end_time, + time_step + ) + assert len(files) == 1 + OPERA_W_PRECIP.process_file( + files[0], + NORDICS, + tmp_path, + time_step=time_step + ) + + training_files = sorted(list((tmp_path / "opera_reflectivity").glob("*.nc"))) + assert len(training_files) == 24 + + _, training_files = OPERA_W_PRECIP.find_training_files(tmp_path) + assert len(training_files) == 24 + ref_data = OPERA_W_PRECIP.load_sample( + training_files[0], + crop_size=256, + base_scale=4, + slices=(0, crop_size[0], 0, crop_size[1]), + rng=None + ) + assert "reflectivity" in ref_data + assert "surface_precip_zr" in ref_data