Skip to content

Commit

Permalink
Add ZR rain rates to OPERA.
Browse files Browse the repository at this point in the history
  • Loading branch information
Simon Pfreundschuh committed Aug 13, 2024
1 parent 5798bbf commit 932731c
Show file tree
Hide file tree
Showing 2 changed files with 184 additions and 1 deletion.
78 changes: 77 additions & 1 deletion chimp/data/opera.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@
from datetime import datetime, timedelta
from pathlib import Path
import re
from typing import List, Optional
from typing import Dict, List, Optional, Tuple, Union

from h5py import File
import numpy as np
import pandas as pd
from pyproj import Transformer
import torch
import xarray as xr

from pansat import TimeRange, Product
Expand Down Expand Up @@ -158,3 +159,78 @@ def process_file(

OPERA_REFLECTIVITY = Opera("reflectivity", reflectivity)
OPERA_SURFACE_PRECIP = Opera("surface_precip", surface_precip)


class OperaWPrecip(Opera):
"""
Specialization of the OPERA reference data that also includes surface precipitation
estimates.
"""
def __init__(self):
self.target_name = "reflectivity"
self.pansat_product = reflectivity
ReferenceDataset.__init__(
self,
"opera_w_precip",
scale=4,
targets=[RetrievalTarget("reflectivity")],
)

def load_sample(
self,
path: Path,
crop_size: int,
base_scale,
slices: Tuple[int, int, int, int],
rng: np.random.Generator,
rotate: Optional[float] = None,
flip: Optional[bool] = None,
quality_threshold: float = 0.8
) -> Dict[str, torch.Tensor]:
targets = super().load_sample(
path=path,
crop_size=crop_size,
base_scale=base_scale,
slices=slices,
rng=rng,
rotate=rotate,
flip=flip,
quality_threshold=quality_threshold
)
refl = targets["reflectivity"]
no_precip = refl <= -29.99
refl = 10 ** (refl / 10)
precip = (refl / 200.0) ** (1 / 1.6)
precip[no_precip] = 0.0
targets["surface_precip_zr"] = precip
return targets

def find_training_files(
self,
path: Union[Path, List[Path]],
times: Optional[np.ndarray] = None
) -> Tuple[np.ndarray, List[Path]]:
"""
Find training data files.
Args:
path: Path to the folder the training data for all input
and reference datasets.
times: Optional array containing valid reference data times for static
inputs.
Return:
A tuple ``(times, paths)`` containing the times for which training
files are available and the paths pointing to the corresponding file.
"""
pattern = "*????????_??_??.nc"
training_files = sorted(
list((path / "opera").glob(pattern))
if isinstance(path, Path) else
list(f for f in path if f in list(f.parent.glob("opera" + pattern)))
)
times = np.array(list(map(get_date, training_files)))
return times, training_files


OPERA_W_PRECIP = OperaWPrecip()
107 changes: 107 additions & 0 deletions test/data/test_opera.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
"""
Tests for the chimp.data.opera module.
"""

from conftest import NEEDS_PANSAT_PASSWORD

import numpy as np
import pytest
import xarray as xr

from chimp.areas import NORDICS
from chimp.data.opera import (
OPERA_REFLECTIVITY,
OPERA_W_PRECIP
)


@NEEDS_PANSAT_PASSWORD
def test_find_files_opera():
"""
Ensure that OPERA files are found for a given time range.
"""
start_time = np.datetime64("2020-01-01T00:01:00")
end_time = np.datetime64("2020-01-01T00:57:00")
time_step = np.timedelta64(15, "m")
files = OPERA_REFLECTIVITY.find_files(
start_time,
end_time,
time_step
)
assert len(files) == 1

local_files = OPERA_REFLECTIVITY.find_files(
start_time,
end_time,
time_step,
files[0].parent
)
assert len(files) == 1



@pytest.mark.slow
@NEEDS_PANSAT_PASSWORD
def test_process_files_opera(tmp_path):
"""
Ensure that extraction of observations works as expected.
"""
start_time = np.datetime64("2020-01-01T01:00:00")
end_time = np.datetime64("2020-01-01T02:00:00")
time_step = np.timedelta64(1, "h")
files = OPERA_REFLECTIVITY.find_files(
start_time,
end_time,
time_step
)
assert len(files) == 1
OPERA_REFLECTIVITY.process_file(
files[0],
NORDICS,
tmp_path,
time_step=time_step
)

training_files = sorted(list((tmp_path / "opera_reflectivity").glob("*.nc")))
assert len(training_files) == 24
training_data = xr.load_dataset(training_files[0])
assert np.isfinite(training_data.reflectivity.data).sum() > 100
assert np.isfinite(training_data.qi.data).sum() > 100


@pytest.mark.slow
@NEEDS_PANSAT_PASSWORD
def test_opera_w_precip(tmp_path):
"""
Ensure that extraction of observations works as expected.
"""
start_time = np.datetime64("2020-01-01T01:00:00")
end_time = np.datetime64("2020-01-01T02:00:00")
time_step = np.timedelta64(1, "h")
files = OPERA_W_PRECIP.find_files(
start_time,
end_time,
time_step
)
assert len(files) == 1
OPERA_W_PRECIP.process_file(
files[0],
NORDICS,
tmp_path,
time_step=time_step
)

training_files = sorted(list((tmp_path / "opera_reflectivity").glob("*.nc")))
assert len(training_files) == 24

_, training_files = OPERA_W_PRECIP.find_training_files(tmp_path)
assert len(training_files) == 24
ref_data = OPERA_W_PRECIP.load_sample(
training_files[0],
crop_size=256,
base_scale=4,
slices=(0, crop_size[0], 0, crop_size[1]),
rng=None
)
assert "reflectivity" in ref_data
assert "surface_precip_zr" in ref_data

0 comments on commit 932731c

Please sign in to comment.