diff --git a/satpy/etc/readers/ahi_l2_nc.yaml b/satpy/etc/readers/ahi_l2_nc.yaml
new file mode 100644
index 0000000000..955d41bbcd
--- /dev/null
+++ b/satpy/etc/readers/ahi_l2_nc.yaml
@@ -0,0 +1,175 @@
+reader:
+  name: ahi_l2_nc
+  short_name: AHI L2 NetCDF4
+  long_name: Himawari-8/9 AHI Level 2 products in netCDF4 format from NOAA enterprise
+  status: Beta
+  supports_fsspec: true
+  sensors: ['ahi']
+  reader: !!python/name:satpy.readers.yaml_reader.FileYAMLReader
+
+
+file_types:
+  ahi_l2_mask:
+    file_reader: !!python/name:satpy.readers.ahi_l2_nc.HIML2NCFileHandler
+    file_patterns:
+      - '{sensor:3s}-CMSK_{version:4s}_{platform:3s}_s{start_time:%Y%m%d%H%M%S%f}_e{end_time:%Y%m%d%H%M%S%f}_c{creation_time:%Y%m%d%H%M%S%f}.nc'
+
+  ahi_l2_type:
+    file_reader: !!python/name:satpy.readers.ahi_l2_nc.HIML2NCFileHandler
+    file_patterns:
+      - '{sensor:3s}-CPHS_{version:4s}_{platform:3s}_s{start_time:%Y%m%d%H%M%S%f}_e{end_time:%Y%m%d%H%M%S%f}_c{creation_time:%Y%m%d%H%M%S%f}.nc'
+
+  ahi_l2_height:
+    file_reader: !!python/name:satpy.readers.ahi_l2_nc.HIML2NCFileHandler
+    file_patterns:
+      - '{sensor:3s}-CHGT_{version:4s}_{platform:3s}_s{start_time:%Y%m%d%H%M%S%f}_e{end_time:%Y%m%d%H%M%S%f}_c{creation_time:%Y%m%d%H%M%S%f}.nc'
+
+datasets:
+  # Products from the cloud mask files
+  cloud_mask:
+    name: cloud_mask
+    file_key: CloudMask
+    file_type: [ ahi_l2_mask ]
+
+  cloud_mask_binary:
+    name: cloud_mask_binary
+    file_key: CloudMaskBinary
+    file_type: [ ahi_l2_mask ]
+
+  cloud_probability:
+    name: cloud_probability
+    file_key: CloudProbability
+    file_type: [ ahi_l2_mask ]
+
+  ice_cloud_probability:
+    name: ice_cloud_probability
+    file_key: IceCloudProbability
+    file_type: [ ahi_l2_mask ]
+
+  phase_uncertainty:
+    name: phase_uncertainty
+    file_key: PhaseUncertainty
+    file_type: [ ahi_l2_mask ]
+
+  dust_mask:
+    name: dust_mask
+    file_key: Dust_Mask
+    file_type: [ ahi_l2_mask ]
+
+  fire_mask:
+    name: fire_mask
+    file_key: Fire_Mask
+    file_type: [ ahi_l2_mask ]
+
+  smoke_mask:
+    name: smoke_mask
+    file_key: Smoke_Mask
+    file_type: [ ahi_l2_mask ]
+
+  # Products from the cloud phase / type files
+  cloud_phase:
+    name: cloud_phase
+    file_key: CloudPhase
+    file_type: [ ahi_l2_type ]
+
+  cloud_phase_flag:
+    name: cloud_phase_flag
+    file_key: CloudPhaseFlag
+    file_type: [ ahi_l2_type ]
+
+  cloud_type:
+    name: cloud_type
+    file_key: CloudType
+    file_type: [ ahi_l2_type ]
+
+  # Products from the cloud height files
+  cloud_optical_depth:
+    name: cloud_optical_depth
+    file_key: CldOptDpth
+    file_type: [ ahi_l2_height ]
+
+  cloud_top_emissivity:
+    name: cloud_top_emissivity
+    file_key: CldTopEmss
+    file_type: [ ahi_l2_height ]
+
+  cloud_top_pressure:
+    name: cloud_top_pressure
+    file_key: CldTopPres
+    file_type: [ ahi_l2_height ]
+
+  cloud_top_pressure_low:
+    name: cloud_top_pressure_low
+    file_key: CldTopPresLow
+    file_type: [ ahi_l2_height ]
+
+  cloud_top_temperature:
+    name: cloud_top_temperature
+    file_key: CldTopTemp
+    file_type: [ ahi_l2_height ]
+
+  cloud_top_temperature_low:
+    name: cloud_top_temperature_low
+    file_key: CldTopTempLow
+    file_type: [ ahi_l2_height ]
+
+  cloud_height_quality:
+    name: cloud_height_quality
+    file_key: CloudHgtQF
+    file_type: [ ahi_l2_height ]
+
+  retrieval_cost:
+    name: retrieval_cost
+    file_key: Cost
+    file_type: [ ahi_l2_height ]
+
+  inversion_flag:
+    name: inversion_flag
+    file_key: InverFlag
+    file_type: [ ahi_l2_height ]
+
+  latitude_parallax_corrected:
+    name: latitude_parallax_corrected
+    file_key: Latitude_Pc
+    file_type: [ ahi_l2_height ]
+
+  longitude_parallax_corrected:
+    name: longitude_parallax_corrected
+    file_key: Longitude_Pc
+    file_type: [ ahi_l2_height ]
+
+  cloud_top_pressure_error:
+    name: cloud_top_pressure_error
+    file_key: PcError
+    file_type: [ ahi_l2_height ]
+
+  processing_order:
+    name: processing_order
+    file_key: ProcOrder
+    file_type: [ ahi_l2_height ]
+
+  shadow_mask:
+    name: shadow_mask
+    file_key: Shadow_Mask
+    file_type: [ ahi_l2_height ]
+
+  cloud_top_temperature_error:
+    name: cloud_top_temperature_error
+    file_key: TcError
+    file_type: [ ahi_l2_height ]
+
+  cloud_top_height_error:
+    name: cloud_top_height_error
+    file_key: ZcError
+    file_type: [ ahi_l2_height ]
+
+  # Datasets in all three file types
+  latitude:
+    name: latitude
+    file_key: Latitude
+    file_type: [ ahi_l2_height, ahi_l2_type, ahi_l2_mask ]
+
+  longitude:
+    name: longitude
+    file_key: Longitude
+    file_type: [ ahi_l2_height, ahi_l2_type, ahi_l2_mask ]
diff --git a/satpy/readers/ahi_l2_nc.py b/satpy/readers/ahi_l2_nc.py
new file mode 100644
index 0000000000..5159931819
--- /dev/null
+++ b/satpy/readers/ahi_l2_nc.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2023 Satpy developers
+#
+# This file is part of satpy.
+#
+# satpy is free software: you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation, either version 3 of the License, or (at your option) any later
+# version.
+#
+# satpy is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# satpy. If not, see <http://www.gnu.org/licenses/>.
+"""Reader for Himawari L2 cloud products from NOAA's big data programme.
+
+For more information about the data, see: .
+
+These products are generated by the NOAA enterprise cloud suite and have filenames like:
+AHI-CMSK_v1r1_h09_s202308240540213_e202308240549407_c202308240557548.nc
+
+The second letter grouping (CMSK above) indicates the product type:
+    CMSK - Cloud mask
+
+    CHGT - Cloud height
+
+    CPHS - Cloud type and phase
+
+These products are generated from the AHI sensor on Himawari-8 and Himawari-9, and are
+produced at the native instrument resolution for the IR channels (2km at nadir).
+
+NOTE: This reader is currently only compatible with full disk scenes. Unlike level 1 himawari
+data, the netCDF files do not contain the required metadata to produce an appropriate area
+definition for the data contents, and hence the area definition is hardcoded into the reader.
+
+A warning is displayed to the user highlighting this. The assumed area definition is a full
+disk image at the nominal subsatellite longitude of 140.7 degrees East.
+
+All the simple data products are supported here, but multidimensional products are not yet
+supported. These include the CldHgtFlag and the CloudMaskPacked variables.
+"""
+
+import logging
+from datetime import datetime
+
+import xarray as xr
+
+from satpy._compat import cached_property
+from satpy.readers._geos_area import get_area_definition, get_area_extent
+from satpy.readers.file_handlers import BaseFileHandler
+
+logger = logging.getLogger(__name__)
+
+EXPECTED_DATA_AREA = 'Full Disk'
+
+
+class HIML2NCFileHandler(BaseFileHandler):
+    """File handler for Himawari L2 NOAA enterprise data in netCDF format."""
+
+    def __init__(self, filename, filename_info, filetype_info):
+        """Initialize the reader."""
+        super().__init__(filename, filename_info, filetype_info)
+        self.nc = xr.open_dataset(self.filename,
+                                  decode_cf=True,
+                                  mask_and_scale=False,
+                                  chunks={"Columns": "auto", "Rows": "auto"})  # dims are 'Rows'/'Columns', not 'xc'/'yc'
+
+        # Check that file is a full disk scene, we don't know the area for anything else
+        if self.nc.attrs['cdm_data_type'] != EXPECTED_DATA_AREA:
+            raise ValueError('File is not a full disk scene')
+
+        self.sensor = self.nc.attrs['instrument_name'].lower()
+        self.nlines = self.nc.dims['Rows']  # 'Rows' is the line (y) dimension
+        self.ncols = self.nc.dims['Columns']  # 'Columns' is the column (x) dimension
+        self.platform_name = self.nc.attrs['satellite_name']
+        self.platform_shortname = filename_info['platform']
+        self._meta = None
+
+    @property
+    def start_time(self):
+        """Start timestamp of the dataset."""
+        dt = self.nc.attrs['time_coverage_start']
+        return datetime.strptime(dt, '%Y-%m-%dT%H:%M:%SZ')
+
+    @property
+    def end_time(self):
+        """End timestamp of the dataset."""
+        dt = self.nc.attrs['time_coverage_end']
+        return datetime.strptime(dt, '%Y-%m-%dT%H:%M:%SZ')
+
+    def get_dataset(self, key, info):
+        """Load a dataset."""
+        var = info['file_key']
+        logger.debug('Reading in get_dataset %s.', var)
+        variable = self.nc[var]
+
+        # Data has 'Latitude' and 'Longitude' coords, these must be replaced.
+        variable = variable.rename({'Rows': 'y', 'Columns': 'x'})
+
+        variable = variable.drop_vars('Latitude')  # .drop() is deprecated in xarray; use drop_vars()
+        variable = variable.drop_vars('Longitude')
+
+        variable.attrs.update(key.to_dict())
+        return variable
+
+    @cached_property
+    def area(self):
+        """Get AreaDefinition representing this file's data."""
+        return self._get_area_def()
+
+    def get_area_def(self, dsid):
+        """Get the area definition."""
+        del dsid
+        return self.area
+
+    def _get_area_def(self):
+        logger.info('The AHI L2 cloud products do not have the metadata required to produce an area definition.'
+                    ' Assuming standard Himawari-8/9 full disk projection.')
+
+        # Basic check to ensure we're processing a full disk (2km) scene.
+        if self.nlines != 5500 or self.ncols != 5500:
+            raise ValueError("Input L2 file is not a full disk Himawari scene. Only full disk data is supported.")
+
+        pdict = {'cfac': 20466275, 'lfac': 20466275, 'coff': 2750.5, 'loff': 2750.5, 'a': 6378137.0, 'h': 35785863.0,
+                 'b': 6356752.3, 'ssp_lon': 140.7, 'nlines': self.nlines, 'ncols': self.ncols, 'scandir': 'N2S'}
+
+        aex = get_area_extent(pdict)
+
+        pdict['a_name'] = 'Himawari_Area'
+        pdict['a_desc'] = "AHI Full Disk area"
+        pdict['p_id'] = f'geos{self.platform_shortname}'
+
+        return get_area_definition(pdict, aex)
diff --git a/satpy/tests/reader_tests/test_ahi_l2_nc.py b/satpy/tests/reader_tests/test_ahi_l2_nc.py
new file mode 100644
index 0000000000..39de4e1053
--- /dev/null
+++ b/satpy/tests/reader_tests/test_ahi_l2_nc.py
@@ -0,0 +1,109 @@
+"""Tests for the Himawari L2 netCDF reader."""
+
+from datetime import datetime
+
+import numpy as np
+import pytest
+import xarray as xr
+
+from satpy.readers.ahi_l2_nc import HIML2NCFileHandler
+from satpy.tests.utils import make_dataid
+
+rng = np.random.default_rng()
+clmk_data = rng.integers(0, 3, (5500, 5500), dtype=np.uint16)
+cprob_data = rng.uniform(0, 1, (5500, 5500))
+lat_data = rng.uniform(-90, 90, (5500, 5500))
+lon_data = rng.uniform(-180, 180, (5500, 5500))
+
+start_time = datetime(2023, 8, 24, 5, 40, 21)
+end_time = datetime(2023, 8, 24, 5, 49, 40)
+
+dimensions = {'Columns': 5500, 'Rows': 5500}
+
+exp_ext = (-5499999.9012, -5499999.9012, 5499999.9012, 5499999.9012)
+
+global_attrs = {"time_coverage_start": start_time.strftime("%Y-%m-%dT%H:%M:%SZ"),
+                "time_coverage_end": end_time.strftime("%Y-%m-%dT%H:%M:%SZ"),
+                "instrument_name": "AHI",
+                "satellite_name": "Himawari-9",
+                "cdm_data_type": "Full Disk",
+                }
+
+badarea_attrs = global_attrs.copy()
+badarea_attrs['cdm_data_type'] = 'bad_area'
+
+
+def ahil2_filehandler(fname, platform='h09'):
+    """Instantiate a Filehandler."""
+    fileinfo = {'platform': platform}
+    filetype = None
+    fh = HIML2NCFileHandler(fname, fileinfo, filetype)
+    return fh
+
+
+@pytest.fixture(scope="session")
+def himl2_filename(tmp_path_factory):
"""Create a fake himawari l2 file.""" + fname = f'{tmp_path_factory.mktemp("data")}/AHI-CMSK_v1r1_h09_s202308240540213_e202308240549407_c202308240557548.nc' + ds = xr.Dataset({'CloudMask': (['Rows', 'Columns'], clmk_data)}, + coords={'Latitude': (['Rows', 'Columns'], lat_data), + 'Longitude': (['Rows', 'Columns'], lon_data)}, + attrs=global_attrs) + ds.to_netcdf(fname) + return fname + + +@pytest.fixture(scope="session") +def himl2_filename_bad(tmp_path_factory): + """Create a fake himawari l2 file.""" + fname = f'{tmp_path_factory.mktemp("data")}/AHI-CMSK_v1r1_h09_s202308240540213_e202308240549407_c202308240557548.nc' + ds = xr.Dataset({'CloudMask': (['Rows', 'Columns'], clmk_data)}, + coords={'Latitude': (['Rows', 'Columns'], lat_data), + 'Longitude': (['Rows', 'Columns'], lon_data)}, + attrs=badarea_attrs) + ds.to_netcdf(fname) + + return fname + + +def test_startend(himl2_filename): + """Test start and end times are set correctly.""" + fh = ahil2_filehandler(himl2_filename) + assert fh.start_time == start_time + assert fh.end_time == end_time + + +def test_ahi_l2_area_def(himl2_filename, caplog): + """Test reader handles area definition correctly.""" + ps = '+proj=geos +lon_0=140.7 +h=35785863 +x_0=0 +y_0=0 +a=6378137 +rf=298.257024882273 +units=m +no_defs +type=crs' + + # Check case where input data is correct size. + fh = ahil2_filehandler(himl2_filename) + clmk_id = make_dataid(name="cloudmask") + area_def = fh.get_area_def(clmk_id) + assert area_def.width == dimensions['Columns'] + assert area_def.height == dimensions['Rows'] + assert np.allclose(area_def.area_extent, exp_ext) + assert area_def.proj4_string == ps + + # Check case where input data is incorrect size. 
+ with pytest.raises(ValueError): + fh = ahil2_filehandler(himl2_filename) + fh.nlines = 3000 + fh.get_area_def(clmk_id) + + +def test_bad_area_name(himl2_filename_bad): + """Check case where area name is not correct.""" + global_attrs['cdm_data_type'] = 'bad_area' + with pytest.raises(ValueError): + ahil2_filehandler(himl2_filename_bad) + global_attrs['cdm_data_type'] = 'Full Disk' + + +def test_load_data(himl2_filename): + """Test that data is loaded successfully.""" + fh = ahil2_filehandler(himl2_filename) + clmk_id = make_dataid(name="cloudmask") + clmk = fh.get_dataset(clmk_id, {'file_key': 'CloudMask'}) + assert np.allclose(clmk.data, clmk_data)