From 32d655e2dd277a053b4411e272c4b0e558083c25 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Thu, 28 Nov 2024 11:50:30 +0100 Subject: [PATCH 1/3] feat: variable as dataclass --- src/pymorize/data_request/__init__.py | 0 src/pymorize/data_request/table.py | 0 src/pymorize/data_request/variable.py | 176 ++++++++++++++++++++++++++ 3 files changed, 176 insertions(+) create mode 100644 src/pymorize/data_request/__init__.py create mode 100644 src/pymorize/data_request/table.py create mode 100644 src/pymorize/data_request/variable.py diff --git a/src/pymorize/data_request/__init__.py b/src/pymorize/data_request/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/pymorize/data_request/table.py b/src/pymorize/data_request/table.py new file mode 100644 index 00000000..e69de29b diff --git a/src/pymorize/data_request/variable.py b/src/pymorize/data_request/variable.py new file mode 100644 index 00000000..7199b1b3 --- /dev/null +++ b/src/pymorize/data_request/variable.py @@ -0,0 +1,176 @@ +import json +import pathlib +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Dict, Optional + + +class Variable(ABC): + """Abstract base class for a generic variable.""" + + _type_strings = { + "real": float, + } + """dict: conversion of string names in the tables to actual Python types""" + + ################################################################# + # Properties a Variable needs to have + ################################################################# + + @property + @abstractmethod + def frequency(self) -> str: # Or should this return Frequency? 
+ """Frequency of this variable""" + + @property + @abstractmethod + def modeling_realm(self) -> str: + """Modeling Realm of this variable""" + + @property + @abstractmethod + def standard_name(self) -> str: + """The CF standard name of the variable""" + + @property + @abstractmethod + def units(self) -> str: + """The units of the variable""" + + @property + @abstractmethod + def cell_methods(self) -> str: + """Methods applied to the cell""" + # FIXME(PG): I have no idea what this is + + @property + @abstractmethod + def cell_measures(self) -> str: + """What this cell measures""" + # FIXME(PG): I have no idea what this is + + @property + @abstractmethod + def long_name(self) -> str: + """The CF long name for this variable""" + + @property + @abstractmethod + def comment(self) -> str: + """Comment for NetCDF attributes""" + + @property + @abstractmethod + def dimensions(self) -> tuple[str, ...]: + """Dimensions of this variable""" + + @property + @abstractmethod + def out_name(self) -> str: + """Short name (array name) of this variable""" + + @property + @abstractmethod + def typ(self) -> type: + """The type of this array: int, float, str""" + + @property + @abstractmethod + def positive(self) -> str: + """For 3-D variables, which direction is up/down""" + + @property + @abstractmethod + def valid_min(self) -> float: + """Valid minimum""" + + @property + @abstractmethod + def valid_max(self) -> float: + """Valid maximum""" + + @property + @abstractmethod + def ok_min_mean_abs(self) -> float: + """Acceptable minimum of the mean absolute value""" + + @property + @abstractmethod + def ok_max_mean_abs(self) -> float: + """Acceptable maximum of the mean absolute value""" + + @property + @abstractmethod + def table_name(self) -> Optional[str]: + """The table this variable is define in""" + + ################################################################# + # Class methods for construction + ################################################################# + @classmethod + def 
from_dict(cls, data: dict) -> "Variable": + """Create a Variable instance from a dictionary.""" + typ = cls._type_strings.get(data["type"]) + if typ is None: + raise ValueError(f"Unsupported type: {data['type']}") + return cls( + frequency=data["frequency"], + modeling_realm=data["modeling_realm"], + standard_name=data["standard_name"], + units=data["units"], + cell_methods=data["cell_methods"], + cell_measures=data["cell_measures"], + long_name=data["long_name"], + comment=data["comment"], + dimensions=tuple( + data["dimensions"].split(" ") + ), # NOTE(PG): tuple, because of immutability + out_name=data["out_name"], + typ=cls._type_strings[data["type"]], + positive=data["positive"], + valid_min=data["valid_min"], + valid_max=data["valid_max"], + ok_min_mean_abs=data["ok_min_mean_abs"], + ok_max_mean_abs=data["ok_max_mean_abs"], + table_name=data.get("table_name"), + ) + + @classmethod + def from_json_table_file(cls, filename: str, varname: str) -> "Variable": + with open(filename, "r") as f: + data = json.load(f)["variable_entry"][varname] + data["table_name"] = pathlib.Path(filename).stem + return cls.from_dict(data) + + ################################################################# + # Methods for serialization + ################################################################# + def to_dict(self) -> dict: + """Convert the variable to a dictionary representation""" + return self.__dict__ + + ################################################################# + # Other methods + ################################################################# + # Nothing yet.... + + +@dataclass +class CMIP6Variable(Variable): + frequency: str + modeling_realm: str + standard_name: str + units: str + cell_methods: str + cell_measures: str + long_name: str + comment: str + dimensions: tuple[str, ...] 
+ out_name: str + typ: type + positive: str + valid_min: float + valid_max: float + ok_min_mean_abs: float + ok_max_mean_abs: float + table_name: Optional[str] = None From 6fbcf699901a8be4271c6db9d13e5b05af1dced7 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Fri, 29 Nov 2024 11:16:33 +0100 Subject: [PATCH 2/3] feat(data-request): add dataclasses for CMIP6 variables This commit introduces the `DataRequestVariable` abstract base class and its concrete implementation `CMIP6DataRequestVariable`. The new classes use the `dataclass` decorator to automatically generate special methods like `__init__` and `__repr__`. The `DataRequestVariable` class outlines the necessary properties and methods that any variable class should implement. The `CMIP6DataRequestVariable` class is a concrete implementation for CMIP6 variables. Additionally, this commit includes methods for constructing `DataRequestVariable` instances from dictionaries and JSON files, as well as converting instances to dictionary representations. Unit tests for `CMIP6DataRequestVariable` have been added to ensure correct functionality. --- src/pymorize/data_request/variable.py | 102 ++++++++++++++++++++++++-- tests/unit/test_drv.py | 17 +++++ 2 files changed, 111 insertions(+), 8 deletions(-) create mode 100644 tests/unit/test_drv.py diff --git a/src/pymorize/data_request/variable.py b/src/pymorize/data_request/variable.py index 7199b1b3..d098dcb6 100644 --- a/src/pymorize/data_request/variable.py +++ b/src/pymorize/data_request/variable.py @@ -1,11 +1,26 @@ +""" +This module defines the ``DataRequestVariable`` abstract base class and its concrete implementation ``CMIP6DataRequestVariable``. + +The ``DataRequestVariable`` class outlines the necessary properties and methods that any variable class should implement. 
+It includes properties such as frequency, modeling realm, standard name, units, cell methods, cell measures, +long name, comment, dimensions, out name, type, positive direction, valid minimum and maximum values, +acceptable minimum and maximum mean absolute values, and the table name. + +The ``CMIP6DataRequestVariable`` class is a concrete implementation of the ``DataRequestVariable`` class, specifically for CMIP6 variables. +It uses the ``dataclass`` decorator to automatically generate the ``__init__``, ``__repr__``, and other special methods. + +The module also provides class methods for constructing ``DataRequestVariable`` instances from dictionaries and JSON files, +as well as a method for converting a ``DataRequestVariable`` instance to a dictionary representation. +""" + import json import pathlib from abc import ABC, abstractmethod from dataclasses import dataclass -from typing import Dict, Optional +from typing import Optional -class Variable(ABC): +class DataRequestVariable(ABC): """Abstract base class for a generic variable.""" _type_strings = { @@ -14,7 +29,7 @@ class Variable(ABC): """dict: conversion of string names in the tables to actual Python types""" ################################################################# - # Properties a Variable needs to have + # Properties a DataRequestVariable needs to have ################################################################# @property @@ -104,12 +119,17 @@ def ok_max_mean_abs(self) -> float: def table_name(self) -> Optional[str]: """The table this variable is define in""" + @property + @abstractmethod + def attrs(self) -> dict: + """Attributes to update the Xarray DataArray with""" + ################################################################# # Class methods for construction ################################################################# @classmethod - def from_dict(cls, data: dict) -> "Variable": - """Create a Variable instance from a dictionary.""" + def from_dict(cls, data: dict) -> 
"DataRequestVariable": + """Create a DataRequestVariable instance from a dictionary.""" typ = cls._type_strings.get(data["type"]) if typ is None: raise ValueError(f"Unsupported type: {data['type']}") @@ -136,7 +156,7 @@ def from_dict(cls, data: dict) -> "Variable": ) @classmethod - def from_json_table_file(cls, filename: str, varname: str) -> "Variable": + def from_json_table_file(cls, filename: str, varname: str) -> "DataRequestVariable": with open(filename, "r") as f: data = json.load(f)["variable_entry"][varname] data["table_name"] = pathlib.Path(filename).stem @@ -152,11 +172,13 @@ def to_dict(self) -> dict: ################################################################# # Other methods ################################################################# - # Nothing yet.... + @abstractmethod + def global_attrs(self, override_dict: dict = None) -> dict: + """Global attributes for this variable, used to set on the xr.Dataset""" @dataclass -class CMIP6Variable(Variable): +class CMIP6DataRequestVariable(DataRequestVariable): frequency: str modeling_realm: str standard_name: str @@ -174,3 +196,67 @@ class CMIP6Variable(Variable): ok_min_mean_abs: float ok_max_mean_abs: float table_name: Optional[str] = None + + @classmethod + def from_json_table_file( + cls, filename: str, varname: str + ) -> "CMIP6DataRequestVariable": + with open(filename, "r") as f: + data = json.load(f)["variable_entry"][varname] + data["table_name"] = pathlib.Path(filename).stem.replace("CMIP6_", "") + return cls.from_dict(data) + + @property + def attrs(self) -> dict: + return { + "standard_name": self.standard_name, + "long_name": self.long_name, + "units": self.units, + "cell_methods": self.cell_methods, + "cell_measures": self.cell_measures, + } + + def global_attrs(self, override_dict: dict = None) -> dict: + """Return a dictionary of global attributes for a CMIP6 variable + + Parameters + ---------- + override_dict : dict + A dictionary of attributes to override the default values + """ + 
override_dict = override_dict or {} + # FIXME: This needs to come from the CVs somehow + rdict = { + "Conventions": None, + "activity_id": None, + "creation_date": None, + "data_specs_version": None, + "experiment": None, + "experiment_id": None, + "forcing_index": None, + "frequency": None, + "further_info_url": None, + "grid": None, + "grid_label": None, + "initialization_index": None, + "institution": None, + "institution_id": None, + "license": None, + "mip_era": None, + "nominal_resolution": None, + "physics_index": None, + "product": None, + "realization_index": None, + "realm": None, + "source": None, + "source_id": None, + "source_type": None, + "sub_experiment": None, + "sub_experiment_id": None, + "table_id": None, + "tracking_id": None, + "variable_id": None, + "variant_label": None, + } + rdict.update(override_dict) + return rdict diff --git a/tests/unit/test_drv.py b/tests/unit/test_drv.py new file mode 100644 index 00000000..281feade --- /dev/null +++ b/tests/unit/test_drv.py @@ -0,0 +1,17 @@ +""" +Tests for DataRequestVariable +""" + +import pathlib + +import pytest + +from pymorize.data_request.variable import CMIP6DataRequestVariable + + +def test_variable_from_cmip6_table(): + table = pathlib.Path("cmip6-cmor-tables/Tables/CMIP6_Omon.json") + variable = "thetao" + drv = CMIP6DataRequestVariable.from_json_table_file(table, variable) + assert drv.frequency == "mon" + assert drv.table_name == "Omon" From d2b9bbb19765f2e0f25df6ec6c0fa40b83772955 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Fri, 29 Nov 2024 14:56:31 +0100 Subject: [PATCH 3/3] feat: DataRequestTableHeader as ABC --- setup.py | 1 + src/pymorize/data_request/table.py | 255 +++++++++++++++++++++++++++++ 2 files changed, 256 insertions(+) diff --git a/setup.py b/setup.py index 29837f83..2da733df 100644 --- a/setup.py +++ b/setup.py @@ -55,6 +55,7 @@ def read(filename): "pyyaml", "questionary", "randomname", + "semver", "rich-click", "streamlit", "tqdm", diff --git 
a/src/pymorize/data_request/table.py b/src/pymorize/data_request/table.py index e69de29b..eb6eb731 100644 --- a/src/pymorize/data_request/table.py +++ b/src/pymorize/data_request/table.py @@ -0,0 +1,255 @@ +import json +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Dict, List + +import pendulum +from semver.version import Version + +from .variable import DataRequestVariable + + +class DataRequestTable(ABC): + """Abstract base class for a generic data request table.""" + + @property + @abstractmethod + def table_name(self) -> str: + """Name of the table.""" + pass + + @property + @abstractmethod + def variables(self) -> List[DataRequestVariable]: + """List of variables in the table.""" + pass + + @abstractmethod + def get_variable(self, name: str) -> DataRequestVariable: + """Retrieve a variable's details by name.""" + pass + + @property + @abstractmethod + def header(self) -> "DataRequestTableHeader": + """Header of the table.""" + pass + + +class DataRequestTableHeader(ABC): + + @property + @abstractmethod + def data_specs_version(self) -> Version: + """Data specifications version.""" + pass + + @property + @abstractmethod + def cmor_version(self) -> Version: + """CMOR version.""" + pass + + @property + @abstractmethod + def table_id(self) -> str: + """Name of the table.""" + pass + + @property + @abstractmethod + def realm(self) -> str: + """Realm of the table.""" + pass + + @property + @abstractmethod + def table_date(self) -> pendulum.date: + """Date of the table.""" + pass + + @property + @abstractmethod + def missing_value(self) -> float: + """Missing Value""" + + @property + @abstractmethod + def int_missing_value(self) -> int: + """Integer missing value""" + + @property + @abstractmethod + def product(self) -> str: + """Product""" + + @property + @abstractmethod + def approx_interval(self) -> float: + """Approximate interval (time in days)""" + + @property + @abstractmethod + def generic_levels(self) -> 
List[str]: + """Generic levels""" + pass + + @property + @abstractmethod + def mip_era(self) -> str: + """MIP era""" + pass + + @property + @abstractmethod + def Conventions(self) -> str: + """Conventions""" + pass + + @classmethod + @abstractmethod + def from_dict(cls, data: dict) -> "DataRequestTableHeader": + """Create a DataRequestTableHeader from a dictionary.""" + pass + + +@dataclass +class CMIP6DataRequestTableHeader(DataRequestTableHeader): + # NOTE(PG): The defaults here refer to the CMIP6 Data Request Tables + # found in commit 1131220 of the cmip6-cmor-tables repository. Some + # of these defaults might not be correct for later versions. + # + # Manual cleanup in the hard-coded defaults: + # - data_specs_version: "01.00.33" -> "1.0.33" to match semver + _table_id: str + _realm: str + _table_date: pendulum.Date + _approx_interval: float + _generic_levels: List[str] + + # Properties with known defaults: + _HARD_CODED_DATA_SPECS_OLD = "01.00.33" + _HARD_CODED_DATA_SPECS_NEW = "1.0.33" + _data_specs_version: Version = Version.parse( + _HARD_CODED_DATA_SPECS_NEW, + optional_minor_and_patch=True, + ) + _cmor_version: Version = Version.parse( + "3.5", + optional_minor_and_patch=True, + ) + _mip_era: str = "CMIP6" + _Conventions: str = "CF-1.7 CMIP-6.2" + _missing_value: float = 1.0e20 + _int_missing_value: int = -999 + _product: str = "model-output" + + @classmethod + def from_dict(cls, data: dict) -> "CMIP6DataRequestTableHeader": + # The input dict needs to have these, since we have no defaults: + extracted_data = dict( + _table_id=data["table_id"].lstrip("Table "), + _realm=data["realm"], + _table_date=pendulum.parse(data["table_date"], strict=False).date(), + _approx_interval=float(data["approx_interval"]), + _generic_levels=data["generic_levels"].split(" "), + ) + # Optionally get the rest, which might not be present: + for key in cls.__dataclass_fields__.keys(): + if key.lstrip("_") in data and key not in extracted_data: + extracted_data[key] = 
data[key.lstrip("_")] + # Handle Version conversions + if "_data_specs_version" in extracted_data: + extracted_data["_data_specs_version"] = Version.parse( + extracted_data["_data_specs_version"].replace( + cls._HARD_CODED_DATA_SPECS_OLD, + cls._HARD_CODED_DATA_SPECS_NEW, + ), + optional_minor_and_patch=True, + ) + if "_cmor_version" in extracted_data: + extracted_data["_cmor_version"] = Version.parse( + extracted_data["_cmor_version"], + optional_minor_and_patch=True, + ) + # Handle types for missing_value and int_missing_value + if "_missing_value" in extracted_data: + extracted_data["_missing_value"] = float(extracted_data["_missing_value"]) + if "_int_missing_value" in extracted_data: + extracted_data["_int_missing_value"] = int( + extracted_data["_int_missing_value"] + ) + return cls(**extracted_data) + + @property + def table_id(self) -> str: + return self._table_id + + @property + def realm(self) -> str: + return self._realm + + @property + def table_date(self) -> pendulum.Date: + return self._table_date + + @property + def missing_value(self) -> float: + return self._missing_value + + @property + def int_missing_value(self) -> int: + return self._int_missing_value + + @property + def product(self) -> str: + return self._product + + @property + def approx_interval(self) -> float: + return self._approx_interval + + @property + def generic_levels(self) -> List[str]: + return self._generic_levels + + @property + def mip_era(self) -> str: + return self._mip_era + + @property + def Conventions(self) -> str: + return self._Conventions + + @property + def data_specs_version(self) -> Version: + return self._data_specs_version + + @property + def cmor_version(self) -> Version: + return self._cmor_version + + +@dataclass +class CMIP6JSONDataRequestTableHeader(CMIP6DataRequestTableHeader): + + @classmethod + def from_json_file(cls, jfile) -> "CMIP6JSONDataRequestTableHeader": + with open(jfile, "r") as f: + data = json.load(f) + header = data["Header"] + return 
cls.from_dict(header) + + +class CMIP6DataRequestTable(DataRequestTable): + """Concrete implementation of DataRequestTable for CMIP6.""" + + def __init__(self, data: Dict[str, dict]): + self._data = data + + @property + def variables(self) -> List[str]: + return list(self._data.keys()) + + def get_variable(self, name: str) -> dict: + return self._data.get(name, {})