From 2b248849950e4e736a7bb4b46aa118117d3824ca Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Fri, 27 Sep 2024 16:42:56 +0200 Subject: [PATCH] refactor(enums): move config to general (#1233) --- openfisca_core/config.py | 68 ++++++++++++++++++++++ openfisca_core/entities/types.py | 1 - openfisca_core/indexed_enums/enum.py | 11 ++-- openfisca_core/indexed_enums/enum_array.py | 32 +++++----- openfisca_core/indexed_enums/types.py | 29 +++++---- openfisca_core/periods/types.py | 5 -- openfisca_core/types.py | 36 +++++++++++- openfisca_core/variables/__init__.py | 2 +- openfisca_core/variables/config.py | 52 ----------------- openfisca_core/variables/variable.py | 19 +++--- openfisca_tasks/lint.mk | 1 + openfisca_web_api/loader/variables.py | 4 +- 12 files changed, 156 insertions(+), 104 deletions(-) create mode 100644 openfisca_core/config.py diff --git a/openfisca_core/config.py b/openfisca_core/config.py new file mode 100644 index 000000000..c2e4e6ebb --- /dev/null +++ b/openfisca_core/config.py @@ -0,0 +1,68 @@ +from __future__ import annotations + +from typing import Final, final + +import dataclasses +import datetime + +from openfisca_core import indexed_enums as enum + +from . import types as t + + +@final +@dataclasses.dataclass(frozen=True) +class ValueType: + dtype: t.DTypeLike + default: object + json_type: str + formatted_value_type: str + is_period_size_independent: bool + + +value_types: Final = { + bool: ValueType( + dtype=t.DTypeBool, + default=False, + json_type="boolean", + formatted_value_type="Boolean", + is_period_size_independent=True, + ), + int: ValueType( + dtype=t.DTypeInt, + default=0, + json_type="integer", + formatted_value_type="Int", + is_period_size_independent=False, + ), + float: ValueType( + dtype=t.DTypeFloat, + default=0, + json_type="number", + formatted_value_type="Float", + is_period_size_independent=False, + ), + str: ValueType( + dtype=t.DTypeStr, + default="", + json_type="string", + formatted_value_type="String", + is_period_size_independent=True, + ), + enum.Enum: ValueType( + dtype=t.DTypeEnum, + default=None, + json_type="string", + formatted_value_type="String", + is_period_size_independent=True, + ), + datetime.date: ValueType( + dtype="datetime64[D]", + default=None, + json_type="string", + formatted_value_type="Date", + is_period_size_independent=True, + ), +} + +__all__ = ["value_types"] diff --git a/openfisca_core/entities/types.py b/openfisca_core/entities/types.py index ef6af9024..2f05954ed 100644 --- a/openfisca_core/entities/types.py +++ b/openfisca_core/entities/types.py @@ -33,7 +33,6 @@ class RoleParams(TypedDict, total=False): "GroupEntity", "Role", "RoleKey", - "RoleParams", "RolePlural", "SingleEntity", "TaxBenefitSystem", diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index 960c16b91..fe67f4f31 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -29,10 +29,7 @@ def __init__(self, name: str) -> None: def encode( cls, array: ( - EnumArray - | t.Array[numpy.str_] - | t.Array[numpy.int16] - | t.Array[numpy.int32] + EnumArray | t.Array[t.DTypeEnum] | t.Array[t.DTypeInt] | t.Array[t.DTypeStr] ), ) -> EnumArray: """Encode a string numpy array, an enum item numpy array, or an int numpy @@ -68,7 +65,7 @@ def encode( array = numpy.select( [array == item.name for item in cls], [item.index for item in cls], - ).astype(numpy.int16) + ).astype(t.DTypeEnum) # Enum items arrays elif array.dtype.kind == "O": @@ -92,9 +89,9 @@ def encode( array = numpy.select( [array == item for item in klass], [item.index for item in klass], - ).astype(numpy.int16) + ).astype(t.DTypeEnum) - array = numpy.asarray(array, dtype=numpy.int16) + array = numpy.asarray(array, dtype=t.DTypeEnum) return EnumArray(array, cls) diff --git a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index d8530f8d1..ce74ddae2 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -1,7 +1,7 @@ from __future__ import annotations from typing import NoReturn, overload -from typing_extensions import TypeGuard +from typing_extensions import Self, TypeGuard import numpy @@ -25,35 +25,35 @@ def __new__( cls, input_array: t.Array[numpy.int16], possible_values: None | type[t.Enum] = None, - ) -> EnumArray: + ) -> Self: obj = numpy.asarray(input_array).view(cls) obj.possible_values = possible_values return obj # See previous comment def __array_finalize__( - self, obj: None | t.EnumArray | t.Array[numpy.generic] + self, obj: None | t.EnumArray | t.Array[t.DTypeObject] ) -> None: if obj is None: - return None + return if isinstance(obj, EnumArray): self.possible_values = obj.possible_values - return None + return @overload # type: ignore[override] - def __eq__(self, other: None | t.Enum | type[t.Enum]) -> t.Array[numpy.bool_]: ... + def __eq__(self, other: None | t.Enum | type[t.Enum]) -> t.Array[t.DTypeBool]: ... @overload - def __eq__(self, other: object) -> t.Array[numpy.bool_] | bool: ... + def __eq__(self, other: object) -> t.Array[t.DTypeBool] | bool: ... - def __eq__(self, other: object) -> t.Array[numpy.bool_] | bool: - boolean_array: t.Array[numpy.bool_] + def __eq__(self, other: object) -> t.Array[t.DTypeBool] | bool: + boolean_array: t.Array[t.DTypeBool] boolean: bool if self.possible_values is None: return NotImplemented - view: t.Array[numpy.int16] = self.view(numpy.ndarray) + view: t.Array[t.DTypeEnum] = self.view(numpy.ndarray) if other is None or self._is_an_enum_type(other): boolean_array = view == other @@ -71,12 +71,12 @@ def __eq__(self, other: object) -> t.Array[numpy.bool_] | bool: return boolean @overload # type: ignore[override] - def __ne__(self, other: None | t.Enum | type[t.Enum]) -> t.Array[numpy.bool_]: ... + def __ne__(self, other: None | t.Enum | type[t.Enum]) -> t.Array[t.DTypeBool]: ... @overload - def __ne__(self, other: object) -> t.Array[numpy.bool_] | bool: ... + def __ne__(self, other: object) -> t.Array[t.DTypeBool] | bool: ... - def __ne__(self, other: object) -> t.Array[numpy.bool_] | bool: + def __ne__(self, other: object) -> t.Array[t.DTypeBool] | bool: return numpy.logical_not(self == other) def _forbidden_operation(self, other: object) -> NoReturn: @@ -97,7 +97,7 @@ def _forbidden_operation(self, other: object) -> NoReturn: __and__ = _forbidden_operation # type: ignore[assignment] __or__ = _forbidden_operation # type: ignore[assignment] - def decode(self) -> t.Array[numpy.int16]: + def decode(self) -> t.Array[t.DTypeEnum]: """Return the array of enum items corresponding to self. For instance: @@ -116,10 +116,10 @@ def decode(self) -> t.Array[numpy.int16]: return numpy.select( [self == item.index for item in self.possible_values], - [item for item in self.possible_values], # type: ignore[misc] + list(self.possible_values), # pyright: ignore[reportArgumentType] ) - def decode_to_str(self) -> t.Array[numpy.str_]: + def decode_to_str(self) -> t.Array[t.DTypeStr]: """Return the array of string identifiers corresponding to self. For instance: diff --git a/openfisca_core/indexed_enums/types.py b/openfisca_core/indexed_enums/types.py index ac47be816..34bc43af3 100644 --- a/openfisca_core/indexed_enums/types.py +++ b/openfisca_core/indexed_enums/types.py @@ -1,16 +1,25 @@ -from openfisca_core.types import Array +from openfisca_core.types import ( + Array, + DTypeBool, + DTypeEnum, + DTypeInt, + DTypeObject, + DTypeStr, + Enum, +) import abc -import enum -import numpy +class EnumArray(Array[DTypeEnum], metaclass=abc.ABCMeta): ... -class Enum(enum.Enum): - index: int - -class EnumArray(Array[numpy.int16], metaclass=abc.ABCMeta): ... - - -__all__ = ["Array", "Enum", "EnumArray"] +__all__ = [ + "Array", + "DTypeBool", + "DTypeEnum", + "DTypeInt", + "DTypeObject", + "DTypeStr", + "Enum", +] diff --git a/openfisca_core/periods/types.py b/openfisca_core/periods/types.py index 092509c62..952c2b280 100644 --- a/openfisca_core/periods/types.py +++ b/openfisca_core/periods/types.py @@ -173,11 +173,6 @@ class PeriodStr(str, metaclass=_PeriodStrMeta): # type: ignore[misc] __all__ = [ "DateUnit", - "ISOCalendarStr", - "ISOFormatStr", "Instant", - "InstantStr", "Period", - "PeriodStr", - "SeqInt", ] diff --git a/openfisca_core/types.py b/openfisca_core/types.py index 711e6c512..0518887f8 100644 --- a/openfisca_core/types.py +++ b/openfisca_core/types.py @@ -1,13 +1,17 @@ from __future__ import annotations from collections.abc import Iterable, Sequence, Sized -from numpy.typing import NDArray +from numpy.typing import DTypeLike, NDArray from typing import Any, NewType, TypeVar, Union from typing_extensions import Protocol, TypeAlias +import enum + import numpy import pendulum +# Config + _N_co = TypeVar("_N_co", bound=numpy.generic, covariant=True) #: Type representing an numpy array. @@ -21,6 +25,26 @@ #: Generic type vars. _T_co = TypeVar("_T_co", covariant=True) +#: Type for bool arrays. +DTypeBool: TypeAlias = numpy.bool_ + +#: Type for int arrays. +DTypeInt: TypeAlias = numpy.int32 + +#: Type for float arrays. +DTypeFloat: TypeAlias = numpy.float32 + +#: Type for string arrays. +DTypeStr: TypeAlias = numpy.str_ + +#: Type for Enum arrays. +DTypeEnum: TypeAlias = numpy.int16 + +#: Type for date arrays. +DTypeDate: TypeAlias = numpy.datetime64 + +#: Type for "any" arrays. +DTypeObject: TypeAlias = numpy.object_ # Entities @@ -80,6 +104,13 @@ def clone(self, population: Any, /) -> Holder: ... def get_memory_usage(self, /) -> Any: ... +# Indexed enums + + +class Enum(enum.Enum): + index: int + + # Parameters @@ -189,3 +220,6 @@ def __call__( class Params(Protocol): def __call__(self, instant: Instant, /) -> ParameterNodeAtInstant: ... + + +__all__ = ["DTypeLike"] diff --git a/openfisca_core/variables/__init__.py b/openfisca_core/variables/__init__.py index 1ab191c5c..31a436b09 100644 --- a/openfisca_core/variables/__init__.py +++ b/openfisca_core/variables/__init__.py @@ -21,6 +21,6 @@ # # See: https://www.python.org/dev/peps/pep-0008/#imports -from .config import FORMULA_NAME_PREFIX, VALUE_TYPES # noqa: F401 +from .config import FORMULA_NAME_PREFIX # noqa: F401 from .helpers import get_annualized_variable, get_neutralized_variable # noqa: F401 from .variable import Variable # noqa: F401 diff --git a/openfisca_core/variables/config.py b/openfisca_core/variables/config.py index 54270145b..043665e32 100644 --- a/openfisca_core/variables/config.py +++ b/openfisca_core/variables/config.py @@ -1,53 +1 @@ -import datetime - -import numpy - -from openfisca_core import indexed_enums -from openfisca_core.indexed_enums import Enum - -VALUE_TYPES = { - bool: { - "dtype": numpy.bool_, - "default": False, - "json_type": "boolean", - "formatted_value_type": "Boolean", - "is_period_size_independent": True, - }, - int: { - "dtype": numpy.int32, - "default": 0, - "json_type": "integer", - "formatted_value_type": "Int", - "is_period_size_independent": False, - }, - float: { - "dtype": numpy.float32, - "default": 0, - "json_type": "number", - "formatted_value_type": "Float", - "is_period_size_independent": False, - }, - str: { - "dtype": object, - "default": "", - "json_type": "string", - "formatted_value_type": "String", - "is_period_size_independent": True, - }, - Enum: { - "dtype": indexed_enums.ENUM_ARRAY_DTYPE, - "json_type": "string", - "formatted_value_type": "String", - "is_period_size_independent": True, - }, - datetime.date: { - "dtype": "datetime64[D]", - "default": datetime.date.fromtimestamp(0), # 0 == 1970-01-01 - "json_type": "string", - "formatted_value_type": "Date", - "is_period_size_independent": True, - }, -} - - FORMULA_NAME_PREFIX = "formula" diff --git a/openfisca_core/variables/variable.py b/openfisca_core/variables/variable.py index 926e4c59c..f409a7524 100644 --- a/openfisca_core/variables/variable.py +++ b/openfisca_core/variables/variable.py @@ -9,12 +9,13 @@ import numpy import sortedcontainers -from openfisca_core import commons, periods, types as t +from openfisca_core import commons, config, periods, types as t from openfisca_core.entities import Entity, GroupEntity from openfisca_core.indexed_enums import Enum, EnumArray from openfisca_core.periods import DateUnit, Period -from . import config, helpers +from . import helpers +from .config import FORMULA_NAME_PREFIX class Variable: @@ -111,10 +112,10 @@ def __init__(self, baseline_variable=None) -> None: attr, "value_type", required=True, - allowed_values=config.VALUE_TYPES.keys(), + allowed_values=config.value_types.keys(), ) - self.dtype = config.VALUE_TYPES[self.value_type]["dtype"] - self.json_type = config.VALUE_TYPES[self.value_type]["json_type"] + self.dtype = config.value_types[self.value_type].dtype + self.json_type = config.value_types[self.value_type].json_type if self.value_type == Enum: self.possible_values = self.set( attr, @@ -138,7 +139,7 @@ def __init__(self, baseline_variable=None) -> None: attr, "default_value", allowed_type=self.value_type, - default=config.VALUE_TYPES[self.value_type].get("default"), + default=config.value_types[self.value_type].default, ) self.entity = self.set(attr, "entity", required=True, setter=self.set_entity) self.definition_period = self.set( @@ -166,7 +167,7 @@ def __init__(self, baseline_variable=None) -> None: attr, "is_period_size_independent", allowed_type=bool, - default=config.VALUE_TYPES[self.value_type]["is_period_size_independent"], + default=config.value_types[self.value_type].is_period_size_independent, ) self.introspection_data = self.set( @@ -176,7 +177,7 @@ def __init__(self, baseline_variable=None) -> None: formulas_attr, unexpected_attrs = helpers._partition( attr, - lambda name, value: name.startswith(config.FORMULA_NAME_PREFIX), + lambda name, value: name.startswith(FORMULA_NAME_PREFIX), ) self.formulas = self.set_formulas(formulas_attr) @@ -351,7 +352,7 @@ def raise_error() -> NoReturn: msg, ) - if attribute_name == config.FORMULA_NAME_PREFIX: + if attribute_name == FORMULA_NAME_PREFIX: return datetime.date.min FORMULA_REGEX = r"formula_(\d{4})(?:_(\d{2}))?(?:_(\d{2}))?$" # YYYY or YYYY_MM or YYYY_MM_DD diff --git a/openfisca_tasks/lint.mk b/openfisca_tasks/lint.mk index 656e44a11..6b5511c9b 100644 --- a/openfisca_tasks/lint.mk +++ b/openfisca_tasks/lint.mk @@ -43,6 +43,7 @@ check-types: openfisca_core/entities \ openfisca_core/indexed_enums \ openfisca_core/periods \ + openfisca_core/config.py \ openfisca_core/types.py @$(call print_pass,$@:) diff --git a/openfisca_web_api/loader/variables.py b/openfisca_web_api/loader/variables.py index 6730dc081..a516df5e7 100644 --- a/openfisca_web_api/loader/variables.py +++ b/openfisca_web_api/loader/variables.py @@ -2,8 +2,8 @@ import inspect import textwrap +from openfisca_core import config from openfisca_core.indexed_enums import Enum -from openfisca_core.variables import VALUE_TYPES def get_next_day(date): @@ -76,7 +76,7 @@ def build_variable(variable, country_package_metadata): result = { "id": variable.name, "description": variable.label, - "valueType": VALUE_TYPES[variable.value_type]["formatted_value_type"], + "valueType": config.value_types[variable.value_type].formatted_value_type, "defaultValue": get_default_value(variable), "definitionPeriod": variable.definition_period.upper(), "entity": variable.entity.key,