From 0adc62d0730451c6c10ec75006ad740e725ee6a5 Mon Sep 17 00:00:00 2001 From: Fridolin Glatter Date: Tue, 17 Dec 2024 14:18:09 +0100 Subject: [PATCH] Normalize optimization.table DB model --- ixmp4/core/optimization/table.py | 11 +- ixmp4/data/abstract/optimization/table.py | 13 +- ixmp4/data/api/optimization/table.py | 6 +- ixmp4/data/db/base.py | 2 + ixmp4/data/db/optimization/__init__.py | 2 +- .../data/db/optimization/indexset/__init__.py | 2 +- .../db/optimization/indexset/repository.py | 4 +- ixmp4/data/db/optimization/table/model.py | 155 ++++++++++++++++-- .../data/db/optimization/table/repository.py | 112 ++++++------- ixmp4/data/db/optimization/utils.py | 23 ++- ixmp4/data/db/run/repository.py | 6 +- ixmp4/data/types.py | 7 +- ixmp4/db/__init__.py | 6 + tests/core/test_optimization_table.py | 48 +++--- tests/data/test_optimization_table.py | 59 +++---- 15 files changed, 290 insertions(+), 166 deletions(-) diff --git a/ixmp4/core/optimization/table.py b/ixmp4/core/optimization/table.py index 5b9ef7c9..e55ee7a6 100644 --- a/ixmp4/core/optimization/table.py +++ b/ixmp4/core/optimization/table.py @@ -14,7 +14,6 @@ from ixmp4.data.abstract import Docs as DocsModel from ixmp4.data.abstract import Run from ixmp4.data.abstract import Table as TableModel -from ixmp4.data.abstract.optimization import Column class Table(BaseModelFacade): @@ -35,7 +34,7 @@ def run_id(self) -> int: return self._model.run__id @property - def data(self) -> dict[str, Any]: + def data(self) -> dict[str, list[float] | list[int] | list[str]]: return self._model.data def add(self, data: dict[str, Any] | pd.DataFrame) -> None: @@ -46,12 +45,12 @@ def add(self, data: dict[str, Any] | pd.DataFrame) -> None: ).data @property - def constrained_to_indexsets(self) -> list[str]: - return [column.indexset.name for column in self._model.columns] + def indexsets(self) -> list[str]: + return self._model.indexsets @property - def columns(self) -> list[Column]: - return self._model.columns + def column_names(self) -> list[str] | None: + return self._model.column_names @property def created_at(self) -> datetime | None: diff --git a/ixmp4/data/abstract/optimization/table.py b/ixmp4/data/abstract/optimization/table.py index 4447d145..b52de7cf 100644 --- a/ixmp4/data/abstract/optimization/table.py +++ b/ixmp4/data/abstract/optimization/table.py @@ -13,7 +13,6 @@ from .. import base from ..docs import DocsRepository -from .column import Column class Table(base.BaseModel, Protocol): @@ -21,10 +20,12 @@ class Table(base.BaseModel, Protocol): name: types.String """Unique name of the Table.""" - data: types.JsonDict + data: types.Mapped[dict[str, list[float] | list[int] | list[str]]] """Data stored in the Table.""" - columns: types.Mapped[list[Column]] - """Data specifying this Table's Columns.""" + indexsets: types.Mapped[list[str]] + """List of the names of the IndexSets the Table is bound to.""" + column_names: types.Mapped[list[str] | None] + """List of the Table's column names, if distinct from the IndexSet names.""" run__id: types.Integer "Foreign unique integer id of a run." @@ -56,7 +57,7 @@ def create( """Creates a Table. Each column of the Table needs to be constrained to an existing - :class:ixmp4.data.abstract.optimization.IndexSet. These are specified by name + :class:`ixmp4.data.abstract.optimization.IndexSet`. These are specified by name and per default, these will be the column names. They can be overwritten by specifying `column_names`, which needs to specify a unique name for each column. @@ -78,7 +79,7 @@ def create( ------ :class:`ixmp4.data.abstract.optimization.Table.NotUnique`: If the Table with `name` already exists for the Run with `run_id`. - ValueError + :class:`ixmp4.core.exceptions.OptimizationItemUsageError`: If `column_names` are not unique or not enough names are given. Returns diff --git a/ixmp4/data/api/optimization/table.py b/ixmp4/data/api/optimization/table.py index b6924990..0236012d 100644 --- a/ixmp4/data/api/optimization/table.py +++ b/ixmp4/data/api/optimization/table.py @@ -14,7 +14,6 @@ from .. import base from ..docs import Docs, DocsRepository -from .column import Column class Table(base.BaseModel): @@ -24,8 +23,9 @@ class Table(base.BaseModel): id: int name: str - data: dict[str, Any] - columns: list["Column"] + data: dict[str, list[float] | list[int] | list[str]] + indexsets: list[str] + column_names: list[str] | None run__id: int created_at: datetime | None diff --git a/ixmp4/data/db/base.py b/ixmp4/data/db/base.py index 971737fe..7e859fd1 100644 --- a/ixmp4/data/db/base.py +++ b/ixmp4/data/db/base.py @@ -125,6 +125,8 @@ class CreateKwargs(TypedDict, total=False): parameters: Mapping[str, Any] run_id: int unit_name: str | None + column_names: list[str] | None + constrained_to_indexsets: list[str] class Creator(BaseRepository[ModelType], abstract.Creator): diff --git a/ixmp4/data/db/optimization/__init__.py b/ixmp4/data/db/optimization/__init__.py index f2988cb7..1077edb5 100644 --- a/ixmp4/data/db/optimization/__init__.py +++ b/ixmp4/data/db/optimization/__init__.py @@ -1,6 +1,6 @@ from .column import Column, ColumnRepository from .equation import Equation, EquationRepository -from .indexset import IndexSet, IndexSetRepository +from .indexset import IndexSet, IndexSetData, IndexSetRepository from .parameter import Parameter, ParameterRepository from .scalar import Scalar, ScalarRepository from .table import Table, TableRepository diff --git a/ixmp4/data/db/optimization/indexset/__init__.py b/ixmp4/data/db/optimization/indexset/__init__.py index c6f1275f..bf846cca 100644 --- a/ixmp4/data/db/optimization/indexset/__init__.py +++ b/ixmp4/data/db/optimization/indexset/__init__.py @@ -1,2 +1,2 @@ -from .model import IndexSet +from .model import IndexSet, IndexSetData from .repository import IndexSetRepository diff --git a/ixmp4/data/db/optimization/indexset/repository.py b/ixmp4/data/db/optimization/indexset/repository.py index 69e5eac8..594b266e 100644 --- a/ixmp4/data/db/optimization/indexset/repository.py +++ b/ixmp4/data/db/optimization/indexset/repository.py @@ -78,9 +78,7 @@ def add_data( indexset = self.get_by_id(id=indexset_id) _data = data if isinstance(data, list) else [data] - bulk_insert_enabled_data: list[dict[str, str]] = [ - {"value": str(d)} for d in _data - ] + bulk_insert_enabled_data = [{"value": str(d)} for d in _data] try: self.session.execute( db.insert(IndexSetData).values(indexset__id=indexset_id), diff --git a/ixmp4/data/db/optimization/table/model.py b/ixmp4/data/db/optimization/table/model.py index 1143007a..56e61020 100644 --- a/ixmp4/data/db/optimization/table/model.py +++ b/ixmp4/data/db/optimization/table/model.py @@ -1,14 +1,26 @@ -from typing import Any, ClassVar +from typing import ClassVar, Literal, cast -from sqlalchemy.orm import Mapped as Mapped -from sqlalchemy.orm import validates +import pandas as pd from ixmp4 import db from ixmp4.core.exceptions import OptimizationDataValidationError from ixmp4.data import types from ixmp4.data.abstract import optimization as abstract -from .. import Column, base, utils +from .. import IndexSet, base + + +class TableIndexsetAssociation(base.RootBaseModel): + table_prefix = "optimization_" + + table_id: types.TableId + table: types.Mapped["Table"] = db.relationship( + back_populates="_table_indexset_associations" + ) + indexset_id: types.IndexSetId + indexset: types.Mapped[IndexSet] = db.relationship() + + column_name: types.String = db.Column(db.String(255), nullable=True) class Table(base.BaseModel): @@ -18,20 +30,129 @@ class Table(base.BaseModel): DataInvalid: ClassVar = OptimizationDataValidationError DeletionPrevented: ClassVar = abstract.Table.DeletionPrevented - # constrained_to_indexsets: ClassVar[list[str] | None] = None - run__id: types.RunId - columns: types.Mapped[list["Column"]] = db.relationship() - data: types.JsonDict = db.Column(db.JsonType, nullable=False, default={}) - # TODO: should we pass self to validate_data to raise more specific errors? + __table_args__ = (db.UniqueConstraint("name", "run__id"),) - @validates("data") - def validate_data(self, key: Any, data: dict[str, Any]) -> dict[str, Any]: - return utils.validate_data( - host=self, - data=data, - columns=self.columns, - ) + _table_indexset_associations: types.Mapped[list[TableIndexsetAssociation]] = ( + db.relationship(back_populates="table", cascade="all, delete-orphan") + ) - __table_args__ = (db.UniqueConstraint("name", "run__id"),) + _indexsets: db.AssociationProxy[list[IndexSet]] = db.association_proxy( + "_table_indexset_associations", "indexset" + ) + _column_names: db.AssociationProxy[list[str | None]] = db.association_proxy( + "_table_indexset_associations", "column_name" + ) + + @property + def indexsets(self) -> list[str]: + return [indexset.name for indexset in self._indexsets] + + @property + def column_names(self) -> list[str] | None: + return cast(list[str], self._column_names) if any(self._column_names) else None + + _data: types.Mapped[list["TableData"]] = db.relationship( + back_populates="table", order_by="TableData.id" + ) + + @property + def data(self) -> dict[str, list[float] | list[int] | list[str]]: + if self._data == []: + return {} + else: + renames: dict[str, str] = {} + type_map: dict[str, str] = {} + if self.column_names: + for i in range(len(self.column_names)): + renames[f"Column {i}"] = self.column_names[i] + # would only be None if indexset had no data + type_map[self.column_names[i]] = cast( + Literal["float", "int", "str"], self._indexsets[i]._data_type + ) + else: + for i in range(len(self.indexsets)): + renames[f"Column {i}"] = self.indexsets[i] + type_map[self.indexsets[i]] = cast( + Literal["float", "int", "str"], self._indexsets[i]._data_type + ) + return cast( + dict[str, list[float] | list[int] | list[str]], + pd.DataFrame.from_records( + [ + { + "Column 0": td.value_0, + "Column 1": td.value_1, + "Column 2": td.value_2, + "Column 3": td.value_3, + "Column 4": td.value_4, + "Column 5": td.value_5, + "Column 6": td.value_6, + "Column 7": td.value_7, + "Column 8": td.value_8, + "Column 9": td.value_9, + "Column 10": td.value_10, + "Column 11": td.value_11, + "Column 12": td.value_12, + "Column 13": td.value_13, + "Column 14": td.value_14, + } + for td in self._data + ] + ) + .dropna(axis="columns") + .rename(renames, axis="columns") + .astype(type_map) + .to_dict(orient="list"), + ) + + @data.setter + def data( + self, value: dict[str, list[float] | list[int] | list[str]] | pd.DataFrame + ) -> None: + return None + + +class TableData(base.RootBaseModel): + table_prefix = "optimization_" + + table: types.Mapped["Table"] = db.relationship(back_populates="_data") + table__id: types.TableId + + value_0: types.String = db.Column(db.String, nullable=False) + value_1: types.String = db.Column(db.String, nullable=True) + value_2: types.String = db.Column(db.String, nullable=True) + value_3: types.String = db.Column(db.String, nullable=True) + value_4: types.String = db.Column(db.String, nullable=True) + value_5: types.String = db.Column(db.String, nullable=True) + value_6: types.String = db.Column(db.String, nullable=True) + value_7: types.String = db.Column(db.String, nullable=True) + value_8: types.String = db.Column(db.String, nullable=True) + value_9: types.String = db.Column(db.String, nullable=True) + value_10: types.String = db.Column(db.String, nullable=True) + value_11: types.String = db.Column(db.String, nullable=True) + value_12: types.String = db.Column(db.String, nullable=True) + value_13: types.String = db.Column(db.String, nullable=True) + value_14: types.String = db.Column(db.String, nullable=True) + + __table_args__ = ( + db.UniqueConstraint( + "table__id", + "value_0", + "value_1", + "value_2", + "value_3", + "value_4", + "value_5", + "value_6", + "value_7", + "value_8", + "value_9", + "value_10", + "value_11", + "value_12", + "value_13", + "value_14", + ), + ) diff --git a/ixmp4/data/db/optimization/table/repository.py b/ixmp4/data/db/optimization/table/repository.py index dcdad5be..98f8cf22 100644 --- a/ixmp4/data/db/optimization/table/repository.py +++ b/ixmp4/data/db/optimization/table/repository.py @@ -1,5 +1,5 @@ from collections.abc import Iterable -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, cast # TODO Import this from typing when dropping Python 3.11 from typing_extensions import Unpack @@ -10,13 +10,16 @@ import pandas as pd from ixmp4 import db -from ixmp4.core.exceptions import OptimizationItemUsageError +from ixmp4.core.exceptions import ( + OptimizationDataValidationError, + OptimizationItemUsageError, +) from ixmp4.data.abstract import optimization as abstract from ixmp4.data.auth.decorators import guard -from .. import ColumnRepository, base +from .. import ColumnRepository, base, utils from .docs import TableDocsRepository -from .model import Table +from .model import Table, TableData, TableIndexsetAssociation class TableRepository( @@ -38,48 +41,25 @@ def __init__(self, *args: "SqlAlchemyBackend") -> None: self.filter_class = OptimizationTableFilter - def _add_column( # type: ignore[no-untyped-def] + def add( self, run_id: int, - table_id: int, - column_name: str, - indexset_name: str, - **kwargs, - ) -> None: - r"""Adds a Column to a Table. - - Parameters - ---------- - run_id : int - The id of the :class:`ixmp4.data.abstract.Run` for which the - :class:`ixmp4.data.abstract.optimization.Table` is defined. - table_id : int - The id of the :class:`ixmp4.data.abstract.optimization.Table`. - column_name : str - The name of the Column, which must be unique in connection with the names of - :class:`ixmp4.data.abstract.Run` and - :class:`ixmp4.data.abstract.optimization.Table`. - indexset_name : str - The name of the :class:`ixmp4.data.abstract.optimization.IndexSet` the - Column will be linked to. - \*\*kwargs: any - Keyword arguments to be passed to - :func:`ixmp4.data.abstract.optimization.Column.create`. - """ - indexset = self.backend.optimization.indexsets.get( - run_id=run_id, name=indexset_name - ) - self.columns.create( - name=column_name, - constrained_to_indexset=indexset.id, - dtype=pd.Series(indexset.data).dtype.name, - table_id=table_id, - unique=True, - **kwargs, + name: str, + constrained_to_indexsets: list[str], + column_names: list[str] | None = None, + ) -> Table: + table = Table(name=name, run__id=run_id) + indexsets = self.backend.optimization.indexsets.list( + name__in=constrained_to_indexsets ) - def add(self, run_id: int, name: str) -> Table: - table = Table(name=name, run__id=run_id) + for i in range(len(indexsets)): + _ = TableIndexsetAssociation( + table=table, + indexset=indexsets[i], + column_name=column_names[i] if column_names else None, + ) + self.session.add(table) return table @@ -125,15 +105,12 @@ def create( "The given `column_names` are not unique!" ) - table = super().create(run_id=run_id, name=name) - for i, name in enumerate(constrained_to_indexsets): - self._add_column( - run_id=run_id, - table_id=table.id, - column_name=column_names[i] if column_names else name, - indexset_name=name, - ) - + table = super().create( + run_id=run_id, + name=name, + constrained_to_indexsets=constrained_to_indexsets, + column_names=column_names, + ) return table @guard("view") @@ -146,12 +123,35 @@ def tabulate(self, **kwargs: Unpack["base.EnumerateKwargs"]) -> pd.DataFrame: @guard("edit") def add_data(self, table_id: int, data: dict[str, Any] | pd.DataFrame) -> None: - if isinstance(data, dict): - data = pd.DataFrame.from_dict(data=data) table = self.get_by_id(id=table_id) - table.data = pd.concat([pd.DataFrame.from_dict(table.data), data]).to_dict( - orient="list" - ) # type: ignore[assignment] + data = pd.DataFrame.from_dict( + data=utils.validate_data( + host=table, + data=data, + columns=table._indexsets, + column_names=table.column_names, + ) + ) + + column_names = table.column_names if table.column_names else table.indexsets + + # Ensure column order is the same as table.indexsets + data = data[column_names] + renames = {name: f"value_{i}" for i, name in enumerate(column_names)} + data.rename(renames, axis="columns", inplace=True) + + bulk_insert_enabled_data = cast( + list[dict[str, str]], data.to_dict(orient="records") + ) + + try: + self.session.execute( + db.insert(TableData).values(table__id=table_id), + bulk_insert_enabled_data, + ) + except db.IntegrityError as e: + self.session.rollback() + raise OptimizationDataValidationError from e self.session.commit() diff --git a/ixmp4/data/db/optimization/utils.py b/ixmp4/data/db/optimization/utils.py index d612b4f3..ae2b73ff 100644 --- a/ixmp4/data/db/optimization/utils.py +++ b/ixmp4/data/db/optimization/utils.py @@ -6,6 +6,7 @@ if TYPE_CHECKING: from .column import Column + from .indexset import IndexSet def collect_indexsets_to_check( @@ -17,9 +18,14 @@ def collect_indexsets_to_check( def validate_data( - host: base.BaseModel, data: dict[str, Any], columns: list["Column"] + host: base.BaseModel, + data: dict[str, Any] | pd.DataFrame, + columns: list["Column"] | list["IndexSet"], + column_names: list[str] | None = None, ) -> dict[str, Any]: - data_frame: pd.DataFrame = pd.DataFrame.from_dict(data) + data_frame = ( + data if isinstance(data, pd.DataFrame) else pd.DataFrame.from_dict(data) + ) # TODO for all of the following, we might want to create unique exceptions # Could me make both more specific by specifiying missing/extra columns? if len(data_frame.columns) < len(columns): @@ -51,7 +57,18 @@ def validate_data( # Can we make this more specific? Iterating over columns; if any is False, # return its name or something? - limited_to_indexsets = collect_indexsets_to_check(columns=columns) + # TODO adapt once we remove Columns as a class + # No way to properly type check generics + try: + # columns are indexsets + limited_to_indexsets = ( + {column.name: column.data for column in columns} # type: ignore[union-attr] + if not column_names + else {column_names[i]: columns[i].data for i in range(len(columns))} # type: ignore[union-attr] + ) + except AttributeError: + # columns are columns + limited_to_indexsets = collect_indexsets_to_check(columns=columns) # type: ignore[arg-type] if not data_frame.isin(limited_to_indexsets).all(axis=None): raise host.DataInvalid( f"While handling {host.__str__()}: \n" diff --git a/ixmp4/data/db/run/repository.py b/ixmp4/data/db/run/repository.py index 2a92580d..0f671ff9 100644 --- a/ixmp4/data/db/run/repository.py +++ b/ixmp4/data/db/run/repository.py @@ -258,10 +258,8 @@ def clone( new_table = self.backend.optimization.tables.create( run_id=run.id, name=table.name, - constrained_to_indexsets=[ - column.indexset.name for column in table.columns - ], - column_names=[column.name for column in table.columns], + constrained_to_indexsets=table.indexsets, + column_names=table.column_names, ) self.backend.optimization.tables.add_data( table_id=new_table.id, data=table.data diff --git a/ixmp4/data/types.py b/ixmp4/data/types.py index 2a255d21..066e3c2a 100644 --- a/ixmp4/data/types.py +++ b/ixmp4/data/types.py @@ -10,11 +10,12 @@ Float = Mapped[float] IndexSetId = Mapped[db.IndexSetIdType] Integer = Mapped[int] -OptimizationDataList = Mapped[list[float] | list[int] | list[str]] JsonDict = Mapped[dict[str, Any]] +Name = Mapped[db.NameType] +OptimizationDataList = Mapped[list[float] | list[int] | list[str]] OptimizationDataType = Mapped[Literal["float", "int", "str"] | None] +RunId = Mapped[db.RunIdType] String = Mapped[str] -Name = Mapped[db.NameType] +TableId = Mapped[db.TableIdType] UniqueName = Mapped[db.UniqueNameType] -RunId = Mapped[db.RunIdType] Username = Mapped[db.UsernameType] diff --git a/ixmp4/db/__init__.py b/ixmp4/db/__init__.py index 7834e9a7..2581cb08 100644 --- a/ixmp4/db/__init__.py +++ b/ixmp4/db/__init__.py @@ -43,6 +43,7 @@ Index, Label, Sequence, + Table, UniqueConstraint, delete, exists, @@ -57,6 +58,7 @@ from sqlalchemy import Column as typing_column from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.exc import IntegrityError, MultipleResultsFound +from sqlalchemy.ext.associationproxy import AssociationProxy, association_proxy from sqlalchemy.ext.hybrid import hybrid_property from sqlalchemy.orm import ( Bundle, @@ -83,6 +85,10 @@ # model JsonType = JsonType.with_variant(JSONB(), "postgresql") # type:ignore[no-untyped-call] NameType = Annotated[str, Column(String(255), nullable=False, unique=False)] +TableIdType = Annotated[ + int, + Column(Integer, ForeignKey("optimization_table.id"), nullable=False, index=True), +] RunIdType = Annotated[ int, Column(Integer, ForeignKey("run.id"), nullable=False, index=True), diff --git a/tests/core/test_optimization_table.py b/tests/core/test_optimization_table.py index abe14b6c..b0e0a05b 100644 --- a/tests/core/test_optimization_table.py +++ b/tests/core/test_optimization_table.py @@ -18,7 +18,6 @@ def df_from_list(tables: list[Table]) -> pd.DataFrame: [ [ table.run_id, - table.data, table.name, table.id, table.created_at, @@ -28,7 +27,6 @@ def df_from_list(tables: list[Table]) -> pd.DataFrame: ], columns=[ "run__id", - "data", "name", "id", "created_at", @@ -54,8 +52,8 @@ def test_create_table(self, platform: ixmp4.Platform) -> None: assert table.id == 1 assert table.name == "Table 1" assert table.data == {} - assert table.columns[0].name == indexset.name - assert table.constrained_to_indexsets == [indexset.name] + assert table.indexsets == [indexset.name] + assert table.column_names is None # Test duplicate name raises with pytest.raises(Table.NotUnique): @@ -77,7 +75,7 @@ def test_create_table(self, platform: ixmp4.Platform) -> None: constrained_to_indexsets=[indexset.name], column_names=["Column 1"], ) - assert table_2.columns[0].name == "Column 1" + assert table_2.column_names == ["Column 1"] # Test duplicate column_names raise with pytest.raises( @@ -89,16 +87,6 @@ def test_create_table(self, platform: ixmp4.Platform) -> None: column_names=["Column 1", "Column 1"], ) - # Test column.dtype is registered correctly - indexset_2.add(data=2024) - table_3 = run.optimization.tables.create( - "Table 5", - constrained_to_indexsets=[indexset.name, indexset_2.name], - ) - # If indexset doesn't have data, a generic dtype is registered - assert table_3.columns[0].dtype == "object" - assert table_3.columns[1].dtype == "int64" - def test_get_table(self, platform: ixmp4.Platform) -> None: run = platform.runs.create("Model", "Scenario") (indexset,) = create_indexsets_for_run( @@ -112,8 +100,7 @@ def test_get_table(self, platform: ixmp4.Platform) -> None: assert table.id == 1 assert table.name == "Table" assert table.data == {} - assert table.columns[0].name == indexset.name - assert table.constrained_to_indexsets == [indexset.name] + assert table.indexsets == [indexset.name] with pytest.raises(Table.NotFound): _ = run.optimization.tables.get(name="Table 2") @@ -170,6 +157,7 @@ def test_table_add_data(self, platform: ixmp4.Platform) -> None: table_2.add(data=test_data_2) assert table_2.data == test_data_2 + # Test overwriting column names table_3 = run.optimization.tables.create( name="Table 3", constrained_to_indexsets=[indexset.name, indexset_2.name], @@ -195,21 +183,19 @@ def test_table_add_data(self, platform: ixmp4.Platform) -> None: OptimizationDataValidationError, match="Trying to add data to unknown Columns!", ): - table_3.add({"Column 3": [1]}) + table_3.add({"Column 1": ["not there"], "Column 2": [2], "Column 3": [1]}) # Test that order is not important... table_4 = run.optimization.tables.create( - name="Table 4", - constrained_to_indexsets=[indexset.name, indexset_2.name], - column_names=["Column 1", "Column 2"], + name="Table 4", constrained_to_indexsets=[indexset.name, indexset_2.name] ) - test_data_4 = {"Column 2": [2], "Column 1": ["bar"]} + test_data_4 = {indexset_2.name: [2], indexset.name: ["bar"]} table_4.add(data=test_data_4) assert table_4.data == test_data_4 # ...even for expanding - table_4.add(data={"Column 1": ["foo"], "Column 2": [1]}) - assert table_4.data == {"Column 2": [2, 1], "Column 1": ["bar", "foo"]} + table_4.add(data={indexset.name: ["foo"], indexset_2.name: [1]}) + assert table_4.data == {indexset_2.name: [2, 1], indexset.name: ["bar", "foo"]} # This doesn't seem to test a distinct case compared to the above with pytest.raises( @@ -217,7 +203,11 @@ def test_table_add_data(self, platform: ixmp4.Platform) -> None: match="Trying to add data to unknown Columns!", ): table_4.add( - data={"Column 1": ["bar"], "Column 2": [3], indexset.name: ["foo"]}, + data={ + indexset.name: ["bar"], + indexset_2.name: [3], + "Indexset": ["foo"], + }, ) # Test various data types @@ -234,9 +224,11 @@ def test_table_add_data(self, platform: ixmp4.Platform) -> None: table_5.add(test_data_5) assert table_5.data == test_data_5 - # This doesn't raise since the union of existing and new data is validated - table_5.add(data={}) - assert table_5.data == test_data_5 + # This raises since only the new data are validated + with pytest.raises( + OptimizationDataValidationError, match="Data is missing for some Columns!" + ): + table_5.add(data={}) def test_list_tables(self, platform: ixmp4.Platform) -> None: run = platform.runs.create("Model", "Scenario") diff --git a/tests/data/test_optimization_table.py b/tests/data/test_optimization_table.py index 9f7aab06..6063fd22 100644 --- a/tests/data/test_optimization_table.py +++ b/tests/data/test_optimization_table.py @@ -16,7 +16,6 @@ def df_from_list(tables: list[Table]) -> pd.DataFrame: [ [ table.run__id, - table.data, table.name, table.id, table.created_at, @@ -26,7 +25,6 @@ def df_from_list(tables: list[Table]) -> pd.DataFrame: ], columns=[ "run__id", - "data", "name", "id", "created_at", @@ -40,18 +38,16 @@ def test_create_table(self, platform: ixmp4.Platform) -> None: run = platform.backend.runs.create("Model", "Scenario") # Test normal creation - indexset_1, indexset_2 = create_indexsets_for_run( - platform=platform, run_id=run.id - ) + indexset_1, _ = create_indexsets_for_run(platform=platform, run_id=run.id) table = platform.backend.optimization.tables.create( run_id=run.id, name="Table", constrained_to_indexsets=[indexset_1.name] ) assert table.run__id == run.id assert table.name == "Table" - assert table.data == {} # JsonDict type currently requires a dict, not None - assert table.columns[0].name == indexset_1.name - assert table.columns[0].constrained_to_indexset == indexset_1.id + assert table.data == {} + assert table.indexsets == [indexset_1.name] + assert table.column_names is None # Test duplicate name raises with pytest.raises(Table.NotUnique): @@ -75,7 +71,7 @@ def test_create_table(self, platform: ixmp4.Platform) -> None: constrained_to_indexsets=[indexset_1.name], column_names=["Column 1"], ) - assert table_2.columns[0].name == "Column 1" + assert table_2.column_names == ["Column 1"] # Test duplicate column_names raise with pytest.raises( @@ -88,20 +84,6 @@ def test_create_table(self, platform: ixmp4.Platform) -> None: column_names=["Column 1", "Column 1"], ) - # Test column.dtype is registered correctly - platform.backend.optimization.indexsets.add_data(indexset_2.id, data=2024) - indexset_2 = platform.backend.optimization.indexsets.get( - run.id, indexset_2.name - ) - table_3 = platform.backend.optimization.tables.create( - run_id=run.id, - name="Table 5", - constrained_to_indexsets=[indexset_1.name, indexset_2.name], - ) - # If indexset doesn't have data, a generic dtype is registered - assert table_3.columns[0].dtype == "object" - assert table_3.columns[1].dtype == "int64" - def test_get_table(self, platform: ixmp4.Platform) -> None: run = platform.backend.runs.create("Model", "Scenario") _, _ = create_indexsets_for_run(platform=platform, run_id=run.id) @@ -184,6 +166,7 @@ def test_table_add_data(self, platform: ixmp4.Platform) -> None: ) assert table_2.data == test_data_2 + # Test overwriting column names table_3 = platform.backend.optimization.tables.create( run_id=run.id, name="Table 3", @@ -222,7 +205,8 @@ def test_table_add_data(self, platform: ixmp4.Platform) -> None: match="Trying to add data to unknown Columns!", ): platform.backend.optimization.tables.add_data( - table_id=table_3.id, data={"Column 3": [1]} + table_id=table_3.id, + data={"Column 1": ["foo"], "Column 2": [1], "Column 3": [1]}, ) # Test that order is not important... @@ -230,9 +214,8 @@ def test_table_add_data(self, platform: ixmp4.Platform) -> None: run_id=run.id, name="Table 4", constrained_to_indexsets=[indexset_1.name, indexset_2.name], - column_names=["Column 1", "Column 2"], ) - test_data_4 = {"Column 2": [2], "Column 1": ["bar"]} + test_data_4 = {indexset_2.name: [2], indexset_1.name: ["bar"]} platform.backend.optimization.tables.add_data( table_id=table_4.id, data=test_data_4 ) @@ -243,12 +226,15 @@ def test_table_add_data(self, platform: ixmp4.Platform) -> None: # ...even for expanding platform.backend.optimization.tables.add_data( - table_id=table_4.id, data={"Column 1": ["foo"], "Column 2": [1]} + table_id=table_4.id, data={indexset_1.name: ["foo"], indexset_2.name: [1]} ) table_4 = platform.backend.optimization.tables.get( run_id=run.id, name="Table 4" ) - assert table_4.data == {"Column 2": [2, 1], "Column 1": ["bar", "foo"]} + assert table_4.data == { + indexset_2.name: [2, 1], + indexset_1.name: ["bar", "foo"], + } # This doesn't seem to test a distinct case compared to the above with pytest.raises( @@ -257,7 +243,11 @@ def test_table_add_data(self, platform: ixmp4.Platform) -> None: ): platform.backend.optimization.tables.add_data( table_id=table_4.id, - data={"Column 1": ["bar"], "Column 2": [3], "Indexset": ["foo"]}, + data={ + indexset_1.name: ["bar"], + indexset_2.name: [3], + "Indexset": ["foo"], + }, ) # Test various data types @@ -285,12 +275,11 @@ def test_table_add_data(self, platform: ixmp4.Platform) -> None: ) assert table_5.data == test_data_5 - # This doesn't raise since the union of existing and new data is validated - platform.backend.optimization.tables.add_data(table_id=table_5.id, data={}) - table_5 = platform.backend.optimization.tables.get( - run_id=run.id, name="Table 5" - ) - assert table_5.data == test_data_5 + # This raises since only the new data are validated + with pytest.raises( + OptimizationDataValidationError, match="Data is missing for some Columns!" + ): + platform.backend.optimization.tables.add_data(table_id=table_5.id, data={}) def test_list_table(self, platform: ixmp4.Platform) -> None: run = platform.backend.runs.create("Model", "Scenario")