Move AttrSeries from reporting.utils to (new) reporting.attrseries

iiasa · Oct 3, 2019 · ddf476b · ddf476b
1 parent b7d1e3f
commit ddf476b
Show file tree

Hide file tree

Showing 5 changed files with 145 additions and 130 deletions.
diff --git a/doc/source/reporting.rst b/doc/source/reporting.rst
@@ -130,6 +130,10 @@ Computations
 .. automodule:: ixmp.reporting.computations
    :members:
 
+   Unless otherwise specified, these functions accept and return
+   :class:`Quantity <ixmp.reporting.utils.Quantity>` objects for data
+   arguments/return values.
+
    Calculations:
 
    .. autosummary::
@@ -154,7 +158,7 @@ Computations
 Utilities
 ---------
 
-.. autoclass:: ixmp.reporting.utils.AttrSeries
+.. autoclass:: ixmp.reporting.attrseries.AttrSeries
 
 .. automodule:: ixmp.reporting.utils
    :members:

diff --git a/ixmp/reporting/__init__.py b/ixmp/reporting/__init__.py
@@ -5,7 +5,7 @@
 # The core design pattern uses dask graphs; see
 # http://docs.dask.org/en/latest/spec.html
 # - Reporter.graph is a dictionary where:
-#   - keys are strings or ixmp.reporting.util.Key objects (which compare/hash
+#   - keys are strings or ixmp.reporting.key.Key objects (which compare/hash
 #     equal to their str() representation), and
 #   - values are 'computations' (the Reporter.add() docstring repeats the
 #     definition of computations from the above URL).
@@ -37,7 +37,12 @@
 import yaml
 
 from .key import Key
-from .utils import REPLACE_UNITS, keys_for_quantity, rename_dims, ureg
+from .utils import (
+    REPLACE_UNITS,
+    keys_for_quantity,
+    rename_dims,
+    ureg,
+)
 from . import computations
 from .describe import describe_recursive
 

diff --git a/ixmp/reporting/attrseries.py b/ixmp/reporting/attrseries.py
@@ -0,0 +1,124 @@
+from collections import OrderedDict
+from collections.abc import Collection
+from copy import deepcopy
+
+import pandas as pd
+from pandas.core.generic import NDFrame
+import xarray as xr
+
+
+class AttrSeries(pd.Series):
+    """:class:`pandas.Series` subclass imitating :class:`xarray.DataArray`.
+
+    Future versions of :mod:`ixmp.reporting` will use :class:`xarray.DataArray`
+    as :class:`Quantity`; however, because :mod:`xarray` currently lacks sparse
+    matrix support, ixmp quantities may be too large for available memory.
+
+    The AttrSeries class provides similar methods and behaviour to
+    :class:`xarray.DataArray`, such as an `attrs` dictionary for metadata, so
+    that :mod:`ixmp.reporting.computations` methods can use xarray-like syntax.
+    """
+
+    # normal properties
+    _metadata = ('attrs', )
+
+    def __init__(self, *args, **kwargs):
+        if 'attrs' in kwargs:
+            # Use provided attrs
+            attrs = kwargs.pop('attrs')
+        elif hasattr(args[0], 'attrs'):
+            # Use attrs from an xarray object
+            attrs = args[0].attrs.copy()
+
+            # pre-convert to a pd.Series to preserve names and labels
+            args = list(args)
+            args[0] = args[0].to_series()
+        else:
+            # default empty
+            attrs = OrderedDict()
+
+        super().__init__(*args, **kwargs)
+
+        self.attrs = attrs
+
+    def assign_attrs(self, d):
+        self.attrs.update(d)
+        return self
+
+    def assign_coords(self, **kwargs):
+        return pd.concat([self], keys=kwargs.values(), names=kwargs.keys())
+
+    @property
+    def coords(self):
+        """Read-only."""
+        return dict(zip(self.index.names, self.index.levels))
+
+    @property
+    def dims(self):
+        return tuple(self.index.names)
+
+    def sel(self, indexers=None, drop=False, **indexers_kwargs):
+        indexers = indexers or {}
+        indexers.update(indexers_kwargs)
+        if len(indexers) == 1:
+            level, key = list(indexers.items())[0]
+            if not isinstance(key, Collection) and not drop:
+                # When using .loc[] to select 1 label on 1 level, pandas drops
+                # the level. Use .xs() to avoid this behaviour unless drop=True
+                return AttrSeries(self.xs(key, level=level, drop_level=False))
+
+        idx = tuple(indexers.get(l, slice(None)) for l in self.index.names)
+        return AttrSeries(self.loc[idx])
+
+    def sum(self, *args, **kwargs):
+        try:
+            dim = kwargs.pop('dim')
+            if isinstance(self.index, pd.MultiIndex):
+                if len(dim) == len(self.index.names):
+                    # assume dimensions = full multi index, do simple sum
+                    obj = self
+                    kwargs = {}
+                else:
+                    # pivot and sum across columns
+                    obj = self.unstack(dim)
+                    kwargs['axis'] = 1
+            else:
+                if dim != [self.index.name]:
+                    raise ValueError(dim, self.index.name, self)
+                obj = super()
+                kwargs['level'] = dim
+        except KeyError:
+            obj = super()
+        return AttrSeries(obj.sum(*args, **kwargs))
+
+    def squeeze(self, *args, **kwargs):
+        kwargs.pop('drop')
+        return super().squeeze(*args, **kwargs) if len(self) > 1 else self
+
+    def as_xarray(self):
+        return xr.DataArray.from_series(self)
+
+    def transpose(self, *dims):
+        return self.reorder_levels(dims)
+
+    def to_dataframe(self):
+        return self.to_frame()
+
+    def to_series(self):
+        return self
+
+    @property
+    def _constructor(self):
+        return AttrSeries
+
+    def __finalize__(self, other, method=None, **kwargs):
+        """Propagate metadata from other to self.
+
+        This is identical to the version in pandas, except deepcopy() is added
+        so that the 'attrs' OrderedDict is not double-referenced.
+        """
+        if isinstance(other, NDFrame):
+            for name in self._metadata:
+                object.__setattr__(self, name,
+                                   deepcopy(getattr(other, name, None)))
+        return self
diff --git a/ixmp/reporting/utils.py b/ixmp/reporting/utils.py
@@ -1,22 +1,25 @@
-import collections
-from collections.abc import Collection
-from copy import deepcopy
 from functools import partial, reduce
 import logging
 from operator import mul
 
 import pandas as pd
-from pandas.core.generic import NDFrame
 import pint
 import xarray as xr
 
+from .attrseries import AttrSeries
 from .key import Key
 
 
 log = logging.getLogger(__name__)
 
 ureg = pint.UnitRegistry()
 
+# See also:
+# - docstring of attrseries.AttrSeries.
+# - test_report_size() for a test that shows how non-sparse xr.DataArray
+#   triggers MemoryError.
+Quantity = AttrSeries
+# Quantity = xr.DataArray
 
 # Replacements to apply to quantity units before parsing by pint
 REPLACE_UNITS = {
@@ -156,123 +159,6 @@ def invalid(unit):
     return unit
 
 
-class AttrSeries(pd.Series):
-    """:class:`pandas.Series` subclass imitating :class:`xarray.DataArray`.
-
-    Future versions of :mod:`ixmp.reporting` will use :class:`xarray.DataArray`
-    as :class:`Quantity`; however, because :mod:`xarray` currently lacks sparse
-    matrix support, ixmp quantities may be too large for memory.
-
-    The AttrSeries class provides similar methods and behaviour to
-    :class:`xarray.DataArray`, such as an `attrs` dictionary for metadata, so
-    that :mod:`ixmp.reporting.computations` methods can use xarray-like syntax.
-    """
-
-    # normal properties
-    _metadata = ('attrs', )
-
-    def __init__(self, *args, **kwargs):
-        if 'attrs' in kwargs:
-            # Use provided attrs
-            attrs = kwargs.pop('attrs')
-        elif hasattr(args[0], 'attrs'):
-            # Use attrs from an xarray object
-            attrs = args[0].attrs.copy()
-
-            # pre-convert an pd.Series to preserve names and labels
-            args = list(args)
-            args[0] = args[0].to_series()
-        else:
-            # default empty
-            attrs = collections.OrderedDict()
-
-        super().__init__(*args, **kwargs)
-
-        self.attrs = attrs
-
-    def assign_attrs(self, d):
-        self.attrs.update(d)
-        return self
-
-    def assign_coords(self, **kwargs):
-        return pd.concat([self], keys=kwargs.values(), names=kwargs.keys())
-
-    @property
-    def coords(self):
-        """Read-only."""
-        return dict(zip(self.index.names, self.index.levels))
-
-    @property
-    def dims(self):
-        return tuple(self.index.names)
-
-    def sel(self, indexers=None, drop=False, **indexers_kwargs):
-        indexers = indexers or {}
-        indexers.update(indexers_kwargs)
-        if len(indexers) == 1:
-            level, key = list(indexers.items())[0]
-            if not isinstance(key, Collection) and not drop:
-                # When using .loc[] to select 1 label on 1 level, pandas drops
-                # the level. Use .xs() to avoid this behaviour unless drop=True
-                return AttrSeries(self.xs(key, level=level, drop_level=False))
-
-        idx = tuple(indexers.get(l, slice(None)) for l in self.index.names)
-        return AttrSeries(self.loc[idx])
-
-    def sum(self, *args, **kwargs):
-        try:
-            dim = kwargs.pop('dim')
-            if isinstance(self.index, pd.MultiIndex):
-                if len(dim) == len(self.index.names):
-                    # assume dimensions = full multi index, do simple sum
-                    obj = self
-                    kwargs = {}
-                else:
-                    # pivot and sum across columns
-                    obj = self.unstack(dim)
-                    kwargs['axis'] = 1
-            else:
-                if dim != [self.index.name]:
-                    raise ValueError(dim, self.index.name, self)
-                obj = super()
-                kwargs['level'] = dim
-        except KeyError:
-            obj = super()
-        return AttrSeries(obj.sum(*args, **kwargs))
-
-    def squeeze(self, *args, **kwargs):
-        kwargs.pop('drop')
-        return super().squeeze(*args, **kwargs) if len(self) > 1 else self
-
-    def as_xarray(self):
-        return xr.DataArray.from_series(self)
-
-    def transpose(self, *dims):
-        return self.reorder_levels(dims)
-
-    def to_dataframe(self):
-        return self.to_frame()
-
-    def to_series(self):
-        return self
-
-    @property
-    def _constructor(self):
-        return AttrSeries
-
-    def __finalize__(self, other, method=None, **kwargs):
-        """Propagate metadata from other to self.
-
-        This is identical to the version in pandas, except deepcopy() is added
-        so that the 'attrs' OrderedDict is not double-referenced.
-        """
-        if isinstance(other, NDFrame):
-            for name in self._metadata:
-                object.__setattr__(self, name,
-                                   deepcopy(getattr(other, name, None)))
-        return self
-
-
 def data_for_quantity(ix_type, name, column, scenario, filters=None):
     """Retrieve data from *scenario*.
 
@@ -342,7 +228,7 @@ def data_for_quantity(ix_type, name, column, scenario, filters=None):
     # Convert to a Dataset, assign attrbutes and name
     # ds = xr.Dataset.from_dataframe(data)[column]
     # or to a new "Attribute Series"
-    ds = AttrSeries(data[column])
+    ds = Quantity(data[column])
 
     ds = ds \
         .assign_attrs(attrs) \
@@ -357,10 +243,6 @@ def data_for_quantity(ix_type, name, column, scenario, filters=None):
     return ds
 
 
-# Quantity = xr.DataArray
-Quantity = AttrSeries
-
-
 def concat(*args, **kwargs):
     if Quantity is AttrSeries:
         kwargs.pop('dim')

diff --git a/ixmp/testing.py b/ixmp/testing.py
@@ -27,7 +27,7 @@
 
 from .config import _config as ixmp_config
 from .core import Platform, Scenario, IAMC_IDX
-from .reporting.utils import Quantity, AttrSeries
+from .reporting.utils import AttrSeries, Quantity
 
 
 models = {