Skip to content

Commit

Permalink
Move AttrSeries from reporting.utils to (new) reporting.attrseries
Browse files Browse the repository at this point in the history
  • Loading branch information
khaeru committed Oct 3, 2019
1 parent b7d1e3f commit ddf476b
Show file tree
Hide file tree
Showing 5 changed files with 145 additions and 130 deletions.
6 changes: 5 additions & 1 deletion doc/source/reporting.rst
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,10 @@ Computations
.. automodule:: ixmp.reporting.computations
:members:

Unless otherwise specified, these methods accept and return
:class:`Quantity <ixmp.reporting.utils.Quantity>` objects for data
arguments/return values.

Calculations:

.. autosummary::
Expand All @@ -154,7 +158,7 @@ Computations
Utilities
---------

.. autoclass:: ixmp.reporting.utils.AttrSeries
.. autoclass:: ixmp.reporting.attrseries.AttrSeries

.. automodule:: ixmp.reporting.utils
:members:
Expand Down
9 changes: 7 additions & 2 deletions ixmp/reporting/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# The core design pattern uses dask graphs; see
# http://docs.dask.org/en/latest/spec.html
# - Reporter.graph is a dictionary where:
# - keys are strings or ixmp.reporting.util.Key objects (which compare/hash
# - keys are strings or ixmp.reporting.key.Key objects (which compare/hash
# equal to their str() representation), and
# - values are 'computations' (the Reporter.add() docstring repeats the
# definition of computations from the above URL).
Expand Down Expand Up @@ -37,7 +37,12 @@
import yaml

from .key import Key
from .utils import REPLACE_UNITS, keys_for_quantity, rename_dims, ureg
from .utils import (
REPLACE_UNITS,
keys_for_quantity,
rename_dims,
ureg,
)
from . import computations
from .describe import describe_recursive

Expand Down
124 changes: 124 additions & 0 deletions ixmp/reporting/attrseries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
from collections import OrderedDict
from collections.abc import Collection
from copy import deepcopy

import pandas as pd
from pandas.core.generic import NDFrame
import xarray as xr


class AttrSeries(pd.Series):
    """:class:`pandas.Series` subclass imitating :class:`xarray.DataArray`.

    Future versions of :mod:`ixmp.reporting` will use :class:`xarray.DataArray`
    as :class:`Quantity`; however, because :mod:`xarray` currently lacks sparse
    matrix support, ixmp quantities may be too large for available memory.

    The AttrSeries class provides similar methods and behaviour to
    :class:`xarray.DataArray`, such as an `attrs` dictionary for metadata, so
    that :mod:`ixmp.reporting.computations` methods can use xarray-like syntax.

    Parameters
    ----------
    *args, **kwargs
        Passed to :class:`pandas.Series`, except for the keyword argument
        `attrs` (a mapping of metadata), which is stored on the instance.
    """

    # normal properties
    _metadata = ('attrs', )

    def __init__(self, *args, **kwargs):
        # The data may be given by keyword, or omitted; never assume that
        # args[0] exists.
        data = args[0] if args else None

        if 'attrs' in kwargs:
            # Use provided attrs
            attrs = kwargs.pop('attrs')
        elif hasattr(data, 'attrs'):
            # Use attrs from an xarray-like object
            attrs = data.attrs.copy()

            # Pre-convert to a pd.Series to preserve names and labels. Objects
            # that have .attrs but no .to_series() (e.g. a plain pd.Series in
            # recent pandas) are passed through unchanged.
            if hasattr(data, 'to_series'):
                args = list(args)
                args[0] = data.to_series()
        else:
            # default empty
            attrs = OrderedDict()

        super().__init__(*args, **kwargs)

        self.attrs = attrs

    def assign_attrs(self, d):
        """Update :attr:`attrs` in place from mapping `d`; return self."""
        self.attrs.update(d)
        return self

    def assign_coords(self, **kwargs):
        """Return a copy with the keyword names prepended as index levels.

        Each keyword name becomes the name of a new, outermost index level and
        the keyword value becomes its label.
        """
        return pd.concat([self], keys=list(kwargs.values()),
                         names=list(kwargs.keys()))

    @property
    def coords(self):
        """Read-only mapping from dimension (index level) name to labels."""
        index = self.index
        if isinstance(index, pd.MultiIndex):
            return dict(zip(index.names, index.levels))
        # A flat index has no .levels; report its unique labels instead
        return {index.name: index.unique()}

    @property
    def dims(self):
        """Tuple of dimension (index level) names."""
        return tuple(self.index.names)

    def sel(self, indexers=None, drop=False, **indexers_kwargs):
        """Select labels along named dimensions.

        Parameters
        ----------
        indexers : dict, optional
            Mapping from dimension name to a label or collection of labels.
            This mapping is not modified.
        drop : bool, optional
            If False (the default) and exactly one scalar label is selected on
            one dimension of a MultiIndex, that dimension is kept in the
            result.
        **indexers_kwargs
            Indexers given as keyword arguments; merged with `indexers`.

        Returns
        -------
        AttrSeries
        """
        # Copy, so that the mapping passed by the caller is never mutated
        indexers = dict(indexers or {})
        indexers.update(indexers_kwargs)

        if len(indexers) == 1 and not drop:
            level, key = next(iter(indexers.items()))
            # str is a Collection, but is a single label for this purpose
            is_scalar = isinstance(key, str) or not isinstance(key, Collection)
            if is_scalar and isinstance(self.index, pd.MultiIndex):
                # When using .loc[] to select 1 label on 1 level, pandas drops
                # the level. Use .xs() to avoid this behaviour unless drop=True
                return AttrSeries(self.xs(key, level=level, drop_level=False))

        idx = tuple(indexers.get(name, slice(None))
                    for name in self.index.names)
        return AttrSeries(self.loc[idx])

    def sum(self, *args, **kwargs):
        """Sum, optionally over the dimension(s) named by keyword `dim`.

        Without `dim`, this is :meth:`pandas.Series.sum`, with the result
        wrapped in an AttrSeries. `dim` may be a single name or a sequence of
        names.

        Raises
        ------
        ValueError
            If the index is not a MultiIndex and `dim` is not its single name.
        """
        if 'dim' not in kwargs:
            return AttrSeries(super().sum(*args, **kwargs))

        dim = kwargs.pop('dim')
        if isinstance(dim, str):
            # A single name; avoid counting its characters with len() below
            dim = [dim]

        if isinstance(self.index, pd.MultiIndex):
            if len(dim) == len(self.index.names):
                # assume dimensions = full multi index, do simple sum; as in
                # the plain case, other keyword arguments are not used here
                return AttrSeries(super().sum(*args))

            # pivot and sum across columns. An unknown name in `dim` raises
            # from unstack() rather than being silently ignored.
            kwargs['axis'] = 1
            return AttrSeries(self.unstack(dim).sum(*args, **kwargs))

        if list(dim) != [self.index.name]:
            raise ValueError(dim, self.index.name, self)

        # Sum over labels of the single level. Series.sum(level=...) no longer
        # exists in pandas 2; groupby(level=...).sum() is the replacement.
        # Positional arguments are not forwarded, because GroupBy.sum has a
        # different positional signature.
        grouped = self.groupby(level=list(dim), sort=False)
        return AttrSeries(grouped.sum(**kwargs))

    def squeeze(self, *args, **kwargs):
        """Like :meth:`pandas.Series.squeeze`, but never return a scalar.

        The keyword argument `drop` is accepted and ignored. A 1-element
        series is returned unchanged.
        """
        kwargs.pop('drop', None)
        return super().squeeze(*args, **kwargs) if len(self) > 1 else self

    def as_xarray(self):
        """Convert to :class:`xarray.DataArray`."""
        return xr.DataArray.from_series(self)

    def transpose(self, *dims):
        """Reorder index levels to the order given by `dims`."""
        return self.reorder_levels(dims)

    def to_dataframe(self):
        """Convert to :class:`pandas.DataFrame`."""
        return self.to_frame()

    def to_series(self):
        """Return self; the object is already a series."""
        return self

    @property
    def _constructor(self):
        # pandas uses this to build results of the same class
        return AttrSeries

    def __finalize__(self, other, method=None, **kwargs):
        """Propagate metadata from other to self.

        This is identical to the version in pandas, except deepcopy() is added
        so that the 'attrs' OrderedDict is not double-referenced.
        """
        if isinstance(other, NDFrame):
            for name in self._metadata:
                object.__setattr__(self, name,
                                   deepcopy(getattr(other, name, None)))
        return self
134 changes: 8 additions & 126 deletions ixmp/reporting/utils.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,25 @@
import collections
from collections.abc import Collection
from copy import deepcopy
from functools import partial, reduce
import logging
from operator import mul

import pandas as pd
from pandas.core.generic import NDFrame
import pint
import xarray as xr

from .attrseries import AttrSeries
from .key import Key


# Module-level logger, named after this module
log = logging.getLogger(__name__)

# pint unit registry; also imported from this module by ixmp.reporting
ureg = pint.UnitRegistry()

# Class used to represent quantities; currently AttrSeries. See also:
# - docstring of attrseries.AttrSeries.
# - test_report_size() for a test that shows how non-sparse xr.DataArray
#   triggers MemoryError.
Quantity = AttrSeries
# Quantity = xr.DataArray

# Replacements to apply to quantity units before parsing by pint
REPLACE_UNITS = {
Expand Down Expand Up @@ -156,123 +159,6 @@ def invalid(unit):
return unit


class AttrSeries(pd.Series):
    """:class:`pandas.Series` subclass imitating :class:`xarray.DataArray`.

    Future versions of :mod:`ixmp.reporting` will use :class:`xarray.DataArray`
    as :class:`Quantity`; however, because :mod:`xarray` currently lacks sparse
    matrix support, ixmp quantities may be too large for memory.

    The AttrSeries class provides similar methods and behaviour to
    :class:`xarray.DataArray`, such as an `attrs` dictionary for metadata, so
    that :mod:`ixmp.reporting.computations` methods can use xarray-like syntax.

    Parameters
    ----------
    *args, **kwargs
        Passed to :class:`pandas.Series`, except for the keyword argument
        `attrs` (a mapping of metadata), which is stored on the instance.
    """

    # normal properties
    _metadata = ('attrs', )

    def __init__(self, *args, **kwargs):
        # The data may be given by keyword, or omitted; never assume that
        # args[0] exists.
        data = args[0] if args else None

        if 'attrs' in kwargs:
            # Use provided attrs
            attrs = kwargs.pop('attrs')
        elif hasattr(data, 'attrs'):
            # Use attrs from an xarray-like object
            attrs = data.attrs.copy()

            # Pre-convert to a pd.Series to preserve names and labels. Objects
            # that have .attrs but no .to_series() (e.g. a plain pd.Series in
            # recent pandas) are passed through unchanged.
            if hasattr(data, 'to_series'):
                args = list(args)
                args[0] = data.to_series()
        else:
            # default empty
            attrs = collections.OrderedDict()

        super().__init__(*args, **kwargs)

        self.attrs = attrs

    def assign_attrs(self, d):
        """Update :attr:`attrs` in place from mapping `d`; return self."""
        self.attrs.update(d)
        return self

    def assign_coords(self, **kwargs):
        """Return a copy with the keyword names prepended as index levels.

        Each keyword name becomes the name of a new, outermost index level and
        the keyword value becomes its label.
        """
        return pd.concat([self], keys=list(kwargs.values()),
                         names=list(kwargs.keys()))

    @property
    def coords(self):
        """Read-only mapping from dimension (index level) name to labels."""
        index = self.index
        if isinstance(index, pd.MultiIndex):
            return dict(zip(index.names, index.levels))
        # A flat index has no .levels; report its unique labels instead
        return {index.name: index.unique()}

    @property
    def dims(self):
        """Tuple of dimension (index level) names."""
        return tuple(self.index.names)

    def sel(self, indexers=None, drop=False, **indexers_kwargs):
        """Select labels along named dimensions.

        Parameters
        ----------
        indexers : dict, optional
            Mapping from dimension name to a label or collection of labels.
            This mapping is not modified.
        drop : bool, optional
            If False (the default) and exactly one scalar label is selected on
            one dimension of a MultiIndex, that dimension is kept in the
            result.
        **indexers_kwargs
            Indexers given as keyword arguments; merged with `indexers`.

        Returns
        -------
        AttrSeries
        """
        # Copy, so that the mapping passed by the caller is never mutated
        indexers = dict(indexers or {})
        indexers.update(indexers_kwargs)

        if len(indexers) == 1 and not drop:
            level, key = next(iter(indexers.items()))
            # str is a Collection, but is a single label for this purpose
            is_scalar = isinstance(key, str) or not isinstance(key, Collection)
            if is_scalar and isinstance(self.index, pd.MultiIndex):
                # When using .loc[] to select 1 label on 1 level, pandas drops
                # the level. Use .xs() to avoid this behaviour unless drop=True
                return AttrSeries(self.xs(key, level=level, drop_level=False))

        idx = tuple(indexers.get(name, slice(None))
                    for name in self.index.names)
        return AttrSeries(self.loc[idx])

    def sum(self, *args, **kwargs):
        """Sum, optionally over the dimension(s) named by keyword `dim`.

        Without `dim`, this is :meth:`pandas.Series.sum`, with the result
        wrapped in an AttrSeries. `dim` may be a single name or a sequence of
        names.

        Raises
        ------
        ValueError
            If the index is not a MultiIndex and `dim` is not its single name.
        """
        if 'dim' not in kwargs:
            return AttrSeries(super().sum(*args, **kwargs))

        dim = kwargs.pop('dim')
        if isinstance(dim, str):
            # A single name; avoid counting its characters with len() below
            dim = [dim]

        if isinstance(self.index, pd.MultiIndex):
            if len(dim) == len(self.index.names):
                # assume dimensions = full multi index, do simple sum; as in
                # the plain case, other keyword arguments are not used here
                return AttrSeries(super().sum(*args))

            # pivot and sum across columns. An unknown name in `dim` raises
            # from unstack() rather than being silently ignored.
            kwargs['axis'] = 1
            return AttrSeries(self.unstack(dim).sum(*args, **kwargs))

        if list(dim) != [self.index.name]:
            raise ValueError(dim, self.index.name, self)

        # Sum over labels of the single level. Series.sum(level=...) no longer
        # exists in pandas 2; groupby(level=...).sum() is the replacement.
        # Positional arguments are not forwarded, because GroupBy.sum has a
        # different positional signature.
        grouped = self.groupby(level=list(dim), sort=False)
        return AttrSeries(grouped.sum(**kwargs))

    def squeeze(self, *args, **kwargs):
        """Like :meth:`pandas.Series.squeeze`, but never return a scalar.

        The keyword argument `drop` is accepted and ignored. A 1-element
        series is returned unchanged.
        """
        kwargs.pop('drop', None)
        return super().squeeze(*args, **kwargs) if len(self) > 1 else self

    def as_xarray(self):
        """Convert to :class:`xarray.DataArray`."""
        return xr.DataArray.from_series(self)

    def transpose(self, *dims):
        """Reorder index levels to the order given by `dims`."""
        return self.reorder_levels(dims)

    def to_dataframe(self):
        """Convert to :class:`pandas.DataFrame`."""
        return self.to_frame()

    def to_series(self):
        """Return self; the object is already a series."""
        return self

    @property
    def _constructor(self):
        # pandas uses this to build results of the same class
        return AttrSeries

    def __finalize__(self, other, method=None, **kwargs):
        """Propagate metadata from other to self.

        This is identical to the version in pandas, except deepcopy() is added
        so that the 'attrs' OrderedDict is not double-referenced.
        """
        if isinstance(other, NDFrame):
            for name in self._metadata:
                object.__setattr__(self, name,
                                   deepcopy(getattr(other, name, None)))
        return self


def data_for_quantity(ix_type, name, column, scenario, filters=None):
"""Retrieve data from *scenario*.
Expand Down Expand Up @@ -342,7 +228,7 @@ def data_for_quantity(ix_type, name, column, scenario, filters=None):
# Convert to a Dataset, assign attributes and name
# ds = xr.Dataset.from_dataframe(data)[column]
# or to a new "Attribute Series"
ds = AttrSeries(data[column])
ds = Quantity(data[column])

ds = ds \
.assign_attrs(attrs) \
Expand All @@ -357,10 +243,6 @@ def data_for_quantity(ix_type, name, column, scenario, filters=None):
return ds


# Quantity = xr.DataArray
Quantity = AttrSeries


def concat(*args, **kwargs):
if Quantity is AttrSeries:
kwargs.pop('dim')
Expand Down
2 changes: 1 addition & 1 deletion ixmp/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

from .config import _config as ixmp_config
from .core import Platform, Scenario, IAMC_IDX
from .reporting.utils import Quantity, AttrSeries
from .reporting.utils import AttrSeries, Quantity


models = {
Expand Down

0 comments on commit ddf476b

Please sign in to comment.