From dd6db1b03a349cf1dda24ed8d70dca7abdc5f738 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 21 Nov 2019 15:50:09 +0100 Subject: [PATCH 01/13] Remove caching from ixmp.core --- ixmp/core.py | 88 +++++++--------------------------------------------- 1 file changed, 11 insertions(+), 77 deletions(-) diff --git a/ixmp/core.py b/ixmp/core.py index 47ae057e6..1a2cc3bd1 100644 --- a/ixmp/core.py +++ b/ixmp/core.py @@ -11,7 +11,6 @@ from .utils import ( as_str_list, check_year, - filtered, logger, parse_url ) @@ -617,9 +616,9 @@ def __init__(self, mp, model, scenario, version=None, scheme=None, warn('Using `ixmp.Scenario` for MESSAGE-scheme scenarios is ' 'deprecated, please use `message_ix.Scenario`') - # Initialize cache - self._cache = cache - self._pycache = {} + @property + def _cache(self): + return hasattr(self.platform._backend, '_cache') @classmethod def from_url(cls, url, errors='warn'): @@ -678,37 +677,11 @@ def load_scenario_data(self): if not self._cache: raise ValueError('Cache must be enabled to load scenario data') - funcs = { - 'set': (self.set_list, self.set), - 'par': (self.par_list, self.par), - 'var': (self.var_list, self.var), - 'equ': (self.equ_list, self.equ), - } - for ix_type, (list_func, get_func) in funcs.items(): + for ix_type in 'equ', 'par', 'set', 'var': logger().info('Caching {} data'.format(ix_type)) - for item in list_func(): - get_func(item) - - def _element(self, ix_type, name, filters=None, cache=None): - """Return a pd.DataFrame of item elements.""" - cache_key = (ix_type, name) - - # if dataframe in python cache, retrieve from there - if cache_key in self._pycache: - return filtered(self._pycache[cache_key], filters) - - # if no cache, retrieve from Backend with filters - if filters is not None and not self._cache: - return self._backend('item_get_elements', ix_type, name, filters) - - # otherwise, retrieve from Java and keep in python cache - df = self._backend('item_get_elements', ix_type, name, None) - - # save if using memcache - if self._cache: - self._pycache[cache_key] = df - - return filtered(df, filters) + get_func = getattr(self, ix_type) + for name in getattr(self, '{}_list'.format(ix_type))(): + get_func(name) def idx_sets(self, name): """Return the list of index sets for an item (set, par, var, equ) @@ -799,7 +772,7 @@ def set(self, name, filters=None, **kwargs): ------- pandas.DataFrame """ - return self._element('set', name, filters, **kwargs) + return self._backend('item_get_elements', 'set', name, filters) def add_set(self, name, key, comment=None): """Add elements to an existing set. @@ -825,7 +798,6 @@ def add_set(self, name, key, comment=None): """ # TODO expand docstring (here or in doc/source/api.rst) with examples, # per test_core.test_add_set. 
- self.clear_cache(name=name, ix_type='set') # Get index names for set *name*, may raise KeyError idx_names = self.idx_names(name) @@ -915,8 +887,6 @@ def remove_set(self, name, key=None): key : dataframe or key list or concatenated string elements to be removed """ - self.clear_cache(name=name, ix_type='set') - if key is None: self._backend('delete_item', 'set', name) else: @@ -955,7 +925,7 @@ def par(self, name, filters=None, **kwargs): filters : dict index names mapped list of index set elements """ - return self._element('par', name, filters, **kwargs) + return self._backend('item_get_elements', 'par', name, filters) def add_par(self, name, key_or_data=None, value=None, unit=None, comment=None, key=None, val=None): @@ -1060,9 +1030,6 @@ def add_par(self, name, key_or_data=None, value=None, unit=None, # Store self._backend('item_set_elements', 'par', name, elements) - # Clear cache - self.clear_cache(name=name, ix_type='par') - def init_scalar(self, name, val, unit, comment=None): """Initialize a new scalar. @@ -1108,7 +1075,6 @@ def change_scalar(self, name, val, unit, comment=None): comment : str, optional Description of the change. """ - self.clear_cache(name=name, ix_type='par') self._backend('item_set_elements', 'par', name, [(None, float(val), unit, comment)]) @@ -1122,8 +1088,6 @@ def remove_par(self, name, key=None): key : dataframe or key list or concatenated string, optional elements to be removed """ - self.clear_cache(name=name, ix_type='par') - if key is None: self._backend('delete_item', 'par', name) else: @@ -1162,7 +1126,7 @@ def var(self, name, filters=None, **kwargs): filters : dict index names mapped list of index set elements """ - return self._element('var', name, filters, **kwargs) + return self._backend('item_get_elements', 'var', name, filters) def equ_list(self): """List all defined equations.""" @@ -1196,7 +1160,7 @@ def equ(self, name, filters=None, **kwargs): filters : dict index names mapped list of index set elements """ - return self._element('equ', name, filters, **kwargs) + return self._backend('item_get_elements', 'equ', name, filters) def clone(self, model=None, scenario=None, annotation=None, keep_solution=True, shift_first_model_year=None, platform=None, @@ -1295,7 +1259,6 @@ def remove_solution(self, first_model_year=None): If Scenario has no solution or if `first_model_year` is not `int`. 
""" if self.has_solution(): - self.clear_cache() # reset Python data cache check_year(first_model_year, 'first_model_year') self._backend('clear_solution', first_model_year) else: @@ -1390,35 +1353,6 @@ def callback(scenario, **kwargs): # Callback indicates convergence is reached break - def clear_cache(self, name=None, ix_type=None): - """clear the Python cache of item elements - - Parameters - ---------- - name : str, optional - item name (`None` clears entire Python cache) - ix_type : str, optional - type of item (if provided, cache clearing is faster) - """ - # if no name is given, clean the entire cache - if name is None: - self._pycache = {} - return # exit early - - # remove this element from the cache if it exists - key = None - keys = self._pycache.keys() - if ix_type is not None: - key = (ix_type, name) if (ix_type, name) in keys else None - else: # look for it - hits = [k for k in keys if k[1] == name] # 0 is ix_type, 1 is name - if len(hits) > 1: - raise ValueError('Multiple values named {}'.format(name)) - if len(hits) == 1: - key = hits[0] - if key is not None: - self._pycache.pop(key) - def get_meta(self, name=None): """get scenario metadata From 1f8e9e74ce3a8b62f75239f3ed5105903fa7c674 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 21 Nov 2019 16:09:24 +0100 Subject: [PATCH 02/13] Add backend.base.CachingBackend --- ixmp/backend/base.py | 68 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/ixmp/backend/base.py b/ixmp/backend/base.py index a2af88331..30fd5a0b7 100644 --- a/ixmp/backend/base.py +++ b/ixmp/backend/base.py @@ -1,4 +1,6 @@ from abc import ABC, abstractmethod +from copy import copy +import json from ixmp.core import TimeSeries, Scenario @@ -759,3 +761,69 @@ def cat_set_elements(self, ms: Scenario, name, cat, keys, is_unique): ------- None """ + + +class CachingBackend(Backend): + """Backend with additional features for caching values.""" + _cache = {} + _cache_hit = {} + + def __init__(self): + """Initialize the cache.""" + self._cache = {} + self._cache_hit = {} + + @classmethod + def _cache_key(self, ts, ix_type, name, filters=None): + """Return a hashable cache key. + + Parameters + ---------- + ts : .TimeSeries + ix_type : str + name : str + filters : dict + """ + ts = id(ts) + if filters is None or len(filters) == 0: + return (ts, ix_type, name) + else: + # Convert filters into a hashable object + filters = hash(json.dumps(sorted(filters.items()))) + return (ts, ix_type, name, filters) + + def cache_get(self, ts, ix_type, name, filters): + """Retrieve value from cache.""" + key = self._cache_key(ts, ix_type, name, filters) + + if key in self._cache: + self._cache_hit[key] = self._cache_hit.setdefault(key, 0) + 1 + return copy(self._cache[key]) + else: + raise KeyError(ts, ix_type, name, filters) + + def cache(self, ts, ix_type, name, filters, value): + """Store value in cache.""" + key = self._cache_key(ts, ix_type, name, filters) + + refreshed = key in self._cache + self._cache[key] = value + + return refreshed + + def cache_invalidate(self, ts, ix_type=None, name=None, filters=None): + """Invalidate all cached values for *ix_type* and *name*. + + If *filters* is :obj:`None` (the default), all filtered values are + also invalidated. 
If all argument are none, all + """ + key = self._cache_key(ts, ix_type, name, filters) + + if filters is None: + i = slice(1) if (ix_type is name is None) else slice(3) + to_remove = filter(lambda k: k[i] == key[i], self._cache.keys()) + else: + to_remove = [key] + + for key in list(to_remove): + self._cache.pop(key) From ffd846275a36e6eedd2e6aab1986eebe5bcd7bb2 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 21 Nov 2019 16:10:03 +0100 Subject: [PATCH 03/13] Base JDBCBackend on CachingBackend --- ixmp/backend/jdbc.py | 56 +++++++++++++++++++++++++++++++++++--------- ixmp/model/gams.py | 3 --- 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/ixmp/backend/jdbc.py b/ixmp/backend/jdbc.py index 355cd6cba..7e077bfc1 100644 --- a/ixmp/backend/jdbc.py +++ b/ixmp/backend/jdbc.py @@ -17,9 +17,9 @@ from ixmp import config from ixmp.core import Scenario -from ixmp.utils import islistable +from ixmp.utils import filtered, islistable from . import FIELDS -from .base import Backend +from .base import CachingBackend log = logging.getLogger(__name__) @@ -100,7 +100,7 @@ def _temp_dbprops(driver=None, path=None, url=None, user=None, password=None): return str(file), full_url -class JDBCBackend(Backend): +class JDBCBackend(CachingBackend): """Backend using JPype/JDBC to connect to Oracle and HyperSQLDB instances. Parameters @@ -205,6 +205,9 @@ def __init__(self, jvmargs=None, **kwargs): else: raise RuntimeError('unhandled Java exception:' + info) from e + # Invoke the parent constructor to initialize the cache + super().__init__() + def __del__(self): try: Path(self._properties_file).unlink() @@ -450,7 +453,7 @@ def clone(self, s, platform_dest, model, scenario, annotation, # Instantiate same class as the original object return s.__class__(platform_dest, model, scenario, - version=jclone.getVersion(), cache=s._cache) + version=jclone.getVersion()) def has_solution(self, s): return self.jindex[s].hasSolution() @@ -483,12 +486,30 @@ def init_item(self, s, type, name, idx_sets, idx_names): def delete_item(self, s, type, name): getattr(self.jindex[s], f'remove{type.title()}')() + self.cache_invalidate(s, type, name) def item_index(self, s, name, sets_or_names): jitem = self._get_item(s, 'item', name, load=False) return list(getattr(jitem, f'getIdx{sets_or_names.title()}')()) def item_get_elements(self, s, type, name, filters=None): + try: + # Retrieve the cached value with this exact set of filters + return self.cache_get(s, type, name, filters) + except KeyError: + pass # Cache miss + + try: + # Retrieve a cached, unfiltered value of the same item + unfiltered = self.cache_get(s, type, name, None) + except KeyError: + pass # Cache miss + else: + # Success; filter and return + return filtered(unfiltered, filters) + + # Failed to load item from cache + # Retrieve the item item = self._get_item(s, type, name, load=True) @@ -525,19 +546,24 @@ def item_get_elements(self, s, type, name, filters=None): data['lvl'] = item.getLevels(jList) data['mrg'] = item.getMarginals(jList) - return pd.DataFrame.from_dict(data, orient='columns') \ - .astype(types) + result = pd.DataFrame.from_dict(data, orient='columns') \ + .astype(types) elif type == 'set': # Index sets - return pd.Series(item.getCol(0, jList)) + result = pd.Series(item.getCol(0, jList)) elif type == 'par': # Scalar parameters - return dict(value=item.getScalarValue().floatValue(), - unit=str(item.getScalarUnit())) + result = dict(value=item.getScalarValue().floatValue(), + unit=str(item.getScalarUnit())) elif type in ('equ', 
'var'): # Scalar equations and variables - return dict(lvl=item.getScalarLevel().floatValue(), - mrg=item.getScalarMarginal().floatValue()) + result = dict(lvl=item.getScalarLevel().floatValue(), + mrg=item.getScalarMarginal().floatValue()) + + # Store cache + self.cache(s, type, name, filters, result) + + return result def item_set_elements(self, s, type, name, elements): jobj = self._get_item(s, type, name) @@ -566,11 +592,15 @@ def item_set_elements(self, s, type, name, elements): else: # pragma: no cover raise RuntimeError('unhandled Java exception') from e + self.cache_invalidate(s, type, name) + def item_delete_elements(self, s, type, name, keys): jitem = self._get_item(s, type, name, load=False) for key in keys: jitem.removeElement(to_jlist2(key)) + self.cache_invalidate(s, type, name) + def get_meta(self, s): def unwrap(v): """Unwrap metadata numeric value (BigDecimal -> Double)""" @@ -593,6 +623,8 @@ def clear_solution(self, s, from_year=None): else: self.jindex[s].removeSolution() + self.cache_invalidate(s) + # MsgScenario methods def cat_list(self, ms, name): @@ -630,6 +662,8 @@ def read_gdx(self, s, path, check_solution, comment, equ_list, var_list): str(path.parent), path.name, comment, to_jlist2(var_list), to_jlist2(equ_list), check_solution) + self.cache_invalidate(s) + def _get_item(self, s, ix_type, name, load=True): """Return the Java object for item *name* of *ix_type*. diff --git a/ixmp/model/gams.py b/ixmp/model/gams.py index 436433f17..b3f1824df 100644 --- a/ixmp/model/gams.py +++ b/ixmp/model/gams.py @@ -132,9 +132,6 @@ def format(key): # Invoke GAMS check_call(command, shell=os.name == 'nt', cwd=model_file.parent) - # Reset Python data cache - scenario.clear_cache() - # Read model solution scenario._backend('read_gdx', self.out_file, self.check_solution, From e51645b4ec95b137751c35144021fce5a4226fad Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 21 Nov 2019 16:10:22 +0100 Subject: [PATCH 04/13] Adjust reporting to avoid Scenario._element --- ixmp/reporting/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ixmp/reporting/utils.py b/ixmp/reporting/utils.py index dc3b8d36a..dfe7c61f7 100644 --- a/ixmp/reporting/utils.py +++ b/ixmp/reporting/utils.py @@ -224,7 +224,7 @@ def data_for_quantity(ix_type, name, column, scenario, filters=None): filters = filters_to_use # Retrieve quantity data - data = scenario._element(ix_type, name, filters) + data = getattr(scenario, ix_type)(name, filters) # ixmp/GAMS scalar is not returned as pd.DataFrame if isinstance(data, dict): From 8be1669356b89fbd6e21008e8430314f8ac592a0 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 21 Nov 2019 16:10:33 +0100 Subject: [PATCH 05/13] Adjust tests --- tests/test_core.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 154b1b49f..119367514 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -307,25 +307,32 @@ def test_meta(test_mp): def test_load_scenario_data(test_mp): - scen = ixmp.Scenario(test_mp, *can_args, cache=True) + """load_scenario_data() caches all data.""" + scen = ixmp.Scenario(test_mp, *can_args) scen.load_scenario_data() - assert ('par', 'd') in scen._pycache # key exists - df = scen.par('d', filters={'i': ['seattle']}) - obs = df.loc[0, 'unit'] - exp = 'km' - assert obs == exp + + cache_key = scen.platform._backend._cache_key(scen, 'par', 'd') + + # Item exists in cache + assert cache_key in scen.platform._backend._cache + 
+ # Cache has not been used + hits_before = scen.platform._backend._cache_hit.get(cache_key, 0) + assert hits_before == 0 + + # Retrieving the expected value + assert 'km' == scen.par('d', filters={'i': ['seattle']}).loc[0, 'unit'] + + # Cache was used to return the value + hits_after = scen.platform._backend._cache_hit[cache_key] + assert hits_after == hits_before + 1 def test_load_scenario_data_clear_cache(test_mp): # this fails on commit: 4376f54 scen = ixmp.Scenario(test_mp, *can_args, cache=True) scen.load_scenario_data() - scen.clear_cache(name='d') - - -def test_load_scenario_data_raises(test_mp): - scen = ixmp.Scenario(test_mp, *can_args, cache=False) - pytest.raises(ValueError, scen.load_scenario_data) + scen.platform._backend.cache_invalidate(scen, 'par', 'd') def test_log_level(test_mp): From 19d07caca621e9abf536fb3c09c7e5d400c9d033 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 21 Nov 2019 20:48:45 +0100 Subject: [PATCH 06/13] Preallocate DataFrame in JDBCBackend.item_get_elements --- ixmp/backend/jdbc.py | 51 +++++++++++++++++++++++++---------------- ixmp/reporting/utils.py | 20 ++++++++-------- tests/test_reporting.py | 7 +++--- 3 files changed, 46 insertions(+), 32 deletions(-) diff --git a/ixmp/backend/jdbc.py b/ixmp/backend/jdbc.py index 7e077bfc1..e68dfe113 100644 --- a/ixmp/backend/jdbc.py +++ b/ixmp/backend/jdbc.py @@ -14,6 +14,7 @@ from jpype import JClass import numpy as np import pandas as pd +from pandas.api.types import CategoricalDtype from ixmp import config from ixmp.core import Scenario @@ -524,30 +525,40 @@ def item_get_elements(self, s, type, name, filters=None): if item.getDim() > 0: # Mapping set or multi-dimensional equation, parameter, or variable - idx_names = list(item.getIdxNames()) + columns = list(item.getIdxNames()) idx_sets = list(item.getIdxSets()) - data = {} - types = {} + # Prepare dtypes for index columns + dtypes = {} + for idx_name, idx_set in zip(columns, idx_sets): + if idx_set == 'year': + dtypes[idx_name] = int + else: + dtypes[idx_name] = CategoricalDtype( + self.item_get_elements(s, 'set', idx_set)) - # Retrieve index columns - for d, (d_name, d_set) in enumerate(zip(idx_names, idx_sets)): - data[d_name] = item.getCol(d, jList) - if d_set == 'year': - # Record column for later type conversion - types[d_name] = int - - # Retrieve value columns + # Prepare dtypes for additional columns if type == 'par': - data['value'] = item.getValues(jList) - data['unit'] = item.getUnits(jList) - - if type in ('equ', 'var'): - data['lvl'] = item.getLevels(jList) - data['mrg'] = item.getMarginals(jList) - - result = pd.DataFrame.from_dict(data, orient='columns') \ - .astype(types) + columns.extend(['value', 'unit']) + dtypes['value'] = float + dtypes['unit'] = CategoricalDtype(self.jobj.getUnitList()) + elif type in ('equ', 'var'): + columns.extend(['lvl', 'mrg']) + dtypes.update({'lvl': float, 'mrg': float}) + + # Prepare empty DataFrame + result = pd.DataFrame(index=pd.RangeIndex(len(jList)), + columns=columns) \ + .astype(dtypes) + + for i in range(len(idx_sets)): + result.iloc[:, i] = item.getCol(i, jList) + if type == 'par': + result.loc[:, 'value'] = item.getValues(jList) + result.loc[:, 'unit'] = item.getUnits(jList) + elif type in ('equ', 'var'): + result.loc[:, 'lvl'] = item.getLevels(jList) + result.loc[:, 'mrg'] = item.getMarginals(jList) elif type == 'set': # Index sets result = pd.Series(item.getCol(0, jList)) diff --git a/ixmp/reporting/utils.py b/ixmp/reporting/utils.py index dfe7c61f7..244cf31d1 100644 --- 
a/ixmp/reporting/utils.py +++ b/ixmp/reporting/utils.py @@ -208,9 +208,10 @@ def data_for_quantity(ix_type, name, column, scenario, filters=None): log.debug('Retrieving data for {}'.format(name)) # Only use the relevant filters + idx_names = scenario.idx_names(name) if filters: # Dimensions of the object - dims = dims_for_qty(scenario.idx_names(name)) + dims = dims_for_qty(idx_names) # Mapping from renamed dimensions to Scenario dimension names MAP = get_reversed_rename_dims() @@ -237,6 +238,9 @@ def data_for_quantity(ix_type, name, column, scenario, filters=None): log.warning(f'0 values for {ix_type} {name!r} using filters:' f'\n {filters!r}\n Subsequent computations may fail.') + # Convert categorical dtype to str + data = data.astype({col: str for col in idx_names}) + # List of the dimensions dims = dims_for_qty(data) @@ -258,8 +262,8 @@ def data_for_quantity(ix_type, name, column, scenario, filters=None): # Set index if 1 or more dimensions if len(dims): # First rename, then set index - data.rename(columns=RENAME_DIMS, inplace=True) - data.set_index(dims, inplace=True) + data = data.rename(columns=RENAME_DIMS) \ + .set_index(dims) # Check sparseness # try: @@ -272,20 +276,18 @@ def data_for_quantity(ix_type, name, column, scenario, filters=None): # info = (name, shape, filled, size, need_to_chunk) # log.debug(' '.join(map(str, info))) - # Convert to a Dataset, assign attrbutes and name - # ds = xr.Dataset.from_dataframe(data)[column] - # or to a new "Attribute Series" - ds = as_quantity(data[column]) \ + # Convert to a Quantity, assign attrbutes and name + qty = as_quantity(data[column]) \ .assign_attrs(attrs) \ .rename(name + ('-margin' if column == 'mrg' else '')) try: # Remove length-1 dimensions for scalars - ds = ds.squeeze('index', drop=True) + qty = qty.squeeze('index', drop=True) except KeyError: pass - return ds + return qty def as_attrseries(obj): diff --git a/tests/test_reporting.py b/tests/test_reporting.py index b8222d7da..c1dae8916 100644 --- a/tests/test_reporting.py +++ b/tests/test_reporting.py @@ -537,9 +537,10 @@ def test_report_size(test_mp): # test_mp.add_unit('kg') scen = ixmp.Scenario(test_mp, 'size test', 'base', version='new') - # Dimensions and their lengths - dims = 'abcdef' - sizes = [1, 5, 21, 21, 89, 377] # Fibonacci #s; next 1597, 6765 + # Dimensions and their lengths (Fibonacci numbers) + N_dims = 6 + dims = 'abcdefgh'[:N_dims + 1] + sizes = [1, 5, 21, 21, 89, 377, 1597, 6765][:N_dims + 1] # commented: "377 / 73984365 elements = 0.00051% full" # from functools import reduce From feae17b04eee9975dcca2ed79cb11c37e3e6b780 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 21 Nov 2019 21:56:40 +0100 Subject: [PATCH 07/13] Use JPype slice code path --- ixmp/backend/jdbc.py | 12 +++++++----- tests/test_reporting.py | 1 - 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/ixmp/backend/jdbc.py b/ixmp/backend/jdbc.py index e68dfe113..c071635e7 100644 --- a/ixmp/backend/jdbc.py +++ b/ixmp/backend/jdbc.py @@ -551,14 +551,16 @@ def item_get_elements(self, s, type, name, filters=None): columns=columns) \ .astype(dtypes) + # Copy vectors from Java into DataFrame columns + # NB [:] causes JPype to use a faster code path for i in range(len(idx_sets)): - result.iloc[:, i] = item.getCol(i, jList) + result.iloc[:, i] = item.getCol(i, jList)[:] if type == 'par': - result.loc[:, 'value'] = item.getValues(jList) - result.loc[:, 'unit'] = item.getUnits(jList) + result.loc[:, 'value'] = item.getValues(jList)[:] + result.loc[:, 'unit'] = 
item.getUnits(jList)[:] elif type in ('equ', 'var'): - result.loc[:, 'lvl'] = item.getLevels(jList) - result.loc[:, 'mrg'] = item.getMarginals(jList) + result.loc[:, 'lvl'] = item.getLevels(jList)[:] + result.loc[:, 'mrg'] = item.getMarginals(jList)[:] elif type == 'set': # Index sets result = pd.Series(item.getCol(0, jList)) diff --git a/tests/test_reporting.py b/tests/test_reporting.py index c1dae8916..b8177573e 100644 --- a/tests/test_reporting.py +++ b/tests/test_reporting.py @@ -1,7 +1,6 @@ """Tests for ixmp.reporting.""" import os -from click.testing import CliRunner import ixmp import numpy as np import pandas as pd From 4b4c872237fefb38b8e2d0e0a78247f8cd46fa27 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 22 Nov 2019 10:02:01 +0100 Subject: [PATCH 08/13] Use as_str_list in filtered() --- ixmp/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ixmp/utils.py b/ixmp/utils.py index 73e741e20..c750e218e 100644 --- a/ixmp/utils.py +++ b/ixmp/utils.py @@ -164,7 +164,7 @@ def filtered(df, filters): mask = pd.Series(True, index=df.index) for k, v in filters.items(): - isin = df[k].isin(v) + isin = df[k].isin(as_str_list(v)) mask = mask & isin return df[mask] From 7ea0eb97672e8df6bd8ae8c71643716f2fd0bf9f Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 22 Nov 2019 10:30:01 +0100 Subject: [PATCH 09/13] Handle 'year' int dtype for message_ix after data load --- ixmp/backend/jdbc.py | 7 ++----- ixmp/core.py | 11 ++++++++++- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/ixmp/backend/jdbc.py b/ixmp/backend/jdbc.py index c071635e7..893842aff 100644 --- a/ixmp/backend/jdbc.py +++ b/ixmp/backend/jdbc.py @@ -531,11 +531,8 @@ def item_get_elements(self, s, type, name, filters=None): # Prepare dtypes for index columns dtypes = {} for idx_name, idx_set in zip(columns, idx_sets): - if idx_set == 'year': - dtypes[idx_name] = int - else: - dtypes[idx_name] = CategoricalDtype( - self.item_get_elements(s, 'set', idx_set)) + dtypes[idx_name] = CategoricalDtype( + self.item_get_elements(s, 'set', idx_set)) # Prepare dtypes for additional columns if type == 'par': diff --git a/ixmp/core.py b/ixmp/core.py index 1a2cc3bd1..0fa508970 100644 --- a/ixmp/core.py +++ b/ixmp/core.py @@ -925,7 +925,16 @@ def par(self, name, filters=None, **kwargs): filters : dict index names mapped list of index set elements """ - return self._backend('item_get_elements', 'par', name, filters) + result = self._backend('item_get_elements', 'par', name, filters) + + # FIXME message_ix requires 'year' columns to be returned as integers + # This code should be in a message_ix override of this method. + dtypes = {} + for idx_set, col_name in zip(self.idx_sets(), self.idx_names()): + if idx_set == 'year': + dtypes[col_name] = int + + return result.astype(dtypes) def add_par(self, name, key_or_data=None, value=None, unit=None, comment=None, key=None, val=None): From c6ae2f74f8d8e1936203abdd79f1d403bb9477bc Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 22 Nov 2019 10:37:32 +0100 Subject: [PATCH 10/13] Handle scalars in previous --- ixmp/core.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ixmp/core.py b/ixmp/core.py index 0fa508970..e68d22d7c 100644 --- a/ixmp/core.py +++ b/ixmp/core.py @@ -930,11 +930,12 @@ def par(self, name, filters=None, **kwargs): # FIXME message_ix requires 'year' columns to be returned as integers # This code should be in a message_ix override of this method. 
dtypes = {} - for idx_set, col_name in zip(self.idx_sets(), self.idx_names()): + for idx_set, col_name in zip(self.idx_sets(name), + self.idx_names(name)): if idx_set == 'year': dtypes[col_name] = int - return result.astype(dtypes) + return result.astype(dtypes) if len(dtypes) else result def add_par(self, name, key_or_data=None, value=None, unit=None, comment=None, key=None, val=None): From b4408652c3bdbf0b15cb8ffb6fa796eeaef15257 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 22 Nov 2019 11:18:55 +0100 Subject: [PATCH 11/13] Document CachingBackend --- doc/source/api-backend.rst | 13 ++++++++++ doc/source/api-python.rst | 1 - doc/source/reporting.rst | 26 +++++++++---------- ixmp/backend/base.py | 53 +++++++++++++++++++++++++++++++++----- 4 files changed, 71 insertions(+), 22 deletions(-) diff --git a/doc/source/api-backend.rst b/doc/source/api-backend.rst index 43840882f..1b4bdba61 100644 --- a/doc/source/api-backend.rst +++ b/doc/source/api-backend.rst @@ -51,6 +51,10 @@ Backend API - :class:`Platform ` code is not affected by where and how data is stored; it merely handles user arguments and then makes, usually, a single :class:`Backend` call. - :class:`Backend` code does not need to perform argument checking; merely store and retrieve data reliably. +- Additional Backends may inherit from :class:`Backend` or + :class:`CachingBackend`. + + .. autodata:: ixmp.backend.FIELDS .. currentmodule:: ixmp.backend.base @@ -143,3 +147,12 @@ Backend API cat_get_elements cat_list cat_set_elements + + +.. autoclass:: ixmp.backend.base.CachingBackend + :members: + :private-members: + + CachingBackend stores cache values for multiple :class:`.TimeSeries`/:class:`Scenario` objects, and for multiple values of a *filters* argument. + + Subclasses **must** call :meth:`cache`, :meth:`cache_get`, and :meth:`cache_invalidate` as appropriate to manage the cache; CachingBackend does not enforce any such logic. diff --git a/doc/source/api-python.rst b/doc/source/api-python.rst index 1e038a2fe..f6981b6af 100644 --- a/doc/source/api-python.rst +++ b/doc/source/api-python.rst @@ -124,7 +124,6 @@ Scenario add_par add_set change_scalar - clear_cache clone equ equ_list diff --git a/doc/source/reporting.rst b/doc/source/reporting.rst index 494d13760..52068f3e6 100644 --- a/doc/source/reporting.rst +++ b/doc/source/reporting.rst @@ -136,24 +136,22 @@ Others: >>> k1.drop('a', 'c') == k2.drop('a') == 'foo:b' True - Notes - ----- - A Key has the same hash, and compares equal to its ``str()``. ``repr(key)`` - prints the Key in angle brackets ('<>') to signify it is a Key object. + Some notes: - >>> repr(k1) - + - A Key has the same hash, and compares equal to its ``str()``. + ``repr(key)`` prints the Key in angle brackets ('<>') to signify it is a Key object. - Keys are *immutable*: the properties :attr:`name`, :attr:`dims`, and - :attr:`tag` are read-only, and the methods :meth:`append`, :meth:`drop`, and - :meth:`add_tag` return *new* Key objects. + >>> repr(k1) + - Keys may be generated concisely by defining a convenience method: + - Keys are *immutable*: the properties :attr:`name`, :attr:`dims`, and :attr:`tag` are read-only, and the methods :meth:`append`, :meth:`drop`, and :meth:`add_tag` return *new* Key objects. 
- >>> def foo(dims): - >>> return Key('foo', dims.split()) - >>> foo('a b c') - foo:a-b-c + - Keys may be generated concisely by defining a convenience method: + + >>> def foo(dims): + >>> return Key('foo', dims.split()) + >>> foo('a b c') + foo:a-b-c Computations diff --git a/ixmp/backend/base.py b/ixmp/backend/base.py index 30fd5a0b7..f8747812a 100644 --- a/ixmp/backend/base.py +++ b/ixmp/backend/base.py @@ -764,12 +764,20 @@ def cat_set_elements(self, ms: Scenario, name, cat, keys, is_unique): class CachingBackend(Backend): - """Backend with additional features for caching values.""" + """Backend with additional features for caching data.""" + + #: Cache of values. Keys are given by :meth:`_cache_key`; values depend on + #: the subclass' usage of the cache. _cache = {} + + #: Count of number of times a value was retrieved from cache successfully + #: using :meth:`cache_get`. _cache_hit = {} def __init__(self): - """Initialize the cache.""" + super().__init__() + + # Empty the cache self._cache = {} self._cache_hit = {} @@ -777,12 +785,21 @@ def __init__(self): def _cache_key(self, ts, ix_type, name, filters=None): """Return a hashable cache key. + ixmp *filters* (a :class:`dict` of :class:`list`) are converted to a + unique id that is hashable. + Parameters ---------- ts : .TimeSeries ix_type : str name : str filters : dict + + Returns + ------- + tuple + A hashable key with 4 elements for *ts*, *ix_type*, *name*, and + *filters*. """ ts = id(ts) if filters is None or len(filters) == 0: @@ -793,7 +810,17 @@ def _cache_key(self, ts, ix_type, name, filters=None): return (ts, ix_type, name, filters) def cache_get(self, ts, ix_type, name, filters): - """Retrieve value from cache.""" + """Retrieve value from cache. + + The value in :attr:`_cache` is copied to avoid cached values being + modified by user code. :attr:`_cache_hit` is incremented. + + Raises + ------ + KeyError + If the key for *ts*, *ix_type*, *name* and *filters* is not in the + cache. + """ key = self._cache_key(ts, ix_type, name, filters) if key in self._cache: @@ -803,7 +830,14 @@ def cache_get(self, ts, ix_type, name, filters): raise KeyError(ts, ix_type, name, filters) def cache(self, ts, ix_type, name, filters, value): - """Store value in cache.""" + """Store *value* in cache. + + Returns + ------- + bool + :obj:`True` if the key was already in the cache and its value was + overwritten. + """ key = self._cache_key(ts, ix_type, name, filters) refreshed = key in self._cache @@ -812,10 +846,15 @@ def cache(self, ts, ix_type, name, filters, value): return refreshed def cache_invalidate(self, ts, ix_type=None, name=None, filters=None): - """Invalidate all cached values for *ix_type* and *name*. + """Invalidate cached values. + + With all arguments given, single key/value is removed from the cache. + Otherwise, multiple keys/values are removed: - If *filters* is :obj:`None` (the default), all filtered values are - also invalidated. If all argument are none, all + - *ts* only: all cached values associated with the :class:`.TimeSeries` + or :class:`.Scenario` object. + - *ts*, *ix_type*, and *name*: all cached values associated with the + ixmp item, whether filtered or unfiltered. 
""" key = self._cache_key(ts, ix_type, name, filters) From ff73799fe9cc659633b897dfb9e5a4eaa4318509 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 22 Nov 2019 11:28:48 +0100 Subject: [PATCH 12/13] Update JDBCBackend docs --- doc/source/api-backend.rst | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/doc/source/api-backend.rst b/doc/source/api-backend.rst index 1b4bdba61..41434f301 100644 --- a/doc/source/api-backend.rst +++ b/doc/source/api-backend.rst @@ -22,13 +22,8 @@ Provided backends JDBCBackend supports: - - ``dbtype='HSQLDB'``: HyperSQL databases in local files. - - Remote databases. This is accomplished by creating a :class:`ixmp.Platform` with the ``dbprops`` argument pointing a file that specifies JDBC information. For instance:: - - jdbc.driver = oracle.jdbc.driver.OracleDriver - jdbc.url = jdbc:oracle:thin:@database-server.example.com:1234:SCHEMA - jdbc.user = USER - jdbc.pwd = PASSWORD + - Databases in local files (HyperSQL) using ``driver='hsqldb'`` and the *path* argument. + - Remote, Oracle databases using ``driver='oracle'`` and the *url*, *username* and *password* arguments. It has the following methods that are not part of the overall :class:`Backend` API: @@ -38,11 +33,30 @@ Provided backends read_gdx write_gdx + JDBCBackend caches values in memory to improve performance when repeatedly reading data from the same items with :meth:`.par`, :meth:`.equ`, or :meth:`.var`. + + .. tip:: If repeatedly accessing the same item with different *filters*: + + 1. First, access the item by calling e.g. :meth:`.par` *without* any filters. + This causes the full contents of the item to be loaded into cache. + 2. Then, access by making multiple :meth:`.par` calls with different *filters* arguments. + The cache value is filtered and returned without further access to the database. + + .. tip:: Modifying an item by adding or deleting elements invalidates its cache. + .. automethod:: ixmp.backend.jdbc.start_jvm Backend API ----------- +.. currentmodule:: ixmp.backend.base + +.. autosummary:: + + ixmp.backend.FIELDS + ixmp.backend.base.Backend + ixmp.backend.base.CachingBackend + - :class:`ixmp.Platform` implements a *user-friendly* API for scientific programming. This means its methods can take many types of arguments, check, and transform them—in a way that provides modeler-users with easy, intuitive workflows. - In contrast, :class:`Backend` has a *very simple* API that accepts arguments and returns values in basic Python data types and structures. @@ -57,8 +71,6 @@ Backend API .. autodata:: ixmp.backend.FIELDS -.. currentmodule:: ixmp.backend.base - .. autoclass:: ixmp.backend.base.Backend :members: From c49c41630a60f52e4f983746a69e40598023b8a9 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 22 Nov 2019 11:30:13 +0100 Subject: [PATCH 13/13] Update RELEASE_NOTES --- RELEASE_NOTES.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index aba2cf096..f8e8633bf 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -20,7 +20,8 @@ Configuration for ixmp and its storage backends has been streamlined. - [#189](https://github.com/iiasa/ixmp/pull/189): Identify and load Scenarios using URLs. - [#182](https://github.com/iiasa/ixmp/pull/182), - [#200](https://github.com/iiasa/ixmp/pull/200): Add new Backend, Model APIs and JDBCBackend, GAMSModel classes. 
+ [#200](https://github.com/iiasa/ixmp/pull/200), + [#213](https://github.com/iiasa/ixmp/pull/213): Add new Backend, Model APIs and CachingBackend, JDBCBackend, GAMSModel classes. - [#188](https://github.com/iiasa/ixmp/pull/188), [#195](https://github.com/iiasa/ixmp/pull/195): Enhance reporting. - [#177](https://github.com/iiasa/ixmp/pull/177): add ability to pass `gams_args` through `Scenario.solve()`
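
A stand-alone sketch of the caching pattern that patches 02 and 03 introduce. CachingBackend (ixmp/backend/base.py) supplies the cache helpers _cache_key(), cache_get(), cache() and cache_invalidate(); JDBCBackend.item_get_elements() (ixmp/backend/jdbc.py) uses them by first trying the exact (item, filters) key, then a cached unfiltered value which it filters in Python, and only then loading from the database. The toy class below reproduces that flow outside ixmp: the name ToyCachingBackend, the _load_from_storage() helper, and the sample data are illustrative assumptions, and the simplified filtered() stands in for ixmp.utils.filtered()::

    from copy import copy
    import json

    import pandas as pd


    def filtered(df, filters):
        """Minimal stand-in for ixmp.utils.filtered()."""
        if filters is None:
            return df
        mask = pd.Series(True, index=df.index)
        for k, v in filters.items():
            mask &= df[k].isin(v)
        return df[mask]


    class ToyCachingBackend:
        """Illustration only; not the real ixmp.backend.base.CachingBackend."""
        def __init__(self):
            self._cache = {}       # cache key -> stored value
            self._cache_hit = {}   # cache key -> number of cache_get() hits

        @staticmethod
        def _cache_key(ts, ix_type, name, filters=None):
            # Same scheme as CachingBackend._cache_key(): the id() of the
            # TimeSeries/Scenario object, plus a hash of the sorted filters
            if not filters:
                return (id(ts), ix_type, name)
            return (id(ts), ix_type, name,
                    hash(json.dumps(sorted(filters.items()))))

        def cache_get(self, ts, ix_type, name, filters):
            key = self._cache_key(ts, ix_type, name, filters)
            if key not in self._cache:
                raise KeyError(ts, ix_type, name, filters)
            self._cache_hit[key] = self._cache_hit.get(key, 0) + 1
            # Return a copy so user code does not modify the cached value
            return copy(self._cache[key])

        def cache(self, ts, ix_type, name, filters, value):
            self._cache[self._cache_key(ts, ix_type, name, filters)] = value

        def item_get_elements(self, ts, ix_type, name, filters=None):
            try:
                # 1. A value cached with these exact filters
                return self.cache_get(ts, ix_type, name, filters)
            except KeyError:
                pass
            try:
                # 2. An unfiltered cached value: filter it in Python
                return filtered(self.cache_get(ts, ix_type, name, None),
                                filters)
            except KeyError:
                pass
            # 3. Full cache miss: load from storage, cache, then filter
            value = self._load_from_storage(ix_type, name)
            self.cache(ts, ix_type, name, filters, value)
            return filtered(value, filters)

        def _load_from_storage(self, ix_type, name):
            # Placeholder for the real database/JDBC call
            return pd.DataFrame({'i': ['seattle', 'san-diego'],
                                 'value': [1.7, 2.5],
                                 'unit': ['km', 'km']})


    ts = object()  # stands in for a TimeSeries/Scenario instance
    be = ToyCachingBackend()
    be.item_get_elements(ts, 'par', 'd')  # loads from "storage" and caches
    df = be.item_get_elements(ts, 'par', 'd', filters={'i': ['seattle']})
    assert df.loc[0, 'unit'] == 'km'      # answered by filtering the cached value

This mirrors the tip added to doc/source/api-backend.rst in patch 12: read an item once without filters to populate the cache, and repeated filtered reads of the same item are then answered from memory rather than from the database.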