From dd6db1b03a349cf1dda24ed8d70dca7abdc5f738 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 21 Nov 2019 15:50:09 +0100 Subject: [PATCH 01/13] Remove caching from ixmp.core --- ixmp/core.py | 88 +++++++--------------------------------------------- 1 file changed, 11 insertions(+), 77 deletions(-) diff --git a/ixmp/core.py b/ixmp/core.py index 47ae057e6..1a2cc3bd1 100644 --- a/ixmp/core.py +++ b/ixmp/core.py @@ -11,7 +11,6 @@ from .utils import ( as_str_list, check_year, - filtered, logger, parse_url ) @@ -617,9 +616,9 @@ def __init__(self, mp, model, scenario, version=None, scheme=None, warn('Using `ixmp.Scenario` for MESSAGE-scheme scenarios is ' 'deprecated, please use `message_ix.Scenario`') - # Initialize cache - self._cache = cache - self._pycache = {} + @property + def _cache(self): + return hasattr(self.platform._backend, '_cache') @classmethod def from_url(cls, url, errors='warn'): @@ -678,37 +677,11 @@ def load_scenario_data(self): if not self._cache: raise ValueError('Cache must be enabled to load scenario data') - funcs = { - 'set': (self.set_list, self.set), - 'par': (self.par_list, self.par), - 'var': (self.var_list, self.var), - 'equ': (self.equ_list, self.equ), - } - for ix_type, (list_func, get_func) in funcs.items(): + for ix_type in 'equ', 'par', 'set', 'var': logger().info('Caching {} data'.format(ix_type)) - for item in list_func(): - get_func(item) - - def _element(self, ix_type, name, filters=None, cache=None): - """Return a pd.DataFrame of item elements.""" - cache_key = (ix_type, name) - - # if dataframe in python cache, retrieve from there - if cache_key in self._pycache: - return filtered(self._pycache[cache_key], filters) - - # if no cache, retrieve from Backend with filters - if filters is not None and not self._cache: - return self._backend('item_get_elements', ix_type, name, filters) - - # otherwise, retrieve from Java and keep in python cache - df = self._backend('item_get_elements', ix_type, name, None) - - # save if using memcache - if self._cache: - self._pycache[cache_key] = df - - return filtered(df, filters) + get_func = getattr(self, ix_type) + for name in getattr(self, '{}_list'.format(ix_type))(): + get_func(name) def idx_sets(self, name): """Return the list of index sets for an item (set, par, var, equ) @@ -799,7 +772,7 @@ def set(self, name, filters=None, **kwargs): ------- pandas.DataFrame """ - return self._element('set', name, filters, **kwargs) + return self._backend('item_get_elements', 'set', name, filters) def add_set(self, name, key, comment=None): """Add elements to an existing set. @@ -825,7 +798,6 @@ def add_set(self, name, key, comment=None): """ # TODO expand docstring (here or in doc/source/api.rst) with examples, # per test_core.test_add_set. 
- self.clear_cache(name=name, ix_type='set') # Get index names for set *name*, may raise KeyError idx_names = self.idx_names(name) @@ -915,8 +887,6 @@ def remove_set(self, name, key=None): key : dataframe or key list or concatenated string elements to be removed """ - self.clear_cache(name=name, ix_type='set') - if key is None: self._backend('delete_item', 'set', name) else: @@ -955,7 +925,7 @@ def par(self, name, filters=None, **kwargs): filters : dict index names mapped list of index set elements """ - return self._element('par', name, filters, **kwargs) + return self._backend('item_get_elements', 'par', name, filters) def add_par(self, name, key_or_data=None, value=None, unit=None, comment=None, key=None, val=None): @@ -1060,9 +1030,6 @@ def add_par(self, name, key_or_data=None, value=None, unit=None, # Store self._backend('item_set_elements', 'par', name, elements) - # Clear cache - self.clear_cache(name=name, ix_type='par') - def init_scalar(self, name, val, unit, comment=None): """Initialize a new scalar. @@ -1108,7 +1075,6 @@ def change_scalar(self, name, val, unit, comment=None): comment : str, optional Description of the change. """ - self.clear_cache(name=name, ix_type='par') self._backend('item_set_elements', 'par', name, [(None, float(val), unit, comment)]) @@ -1122,8 +1088,6 @@ def remove_par(self, name, key=None): key : dataframe or key list or concatenated string, optional elements to be removed """ - self.clear_cache(name=name, ix_type='par') - if key is None: self._backend('delete_item', 'par', name) else: @@ -1162,7 +1126,7 @@ def var(self, name, filters=None, **kwargs): filters : dict index names mapped list of index set elements """ - return self._element('var', name, filters, **kwargs) + return self._backend('item_get_elements', 'var', name, filters) def equ_list(self): """List all defined equations.""" @@ -1196,7 +1160,7 @@ def equ(self, name, filters=None, **kwargs): filters : dict index names mapped list of index set elements """ - return self._element('equ', name, filters, **kwargs) + return self._backend('item_get_elements', 'equ', name, filters) def clone(self, model=None, scenario=None, annotation=None, keep_solution=True, shift_first_model_year=None, platform=None, @@ -1295,7 +1259,6 @@ def remove_solution(self, first_model_year=None): If Scenario has no solution or if `first_model_year` is not `int`. 
""" if self.has_solution(): - self.clear_cache() # reset Python data cache check_year(first_model_year, 'first_model_year') self._backend('clear_solution', first_model_year) else: @@ -1390,35 +1353,6 @@ def callback(scenario, **kwargs): # Callback indicates convergence is reached break - def clear_cache(self, name=None, ix_type=None): - """clear the Python cache of item elements - - Parameters - ---------- - name : str, optional - item name (`None` clears entire Python cache) - ix_type : str, optional - type of item (if provided, cache clearing is faster) - """ - # if no name is given, clean the entire cache - if name is None: - self._pycache = {} - return # exit early - - # remove this element from the cache if it exists - key = None - keys = self._pycache.keys() - if ix_type is not None: - key = (ix_type, name) if (ix_type, name) in keys else None - else: # look for it - hits = [k for k in keys if k[1] == name] # 0 is ix_type, 1 is name - if len(hits) > 1: - raise ValueError('Multiple values named {}'.format(name)) - if len(hits) == 1: - key = hits[0] - if key is not None: - self._pycache.pop(key) - def get_meta(self, name=None): """get scenario metadata From 1f8e9e74ce3a8b62f75239f3ed5105903fa7c674 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 21 Nov 2019 16:09:24 +0100 Subject: [PATCH 02/13] Add backend.base.CachingBackend --- ixmp/backend/base.py | 68 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/ixmp/backend/base.py b/ixmp/backend/base.py index a2af88331..30fd5a0b7 100644 --- a/ixmp/backend/base.py +++ b/ixmp/backend/base.py @@ -1,4 +1,6 @@ from abc import ABC, abstractmethod +from copy import copy +import json from ixmp.core import TimeSeries, Scenario @@ -759,3 +761,69 @@ def cat_set_elements(self, ms: Scenario, name, cat, keys, is_unique): ------- None """ + + +class CachingBackend(Backend): + """Backend with additional features for caching values.""" + _cache = {} + _cache_hit = {} + + def __init__(self): + """Initialize the cache.""" + self._cache = {} + self._cache_hit = {} + + @classmethod + def _cache_key(self, ts, ix_type, name, filters=None): + """Return a hashable cache key. + + Parameters + ---------- + ts : .TimeSeries + ix_type : str + name : str + filters : dict + """ + ts = id(ts) + if filters is None or len(filters) == 0: + return (ts, ix_type, name) + else: + # Convert filters into a hashable object + filters = hash(json.dumps(sorted(filters.items()))) + return (ts, ix_type, name, filters) + + def cache_get(self, ts, ix_type, name, filters): + """Retrieve value from cache.""" + key = self._cache_key(ts, ix_type, name, filters) + + if key in self._cache: + self._cache_hit[key] = self._cache_hit.setdefault(key, 0) + 1 + return copy(self._cache[key]) + else: + raise KeyError(ts, ix_type, name, filters) + + def cache(self, ts, ix_type, name, filters, value): + """Store value in cache.""" + key = self._cache_key(ts, ix_type, name, filters) + + refreshed = key in self._cache + self._cache[key] = value + + return refreshed + + def cache_invalidate(self, ts, ix_type=None, name=None, filters=None): + """Invalidate all cached values for *ix_type* and *name*. + + If *filters* is :obj:`None` (the default), all filtered values are + also invalidated. 
If all argument are none, all + """ + key = self._cache_key(ts, ix_type, name, filters) + + if filters is None: + i = slice(1) if (ix_type is name is None) else slice(3) + to_remove = filter(lambda k: k[i] == key[i], self._cache.keys()) + else: + to_remove = [key] + + for key in list(to_remove): + self._cache.pop(key) From ffd846275a36e6eedd2e6aab1986eebe5bcd7bb2 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 21 Nov 2019 16:10:03 +0100 Subject: [PATCH 03/13] Base JDBCBackend on CachingBackend --- ixmp/backend/jdbc.py | 56 +++++++++++++++++++++++++++++++++++--------- ixmp/model/gams.py | 3 --- 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/ixmp/backend/jdbc.py b/ixmp/backend/jdbc.py index 355cd6cba..7e077bfc1 100644 --- a/ixmp/backend/jdbc.py +++ b/ixmp/backend/jdbc.py @@ -17,9 +17,9 @@ from ixmp import config from ixmp.core import Scenario -from ixmp.utils import islistable +from ixmp.utils import filtered, islistable from . import FIELDS -from .base import Backend +from .base import CachingBackend log = logging.getLogger(__name__) @@ -100,7 +100,7 @@ def _temp_dbprops(driver=None, path=None, url=None, user=None, password=None): return str(file), full_url -class JDBCBackend(Backend): +class JDBCBackend(CachingBackend): """Backend using JPype/JDBC to connect to Oracle and HyperSQLDB instances. Parameters @@ -205,6 +205,9 @@ def __init__(self, jvmargs=None, **kwargs): else: raise RuntimeError('unhandled Java exception:' + info) from e + # Invoke the parent constructor to initialize the cache + super().__init__() + def __del__(self): try: Path(self._properties_file).unlink() @@ -450,7 +453,7 @@ def clone(self, s, platform_dest, model, scenario, annotation, # Instantiate same class as the original object return s.__class__(platform_dest, model, scenario, - version=jclone.getVersion(), cache=s._cache) + version=jclone.getVersion()) def has_solution(self, s): return self.jindex[s].hasSolution() @@ -483,12 +486,30 @@ def init_item(self, s, type, name, idx_sets, idx_names): def delete_item(self, s, type, name): getattr(self.jindex[s], f'remove{type.title()}')() + self.cache_invalidate(s, type, name) def item_index(self, s, name, sets_or_names): jitem = self._get_item(s, 'item', name, load=False) return list(getattr(jitem, f'getIdx{sets_or_names.title()}')()) def item_get_elements(self, s, type, name, filters=None): + try: + # Retrieve the cached value with this exact set of filters + return self.cache_get(s, type, name, filters) + except KeyError: + pass # Cache miss + + try: + # Retrieve a cached, unfiltered value of the same item + unfiltered = self.cache_get(s, type, name, None) + except KeyError: + pass # Cache miss + else: + # Success; filter and return + return filtered(unfiltered, filters) + + # Failed to load item from cache + # Retrieve the item item = self._get_item(s, type, name, load=True) @@ -525,19 +546,24 @@ def item_get_elements(self, s, type, name, filters=None): data['lvl'] = item.getLevels(jList) data['mrg'] = item.getMarginals(jList) - return pd.DataFrame.from_dict(data, orient='columns') \ - .astype(types) + result = pd.DataFrame.from_dict(data, orient='columns') \ + .astype(types) elif type == 'set': # Index sets - return pd.Series(item.getCol(0, jList)) + result = pd.Series(item.getCol(0, jList)) elif type == 'par': # Scalar parameters - return dict(value=item.getScalarValue().floatValue(), - unit=str(item.getScalarUnit())) + result = dict(value=item.getScalarValue().floatValue(), + unit=str(item.getScalarUnit())) elif type in ('equ', 
'var'): # Scalar equations and variables - return dict(lvl=item.getScalarLevel().floatValue(), - mrg=item.getScalarMarginal().floatValue()) + result = dict(lvl=item.getScalarLevel().floatValue(), + mrg=item.getScalarMarginal().floatValue()) + + # Store cache + self.cache(s, type, name, filters, result) + + return result def item_set_elements(self, s, type, name, elements): jobj = self._get_item(s, type, name) @@ -566,11 +592,15 @@ def item_set_elements(self, s, type, name, elements): else: # pragma: no cover raise RuntimeError('unhandled Java exception') from e + self.cache_invalidate(s, type, name) + def item_delete_elements(self, s, type, name, keys): jitem = self._get_item(s, type, name, load=False) for key in keys: jitem.removeElement(to_jlist2(key)) + self.cache_invalidate(s, type, name) + def get_meta(self, s): def unwrap(v): """Unwrap metadata numeric value (BigDecimal -> Double)""" @@ -593,6 +623,8 @@ def clear_solution(self, s, from_year=None): else: self.jindex[s].removeSolution() + self.cache_invalidate(s) + # MsgScenario methods def cat_list(self, ms, name): @@ -630,6 +662,8 @@ def read_gdx(self, s, path, check_solution, comment, equ_list, var_list): str(path.parent), path.name, comment, to_jlist2(var_list), to_jlist2(equ_list), check_solution) + self.cache_invalidate(s) + def _get_item(self, s, ix_type, name, load=True): """Return the Java object for item *name* of *ix_type*. diff --git a/ixmp/model/gams.py b/ixmp/model/gams.py index 436433f17..b3f1824df 100644 --- a/ixmp/model/gams.py +++ b/ixmp/model/gams.py @@ -132,9 +132,6 @@ def format(key): # Invoke GAMS check_call(command, shell=os.name == 'nt', cwd=model_file.parent) - # Reset Python data cache - scenario.clear_cache() - # Read model solution scenario._backend('read_gdx', self.out_file, self.check_solution, From e51645b4ec95b137751c35144021fce5a4226fad Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 21 Nov 2019 16:10:22 +0100 Subject: [PATCH 04/13] Adjust reporting to avoid Scenario._element --- ixmp/reporting/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ixmp/reporting/utils.py b/ixmp/reporting/utils.py index dc3b8d36a..dfe7c61f7 100644 --- a/ixmp/reporting/utils.py +++ b/ixmp/reporting/utils.py @@ -224,7 +224,7 @@ def data_for_quantity(ix_type, name, column, scenario, filters=None): filters = filters_to_use # Retrieve quantity data - data = scenario._element(ix_type, name, filters) + data = getattr(scenario, ix_type)(name, filters) # ixmp/GAMS scalar is not returned as pd.DataFrame if isinstance(data, dict): From 8be1669356b89fbd6e21008e8430314f8ac592a0 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 21 Nov 2019 16:10:33 +0100 Subject: [PATCH 05/13] Adjust tests --- tests/test_core.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 154b1b49f..119367514 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -307,25 +307,32 @@ def test_meta(test_mp): def test_load_scenario_data(test_mp): - scen = ixmp.Scenario(test_mp, *can_args, cache=True) + """load_scenario_data() caches all data.""" + scen = ixmp.Scenario(test_mp, *can_args) scen.load_scenario_data() - assert ('par', 'd') in scen._pycache # key exists - df = scen.par('d', filters={'i': ['seattle']}) - obs = df.loc[0, 'unit'] - exp = 'km' - assert obs == exp + + cache_key = scen.platform._backend._cache_key(scen, 'par', 'd') + + # Item exists in cache + assert cache_key in scen.platform._backend._cache + 
+ # Cache has not been used + hits_before = scen.platform._backend._cache_hit.get(cache_key, 0) + assert hits_before == 0 + + # Retrieving the expected value + assert 'km' == scen.par('d', filters={'i': ['seattle']}).loc[0, 'unit'] + + # Cache was used to return the value + hits_after = scen.platform._backend._cache_hit[cache_key] + assert hits_after == hits_before + 1 def test_load_scenario_data_clear_cache(test_mp): # this fails on commit: 4376f54 scen = ixmp.Scenario(test_mp, *can_args, cache=True) scen.load_scenario_data() - scen.clear_cache(name='d') - - -def test_load_scenario_data_raises(test_mp): - scen = ixmp.Scenario(test_mp, *can_args, cache=False) - pytest.raises(ValueError, scen.load_scenario_data) + scen.platform._backend.cache_invalidate(scen, 'par', 'd') def test_log_level(test_mp): From 19d07caca621e9abf536fb3c09c7e5d400c9d033 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 21 Nov 2019 20:48:45 +0100 Subject: [PATCH 06/13] Preallocate DataFrame in JDBCBackend.item_get_elements --- ixmp/backend/jdbc.py | 51 +++++++++++++++++++++++++---------------- ixmp/reporting/utils.py | 20 ++++++++-------- tests/test_reporting.py | 7 +++--- 3 files changed, 46 insertions(+), 32 deletions(-) diff --git a/ixmp/backend/jdbc.py b/ixmp/backend/jdbc.py index 7e077bfc1..e68dfe113 100644 --- a/ixmp/backend/jdbc.py +++ b/ixmp/backend/jdbc.py @@ -14,6 +14,7 @@ from jpype import JClass import numpy as np import pandas as pd +from pandas.api.types import CategoricalDtype from ixmp import config from ixmp.core import Scenario @@ -524,30 +525,40 @@ def item_get_elements(self, s, type, name, filters=None): if item.getDim() > 0: # Mapping set or multi-dimensional equation, parameter, or variable - idx_names = list(item.getIdxNames()) + columns = list(item.getIdxNames()) idx_sets = list(item.getIdxSets()) - data = {} - types = {} + # Prepare dtypes for index columns + dtypes = {} + for idx_name, idx_set in zip(columns, idx_sets): + if idx_set == 'year': + dtypes[idx_name] = int + else: + dtypes[idx_name] = CategoricalDtype( + self.item_get_elements(s, 'set', idx_set)) - # Retrieve index columns - for d, (d_name, d_set) in enumerate(zip(idx_names, idx_sets)): - data[d_name] = item.getCol(d, jList) - if d_set == 'year': - # Record column for later type conversion - types[d_name] = int - - # Retrieve value columns + # Prepare dtypes for additional columns if type == 'par': - data['value'] = item.getValues(jList) - data['unit'] = item.getUnits(jList) - - if type in ('equ', 'var'): - data['lvl'] = item.getLevels(jList) - data['mrg'] = item.getMarginals(jList) - - result = pd.DataFrame.from_dict(data, orient='columns') \ - .astype(types) + columns.extend(['value', 'unit']) + dtypes['value'] = float + dtypes['unit'] = CategoricalDtype(self.jobj.getUnitList()) + elif type in ('equ', 'var'): + columns.extend(['lvl', 'mrg']) + dtypes.update({'lvl': float, 'mrg': float}) + + # Prepare empty DataFrame + result = pd.DataFrame(index=pd.RangeIndex(len(jList)), + columns=columns) \ + .astype(dtypes) + + for i in range(len(idx_sets)): + result.iloc[:, i] = item.getCol(i, jList) + if type == 'par': + result.loc[:, 'value'] = item.getValues(jList) + result.loc[:, 'unit'] = item.getUnits(jList) + elif type in ('equ', 'var'): + result.loc[:, 'lvl'] = item.getLevels(jList) + result.loc[:, 'mrg'] = item.getMarginals(jList) elif type == 'set': # Index sets result = pd.Series(item.getCol(0, jList)) diff --git a/ixmp/reporting/utils.py b/ixmp/reporting/utils.py index dfe7c61f7..244cf31d1 100644 --- 
a/ixmp/reporting/utils.py +++ b/ixmp/reporting/utils.py @@ -208,9 +208,10 @@ def data_for_quantity(ix_type, name, column, scenario, filters=None): log.debug('Retrieving data for {}'.format(name)) # Only use the relevant filters + idx_names = scenario.idx_names(name) if filters: # Dimensions of the object - dims = dims_for_qty(scenario.idx_names(name)) + dims = dims_for_qty(idx_names) # Mapping from renamed dimensions to Scenario dimension names MAP = get_reversed_rename_dims() @@ -237,6 +238,9 @@ def data_for_quantity(ix_type, name, column, scenario, filters=None): log.warning(f'0 values for {ix_type} {name!r} using filters:' f'\n {filters!r}\n Subsequent computations may fail.') + # Convert categorical dtype to str + data = data.astype({col: str for col in idx_names}) + # List of the dimensions dims = dims_for_qty(data) @@ -258,8 +262,8 @@ def data_for_quantity(ix_type, name, column, scenario, filters=None): # Set index if 1 or more dimensions if len(dims): # First rename, then set index - data.rename(columns=RENAME_DIMS, inplace=True) - data.set_index(dims, inplace=True) + data = data.rename(columns=RENAME_DIMS) \ + .set_index(dims) # Check sparseness # try: @@ -272,20 +276,18 @@ def data_for_quantity(ix_type, name, column, scenario, filters=None): # info = (name, shape, filled, size, need_to_chunk) # log.debug(' '.join(map(str, info))) - # Convert to a Dataset, assign attrbutes and name - # ds = xr.Dataset.from_dataframe(data)[column] - # or to a new "Attribute Series" - ds = as_quantity(data[column]) \ + # Convert to a Quantity, assign attrbutes and name + qty = as_quantity(data[column]) \ .assign_attrs(attrs) \ .rename(name + ('-margin' if column == 'mrg' else '')) try: # Remove length-1 dimensions for scalars - ds = ds.squeeze('index', drop=True) + qty = qty.squeeze('index', drop=True) except KeyError: pass - return ds + return qty def as_attrseries(obj): diff --git a/tests/test_reporting.py b/tests/test_reporting.py index b8222d7da..c1dae8916 100644 --- a/tests/test_reporting.py +++ b/tests/test_reporting.py @@ -537,9 +537,10 @@ def test_report_size(test_mp): # test_mp.add_unit('kg') scen = ixmp.Scenario(test_mp, 'size test', 'base', version='new') - # Dimensions and their lengths - dims = 'abcdef' - sizes = [1, 5, 21, 21, 89, 377] # Fibonacci #s; next 1597, 6765 + # Dimensions and their lengths (Fibonacci numbers) + N_dims = 6 + dims = 'abcdefgh'[:N_dims + 1] + sizes = [1, 5, 21, 21, 89, 377, 1597, 6765][:N_dims + 1] # commented: "377 / 73984365 elements = 0.00051% full" # from functools import reduce From feae17b04eee9975dcca2ed79cb11c37e3e6b780 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 21 Nov 2019 21:56:40 +0100 Subject: [PATCH 07/13] Use JPype slice code path --- ixmp/backend/jdbc.py | 12 +++++++----- tests/test_reporting.py | 1 - 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/ixmp/backend/jdbc.py b/ixmp/backend/jdbc.py index e68dfe113..c071635e7 100644 --- a/ixmp/backend/jdbc.py +++ b/ixmp/backend/jdbc.py @@ -551,14 +551,16 @@ def item_get_elements(self, s, type, name, filters=None): columns=columns) \ .astype(dtypes) + # Copy vectors from Java into DataFrame columns + # NB [:] causes JPype to use a faster code path for i in range(len(idx_sets)): - result.iloc[:, i] = item.getCol(i, jList) + result.iloc[:, i] = item.getCol(i, jList)[:] if type == 'par': - result.loc[:, 'value'] = item.getValues(jList) - result.loc[:, 'unit'] = item.getUnits(jList) + result.loc[:, 'value'] = item.getValues(jList)[:] + result.loc[:, 'unit'] = 
item.getUnits(jList)[:] elif type in ('equ', 'var'): - result.loc[:, 'lvl'] = item.getLevels(jList) - result.loc[:, 'mrg'] = item.getMarginals(jList) + result.loc[:, 'lvl'] = item.getLevels(jList)[:] + result.loc[:, 'mrg'] = item.getMarginals(jList)[:] elif type == 'set': # Index sets result = pd.Series(item.getCol(0, jList)) diff --git a/tests/test_reporting.py b/tests/test_reporting.py index c1dae8916..b8177573e 100644 --- a/tests/test_reporting.py +++ b/tests/test_reporting.py @@ -1,7 +1,6 @@ """Tests for ixmp.reporting.""" import os -from click.testing import CliRunner import ixmp import numpy as np import pandas as pd From 4b4c872237fefb38b8e2d0e0a78247f8cd46fa27 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 22 Nov 2019 10:02:01 +0100 Subject: [PATCH 08/13] Use as_str_list in filtered() --- ixmp/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ixmp/utils.py b/ixmp/utils.py index 73e741e20..c750e218e 100644 --- a/ixmp/utils.py +++ b/ixmp/utils.py @@ -164,7 +164,7 @@ def filtered(df, filters): mask = pd.Series(True, index=df.index) for k, v in filters.items(): - isin = df[k].isin(v) + isin = df[k].isin(as_str_list(v)) mask = mask & isin return df[mask] From 7ea0eb97672e8df6bd8ae8c71643716f2fd0bf9f Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 22 Nov 2019 10:30:01 +0100 Subject: [PATCH 09/13] Handle 'year' int dtype for message_ix after data load --- ixmp/backend/jdbc.py | 7 ++----- ixmp/core.py | 11 ++++++++++- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/ixmp/backend/jdbc.py b/ixmp/backend/jdbc.py index c071635e7..893842aff 100644 --- a/ixmp/backend/jdbc.py +++ b/ixmp/backend/jdbc.py @@ -531,11 +531,8 @@ def item_get_elements(self, s, type, name, filters=None): # Prepare dtypes for index columns dtypes = {} for idx_name, idx_set in zip(columns, idx_sets): - if idx_set == 'year': - dtypes[idx_name] = int - else: - dtypes[idx_name] = CategoricalDtype( - self.item_get_elements(s, 'set', idx_set)) + dtypes[idx_name] = CategoricalDtype( + self.item_get_elements(s, 'set', idx_set)) # Prepare dtypes for additional columns if type == 'par': diff --git a/ixmp/core.py b/ixmp/core.py index 1a2cc3bd1..0fa508970 100644 --- a/ixmp/core.py +++ b/ixmp/core.py @@ -925,7 +925,16 @@ def par(self, name, filters=None, **kwargs): filters : dict index names mapped list of index set elements """ - return self._backend('item_get_elements', 'par', name, filters) + result = self._backend('item_get_elements', 'par', name, filters) + + # FIXME message_ix requires 'year' columns to be returned as integers + # This code should be in a message_ix override of this method. + dtypes = {} + for idx_set, col_name in zip(self.idx_sets(), self.idx_names()): + if idx_set == 'year': + dtypes[col_name] = int + + return result.astype(dtypes) def add_par(self, name, key_or_data=None, value=None, unit=None, comment=None, key=None, val=None): From c6ae2f74f8d8e1936203abdd79f1d403bb9477bc Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 22 Nov 2019 10:37:32 +0100 Subject: [PATCH 10/13] Handle scalars in previous --- ixmp/core.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ixmp/core.py b/ixmp/core.py index 0fa508970..e68d22d7c 100644 --- a/ixmp/core.py +++ b/ixmp/core.py @@ -930,11 +930,12 @@ def par(self, name, filters=None, **kwargs): # FIXME message_ix requires 'year' columns to be returned as integers # This code should be in a message_ix override of this method. 
dtypes = {} - for idx_set, col_name in zip(self.idx_sets(), self.idx_names()): + for idx_set, col_name in zip(self.idx_sets(name), + self.idx_names(name)): if idx_set == 'year': dtypes[col_name] = int - return result.astype(dtypes) + return result.astype(dtypes) if len(dtypes) else result def add_par(self, name, key_or_data=None, value=None, unit=None, comment=None, key=None, val=None): From b4408652c3bdbf0b15cb8ffb6fa796eeaef15257 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 22 Nov 2019 11:18:55 +0100 Subject: [PATCH 11/13] Document CachingBackend --- doc/source/api-backend.rst | 13 ++++++++++ doc/source/api-python.rst | 1 - doc/source/reporting.rst | 26 +++++++++---------- ixmp/backend/base.py | 53 +++++++++++++++++++++++++++++++++----- 4 files changed, 71 insertions(+), 22 deletions(-) diff --git a/doc/source/api-backend.rst b/doc/source/api-backend.rst index 43840882f..1b4bdba61 100644 --- a/doc/source/api-backend.rst +++ b/doc/source/api-backend.rst @@ -51,6 +51,10 @@ Backend API - :class:`Platform ` code is not affected by where and how data is stored; it merely handles user arguments and then makes, usually, a single :class:`Backend` call. - :class:`Backend` code does not need to perform argument checking; merely store and retrieve data reliably. +- Additional Backends may inherit from :class:`Backend` or + :class:`CachingBackend`. + + .. autodata:: ixmp.backend.FIELDS .. currentmodule:: ixmp.backend.base @@ -143,3 +147,12 @@ Backend API cat_get_elements cat_list cat_set_elements + + +.. autoclass:: ixmp.backend.base.CachingBackend + :members: + :private-members: + + CachingBackend stores cache values for multiple :class:`.TimeSeries`/:class:`Scenario` objects, and for multiple values of a *filters* argument. + + Subclasses **must** call :meth:`cache`, :meth:`cache_get`, and :meth:`cache_invalidate` as appropriate to manage the cache; CachingBackend does not enforce any such logic. diff --git a/doc/source/api-python.rst b/doc/source/api-python.rst index 1e038a2fe..f6981b6af 100644 --- a/doc/source/api-python.rst +++ b/doc/source/api-python.rst @@ -124,7 +124,6 @@ Scenario add_par add_set change_scalar - clear_cache clone equ equ_list diff --git a/doc/source/reporting.rst b/doc/source/reporting.rst index 494d13760..52068f3e6 100644 --- a/doc/source/reporting.rst +++ b/doc/source/reporting.rst @@ -136,24 +136,22 @@ Others: >>> k1.drop('a', 'c') == k2.drop('a') == 'foo:b' True - Notes - ----- - A Key has the same hash, and compares equal to its ``str()``. ``repr(key)`` - prints the Key in angle brackets ('<>') to signify it is a Key object. + Some notes: - >>> repr(k1) - + - A Key has the same hash, and compares equal to its ``str()``. + ``repr(key)`` prints the Key in angle brackets ('<>') to signify it is a Key object. - Keys are *immutable*: the properties :attr:`name`, :attr:`dims`, and - :attr:`tag` are read-only, and the methods :meth:`append`, :meth:`drop`, and - :meth:`add_tag` return *new* Key objects. + >>> repr(k1) + - Keys may be generated concisely by defining a convenience method: + - Keys are *immutable*: the properties :attr:`name`, :attr:`dims`, and :attr:`tag` are read-only, and the methods :meth:`append`, :meth:`drop`, and :meth:`add_tag` return *new* Key objects. 
- >>> def foo(dims): - >>> return Key('foo', dims.split()) - >>> foo('a b c') - foo:a-b-c + - Keys may be generated concisely by defining a convenience method: + + >>> def foo(dims): + >>> return Key('foo', dims.split()) + >>> foo('a b c') + foo:a-b-c Computations diff --git a/ixmp/backend/base.py b/ixmp/backend/base.py index 30fd5a0b7..f8747812a 100644 --- a/ixmp/backend/base.py +++ b/ixmp/backend/base.py @@ -764,12 +764,20 @@ def cat_set_elements(self, ms: Scenario, name, cat, keys, is_unique): class CachingBackend(Backend): - """Backend with additional features for caching values.""" + """Backend with additional features for caching data.""" + + #: Cache of values. Keys are given by :meth:`_cache_key`; values depend on + #: the subclass' usage of the cache. _cache = {} + + #: Count of number of times a value was retrieved from cache successfully + #: using :meth:`cache_get`. _cache_hit = {} def __init__(self): - """Initialize the cache.""" + super().__init__() + + # Empty the cache self._cache = {} self._cache_hit = {} @@ -777,12 +785,21 @@ def __init__(self): def _cache_key(self, ts, ix_type, name, filters=None): """Return a hashable cache key. + ixmp *filters* (a :class:`dict` of :class:`list`) are converted to a + unique id that is hashable. + Parameters ---------- ts : .TimeSeries ix_type : str name : str filters : dict + + Returns + ------- + tuple + A hashable key with 4 elements for *ts*, *ix_type*, *name*, and + *filters*. """ ts = id(ts) if filters is None or len(filters) == 0: @@ -793,7 +810,17 @@ def _cache_key(self, ts, ix_type, name, filters=None): return (ts, ix_type, name, filters) def cache_get(self, ts, ix_type, name, filters): - """Retrieve value from cache.""" + """Retrieve value from cache. + + The value in :attr:`_cache` is copied to avoid cached values being + modified by user code. :attr:`_cache_hit` is incremented. + + Raises + ------ + KeyError + If the key for *ts*, *ix_type*, *name* and *filters* is not in the + cache. + """ key = self._cache_key(ts, ix_type, name, filters) if key in self._cache: @@ -803,7 +830,14 @@ def cache_get(self, ts, ix_type, name, filters): raise KeyError(ts, ix_type, name, filters) def cache(self, ts, ix_type, name, filters, value): - """Store value in cache.""" + """Store *value* in cache. + + Returns + ------- + bool + :obj:`True` if the key was already in the cache and its value was + overwritten. + """ key = self._cache_key(ts, ix_type, name, filters) refreshed = key in self._cache @@ -812,10 +846,15 @@ def cache(self, ts, ix_type, name, filters, value): return refreshed def cache_invalidate(self, ts, ix_type=None, name=None, filters=None): - """Invalidate all cached values for *ix_type* and *name*. + """Invalidate cached values. + + With all arguments given, single key/value is removed from the cache. + Otherwise, multiple keys/values are removed: - If *filters* is :obj:`None` (the default), all filtered values are - also invalidated. If all argument are none, all + - *ts* only: all cached values associated with the :class:`.TimeSeries` + or :class:`.Scenario` object. + - *ts*, *ix_type*, and *name*: all cached values associated with the + ixmp item, whether filtered or unfiltered. 
""" key = self._cache_key(ts, ix_type, name, filters) From ff73799fe9cc659633b897dfb9e5a4eaa4318509 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 22 Nov 2019 11:28:48 +0100 Subject: [PATCH 12/13] Update JDBCBackend docs --- doc/source/api-backend.rst | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/doc/source/api-backend.rst b/doc/source/api-backend.rst index 1b4bdba61..41434f301 100644 --- a/doc/source/api-backend.rst +++ b/doc/source/api-backend.rst @@ -22,13 +22,8 @@ Provided backends JDBCBackend supports: - - ``dbtype='HSQLDB'``: HyperSQL databases in local files. - - Remote databases. This is accomplished by creating a :class:`ixmp.Platform` with the ``dbprops`` argument pointing a file that specifies JDBC information. For instance:: - - jdbc.driver = oracle.jdbc.driver.OracleDriver - jdbc.url = jdbc:oracle:thin:@database-server.example.com:1234:SCHEMA - jdbc.user = USER - jdbc.pwd = PASSWORD + - Databases in local files (HyperSQL) using ``driver='hsqldb'`` and the *path* argument. + - Remote, Oracle databases using ``driver='oracle'`` and the *url*, *username* and *password* arguments. It has the following methods that are not part of the overall :class:`Backend` API: @@ -38,11 +33,30 @@ Provided backends read_gdx write_gdx + JDBCBackend caches values in memory to improve performance when repeatedly reading data from the same items with :meth:`.par`, :meth:`.equ`, or :meth:`.var`. + + .. tip:: If repeatedly accessing the same item with different *filters*: + + 1. First, access the item by calling e.g. :meth:`.par` *without* any filters. + This causes the full contents of the item to be loaded into cache. + 2. Then, access by making multiple :meth:`.par` calls with different *filters* arguments. + The cache value is filtered and returned without further access to the database. + + .. tip:: Modifying an item by adding or deleting elements invalidates its cache. + .. automethod:: ixmp.backend.jdbc.start_jvm Backend API ----------- +.. currentmodule:: ixmp.backend.base + +.. autosummary:: + + ixmp.backend.FIELDS + ixmp.backend.base.Backend + ixmp.backend.base.CachingBackend + - :class:`ixmp.Platform` implements a *user-friendly* API for scientific programming. This means its methods can take many types of arguments, check, and transform them—in a way that provides modeler-users with easy, intuitive workflows. - In contrast, :class:`Backend` has a *very simple* API that accepts arguments and returns values in basic Python data types and structures. @@ -57,8 +71,6 @@ Backend API .. autodata:: ixmp.backend.FIELDS -.. currentmodule:: ixmp.backend.base - .. autoclass:: ixmp.backend.base.Backend :members: From c49c41630a60f52e4f983746a69e40598023b8a9 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Fri, 22 Nov 2019 11:30:13 +0100 Subject: [PATCH 13/13] Update RELEASE_NOTES --- RELEASE_NOTES.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index aba2cf096..f8e8633bf 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -20,7 +20,8 @@ Configuration for ixmp and its storage backends has been streamlined. - [#189](https://github.com/iiasa/ixmp/pull/189): Identify and load Scenarios using URLs. - [#182](https://github.com/iiasa/ixmp/pull/182), - [#200](https://github.com/iiasa/ixmp/pull/200): Add new Backend, Model APIs and JDBCBackend, GAMSModel classes. 
+ [#200](https://github.com/iiasa/ixmp/pull/200), + [#213](https://github.com/iiasa/ixmp/pull/213): Add new Backend, Model APIs and CachingBackend, JDBCBackend, GAMSModel classes. - [#188](https://github.com/iiasa/ixmp/pull/188), [#195](https://github.com/iiasa/ixmp/pull/195): Enhance reporting. - [#177](https://github.com/iiasa/ixmp/pull/177): add ability to pass `gams_args` through `Scenario.solve()`
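
A stand-alone sketch of the caching pattern that patches 02 and 03 introduce. CachingBackend (ixmp/backend/base.py) supplies the cache helpers _cache_key(), cache_get(), cache() and cache_invalidate(); JDBCBackend.item_get_elements() (ixmp/backend/jdbc.py) uses them by first trying the exact (item, filters) key, then a cached unfiltered value which it filters in Python, and only then loading from the database. The toy class below reproduces that flow outside ixmp: the name ToyCachingBackend, the _load_from_storage() helper, and the sample data are illustrative assumptions, and the simplified filtered() stands in for ixmp.utils.filtered()::

    from copy import copy
    import json

    import pandas as pd


    def filtered(df, filters):
        """Minimal stand-in for ixmp.utils.filtered()."""
        if filters is None:
            return df
        mask = pd.Series(True, index=df.index)
        for k, v in filters.items():
            mask &= df[k].isin(v)
        return df[mask]


    class ToyCachingBackend:
        """Illustration only; not the real ixmp.backend.base.CachingBackend."""
        def __init__(self):
            self._cache = {}       # cache key -> stored value
            self._cache_hit = {}   # cache key -> number of cache_get() hits

        @staticmethod
        def _cache_key(ts, ix_type, name, filters=None):
            # Same scheme as CachingBackend._cache_key(): the id() of the
            # TimeSeries/Scenario object, plus a hash of the sorted filters
            if not filters:
                return (id(ts), ix_type, name)
            return (id(ts), ix_type, name,
                    hash(json.dumps(sorted(filters.items()))))

        def cache_get(self, ts, ix_type, name, filters):
            key = self._cache_key(ts, ix_type, name, filters)
            if key not in self._cache:
                raise KeyError(ts, ix_type, name, filters)
            self._cache_hit[key] = self._cache_hit.get(key, 0) + 1
            # Return a copy so user code does not modify the cached value
            return copy(self._cache[key])

        def cache(self, ts, ix_type, name, filters, value):
            self._cache[self._cache_key(ts, ix_type, name, filters)] = value

        def item_get_elements(self, ts, ix_type, name, filters=None):
            try:
                # 1. A value cached with these exact filters
                return self.cache_get(ts, ix_type, name, filters)
            except KeyError:
                pass
            try:
                # 2. An unfiltered cached value: filter it in Python
                return filtered(self.cache_get(ts, ix_type, name, None),
                                filters)
            except KeyError:
                pass
            # 3. Full cache miss: load from storage, cache, then filter
            value = self._load_from_storage(ix_type, name)
            self.cache(ts, ix_type, name, filters, value)
            return filtered(value, filters)

        def _load_from_storage(self, ix_type, name):
            # Placeholder for the real database/JDBC call
            return pd.DataFrame({'i': ['seattle', 'san-diego'],
                                 'value': [1.7, 2.5],
                                 'unit': ['km', 'km']})


    ts = object()  # stands in for a TimeSeries/Scenario instance
    be = ToyCachingBackend()
    be.item_get_elements(ts, 'par', 'd')  # loads from "storage" and caches
    df = be.item_get_elements(ts, 'par', 'd', filters={'i': ['seattle']})
    assert df.loc[0, 'unit'] == 'km'      # answered by filtering the cached value

This mirrors the tip added to doc/source/api-backend.rst in patch 12: read an item once without filters to populate the cache, and repeated filtered reads of the same item are then answered from memory rather than from the database.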