From 09fee4235e821925f6ce1606a6e34dd8776039ae Mon Sep 17 00:00:00 2001 From: Nikolay Kushin Date: Fri, 22 Nov 2019 16:52:20 +0100 Subject: [PATCH] Convert year dimensions to integers (#218) - bring back special handling for year dim - extend to any dim containing year reference - simplify df creation by using again from_dict Note: it turns out from_dict has the same or better performance compared with the alternative approach of passing an index to an empty df upon creation. It also allows an int dtype (which an empty df doesn't support because of NaNs) --- ixmp/backend/jdbc.py | 38 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/ixmp/backend/jdbc.py b/ixmp/backend/jdbc.py index 88d1897fe..7060de74c 100644 --- a/ixmp/backend/jdbc.py +++ b/ixmp/backend/jdbc.py @@ -529,36 +529,24 @@ def item_get_elements(self, s, type, name, filters=None): columns = list(item.getIdxNames()) idx_sets = list(item.getIdxSets()) - # Prepare dtypes for index columns - dtypes = {} - for idx_name, idx_set in zip(columns, idx_sets): - dtypes[idx_name] = CategoricalDtype( - self.item_get_elements(s, 'set', idx_set)) + data = {} + # Prepare arrays with column values column + # NB [:] causes JPype to use a faster code path + for i, (idx_name, idx_set) in enumerate(zip(columns, idx_sets)): + dtype = 'int16' if 'year' in idx_name else 'category' + data[idx_name] = pd.Series(item.getCol(i, jList)[:], dtype=dtype) - # Prepare dtypes for additional columns + # Add type-specific columns if type == 'par': - columns.extend(['value', 'unit']) - dtypes['value'] = float - dtypes['unit'] = CategoricalDtype(self.jobj.getUnitList()) + data['value'] = pd.Series(item.getValues(jList)[:], dtype='float') + data['unit'] = pd.Series(item.getUnits(jList)[:], dtype='category') elif type in ('equ', 'var'): - columns.extend(['lvl', 'mrg']) - dtypes.update({'lvl': float, 'mrg': float}) + data['lvl'] = pd.Series(item.getLevels(jList)[:], dtype='float') + data['mrg'] = 
pd.Series(item.getMarginals(jList)[:], dtype='float') - # Prepare empty DataFrame - result = pd.DataFrame(index=pd.RangeIndex(len(jList)), - columns=columns) \ - .astype(dtypes) + # Construct DataFrame + result = pd.DataFrame.from_dict(data, orient='columns') - # Copy vectors from Java into DataFrame columns - # NB [:] causes JPype to use a faster code path - for i in range(len(idx_sets)): - result.iloc[:, i] = item.getCol(i, jList)[:] - if type == 'par': - result.loc[:, 'value'] = item.getValues(jList)[:] - result.loc[:, 'unit'] = item.getUnits(jList)[:] - elif type in ('equ', 'var'): - result.loc[:, 'lvl'] = item.getLevels(jList)[:] - result.loc[:, 'mrg'] = item.getMarginals(jList)[:] elif type == 'set': # Index sets result = pd.Series(item.getCol(0, jList))