Skip to content

Commit

Permalink
Convert year dimensions to integers (#218)
Browse files Browse the repository at this point in the history
- bring back special handling for year dim
- extend to any dim containing year reference
- simplify df creation by using again from_dict

Note: it turns out that from_dict has the same or better performance compared with the alternative approach of passing an index to an empty DataFrame upon creation.
It also allows columns to have an int dtype (which an empty DataFrame doesn't support because of NaNs).
  • Loading branch information
zikolach committed Nov 22, 2019
1 parent f68bbeb commit 2dfcbd2
Showing 1 changed file with 13 additions and 26 deletions.
39 changes: 13 additions & 26 deletions ixmp/backend/jdbc.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from jpype import JClass
import numpy as np
import pandas as pd
from pandas.api.types import CategoricalDtype

from ixmp import config
from ixmp.core import Scenario
Expand Down Expand Up @@ -529,36 +528,24 @@ def item_get_elements(self, s, type, name, filters=None):
columns = list(item.getIdxNames())
idx_sets = list(item.getIdxSets())

# Prepare dtypes for index columns
dtypes = {}
for idx_name, idx_set in zip(columns, idx_sets):
dtypes[idx_name] = CategoricalDtype(
self.item_get_elements(s, 'set', idx_set))
data = {}
# Prepare arrays with the values for each column
# NB [:] causes JPype to use a faster code path
for i, (idx_name, idx_set) in enumerate(zip(columns, idx_sets)):
dtype = 'int16' if 'year' in idx_name else 'category'
data[idx_name] = pd.Series(item.getCol(i, jList)[:], dtype=dtype)

# Prepare dtypes for additional columns
# Add type-specific columns
if type == 'par':
columns.extend(['value', 'unit'])
dtypes['value'] = float
dtypes['unit'] = CategoricalDtype(self.jobj.getUnitList())
data['value'] = pd.Series(item.getValues(jList)[:], dtype='float')
data['unit'] = pd.Series(item.getUnits(jList)[:], dtype='category')
elif type in ('equ', 'var'):
columns.extend(['lvl', 'mrg'])
dtypes.update({'lvl': float, 'mrg': float})
data['lvl'] = pd.Series(item.getLevels(jList)[:], dtype='float')
data['mrg'] = pd.Series(item.getMarginals(jList)[:], dtype='float')

# Prepare empty DataFrame
result = pd.DataFrame(index=pd.RangeIndex(len(jList)),
columns=columns) \
.astype(dtypes)
# Construct DataFrame
result = pd.DataFrame.from_dict(data, orient='columns')

# Copy vectors from Java into DataFrame columns
# NB [:] causes JPype to use a faster code path
for i in range(len(idx_sets)):
result.iloc[:, i] = item.getCol(i, jList)[:]
if type == 'par':
result.loc[:, 'value'] = item.getValues(jList)[:]
result.loc[:, 'unit'] = item.getUnits(jList)[:]
elif type in ('equ', 'var'):
result.loc[:, 'lvl'] = item.getLevels(jList)[:]
result.loc[:, 'mrg'] = item.getMarginals(jList)[:]
elif type == 'set':
# Index sets
result = pd.Series(item.getCol(0, jList))
Expand Down

0 comments on commit 2dfcbd2

Please sign in to comment.