Skip to content

Commit

Permalink
Merge pull request #140 from bashtage/fix-coverage
Browse files Browse the repository at this point in the history
ENH: Allow env to be set for SystemFormulas
  • Loading branch information
bashtage authored Dec 11, 2017
2 parents ab001a0 + 47a1214 commit 9247a1f
Show file tree
Hide file tree
Showing 3 changed files with 146 additions and 2 deletions.
62 changes: 61 additions & 1 deletion linearmodels/system/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,22 @@ def formula(self):
"""Cleaned version of formula"""
return self._clean_formula

@property
def eval_env(self):
    """Depth passed on to the per-equation formula parsers (get or set)"""
    return self._eval_env

@eval_env.setter
def eval_env(self, value):
    self._eval_env = value
    # Rebuild every per-equation parser so it is constructed with the new
    # depth; the formula and data are taken from the parser being replaced.
    rebuilt = OrderedDict()
    for label, old in self._parsers.items():
        rebuilt[label] = IVFormulaParser(old._formula, old._data, self._eval_env)
    self._parsers = rebuilt

@property
def equation_labels(self):
    """Labels of the stored equation parsers, as a list in iteration order"""
    return [label for label in self._parsers.keys()]
Expand Down Expand Up @@ -532,6 +548,47 @@ def __str__(self):
return out

def predict(self, params, *, equations=None, data=None, eval_env=8):
"""
Predict values for additional data
Parameters
----------
params : array-like
Model parameters (nvar by 1)
equations : dict
Dictionary-like structure containing exogenous and endogenous
variables. Each key is an equations label and must
match the labels used to fit the model. Each value must be either a tuple
of the form (exog, endog) or a dictionary with keys 'exog' and 'endog'.
If predictions are not required for one or more of the model equations,
these keys can be omitted.
data : DataFrame
Values to use when making predictions from a model constructed
from a formula
eval_env : int
Depth to use when evaluating formulas using Patsy.
Returns
-------
predictions : DataFrame
Fitted values from supplied data and parameters
Notes
-----
If `data` is not None, then `equations` must be None.
Predictions from models constructed using formulas can
be computed using either `equations`, which will treat these as
arrays of values corresponding to the formula-processed data, or using
`data` which will be processed using the formula used to construct the
values corresponding to the original model specification.
When using `exog` and `endog`, the regressor array for a particular
equation is assembled as
`[equations[eqn]['exog'], equations[eqn]['endog']]` where `eqn` is
an equation label. These must correspond to the columns in the
estimated model.
"""

if data is not None:
parser = SystemFormulaParser(self.formula, data=data, eval_env=eval_env)
equations = parser.data
Expand All @@ -543,19 +600,22 @@ def predict(self, params, *, equations=None, data=None, eval_env=8):
for i, label in enumerate(self._eq_labels):
kx = self._x[i].shape[1]
if label in equations:
b = params[loc:loc + kx]
eqn = equations[label] # type: dict
exog = eqn.get('exog', None)
endog = eqn.get('endog', None)
if exog is None and endog is None:
loc += kx
continue

if exog is not None:
exog_endog = IVData(exog).pandas
if endog is not None:
endog = IVData(endog)
exog_endog = concat([exog_endog, endog.pandas], 1)
else:
exog_endog = IVData(endog).pandas
b = params[loc:loc + kx]

fitted = exog_endog.values @ b
fitted = DataFrame(fitted, index=exog_endog.index, columns=[label])
out[label] = fitted
Expand Down
36 changes: 35 additions & 1 deletion linearmodels/tests/panel/test_formula.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
import pandas as pd
import pytest

from linearmodels.compat.pandas import assert_frame_equal
from linearmodels.formula import (between_ols, first_difference_ols, panel_ols,
pooled_ols, random_effects, fama_macbeth)
from linearmodels.panel.model import (BetweenOLS, FirstDifferenceOLS, PanelOLS,
PooledOLS, RandomEffects, FamaMacBeth)
PooledOLS, RandomEffects, FamaMacBeth,
PanelFormulaParser)
from linearmodels.tests.panel._utility import generate_data, datatypes

pytestmark = pytest.mark.filterwarnings('ignore::linearmodels.utility.MissingValueWarning')
Expand Down Expand Up @@ -40,6 +42,11 @@ def models(request):
return request.param


@pytest.fixture(params=[True, False])
def effects(request):
    """Boolean fixture parametrized over True and False"""
    return request.param


def sigmoid(v):
    """
    Logistic function exp(v) / (1 + exp(v)), applied elementwise

    Parameters
    ----------
    v : scalar or array-like
        Values to transform; anything accepted by NumPy ufuncs.

    Returns
    -------
    out : scalar or array-like
        Transformed values, each in the closed interval [0, 1].

    Notes
    -----
    Evaluated as 0.5 * (1 + tanh(v / 2)), which is algebraically identical
    to exp(v) / (1 + exp(v)). The direct form returns nan (inf / inf) once
    exp(v) overflows, which in double precision happens for v above about
    709; the tanh form saturates cleanly at 0 and 1 instead.
    """
    return 0.5 * (1 + np.tanh(0.5 * v))

Expand Down Expand Up @@ -216,3 +223,30 @@ def test_formulas_predict_error(data, models, formula):
res = model(data.y, x[vars]).fit()
with pytest.raises(ValueError):
res.predict(data=joined)


def test_parser(data, formula, effects):
    """Exercise PanelFormulaParser accessors, eval_env and effect flags"""
    # Only the DataFrame-based data variant is exercised here
    if not isinstance(data.y, pd.DataFrame):
        return
    if effects:
        formula += ' + EntityEffects + TimeEffects'
    joined = data.x
    joined['y'] = data.y

    parser = PanelFormulaParser(formula, joined)
    dep, exog = parser.data
    assert_frame_equal(parser.dependent, dep)
    assert_frame_equal(parser.exog, exog)

    # eval_env must round-trip through its setter
    for depth in (3, 2):
        parser.eval_env = depth
        assert parser.eval_env == depth

    assert parser.entity_effect == ('EntityEffects' in formula)
    assert parser.time_effect == ('TimeEffects' in formula)

    formula += ' + FixedEffects '
    if effects:
        # FixedEffects on top of EntityEffects + TimeEffects is expected to fail
        with pytest.raises(ValueError):
            PanelFormulaParser(formula, joined)
        return

    parser = PanelFormulaParser(formula, joined)
    assert parser.entity_effect
50 changes: 50 additions & 0 deletions linearmodels/tests/system/test_formulas.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from collections import OrderedDict
from itertools import product

import numpy as np
Expand All @@ -7,6 +8,7 @@
from linearmodels import SUR, IVSystemGMM, IV3SLS
from linearmodels.compat.pandas import assert_series_equal, assert_frame_equal
from linearmodels.formula import sur, iv_system_gmm, iv_3sls
from linearmodels.system.model import SystemFormulaParser
from linearmodels.tests.system._utility import generate_3sls_data_v2
from linearmodels.utility import AttrDict

Expand Down Expand Up @@ -100,6 +102,20 @@ def test_predict_partial(config):
pred2 = res.predict(data=joined, dataframe=True)
assert_frame_equal(pred2[pred.columns], pred)

eqns = AttrDict()
for key in list(mod._equations.keys())[1:]:
eqns[key] = mod._equations[key]
final = list(mod._equations.keys())[0]
eqns[final] = {'exog': None, 'endog': None}
pred3 = res.predict(equations=eqns, dataframe=True)
assert_frame_equal(pred2[pred3.columns], pred3)

eqns = AttrDict()
for key in mod._equations:
eqns[key] = {k: v for k, v in mod._equations[key].items() if v.shape[1] > 0}
pred4 = res.predict(equations=eqns, dataframe=True)
assert_frame_equal(pred2, pred4)


def test_invalid_predict(config):
fmla, model, interface = config
Expand All @@ -110,3 +126,37 @@ def test_invalid_predict(config):
res = mod.fit()
with pytest.raises(ValueError):
res.predict(data=joined, equations=mod._equations)


def test_parser(config):
    """Exercise SystemFormulaParser accessors and eval_env round-tripping"""
    fmla, _model, _interface = config
    parser = SystemFormulaParser(fmla, joined, eval_env=5)
    orig_data = parser.data
    assert isinstance(orig_data, OrderedDict)
    assert parser.eval_env == 5

    parser.eval_env = 4
    assert parser.eval_env == 4

    # Pair each per-equation accessor with the matching key of the entries
    # in parser.data; every accessor must agree with the original data.
    views = (('exog', parser.exog),
             ('dependent', parser.dependent),
             ('endog', parser.endog),
             ('instruments', parser.instruments))
    for label in orig_data:
        eq = orig_data[label]
        for name, view in views:
            assert_frame_equal(view[label], eq[name])

    for label in parser.equation_labels:
        assert label in orig_data

    # A parser rebuilt from the cleaned formula must reproduce the same data
    new_parser = SystemFormulaParser(parser.formula, joined, eval_env=5)
    new_data = new_parser.data
    for label in orig_data:
        eq1 = orig_data[label]
        eq2 = new_data[label]
        for name in eq1:
            frame = eq1[name]
            if frame is None:
                continue
            assert_frame_equal(frame, eq2[name])

0 comments on commit 9247a1f

Please sign in to comment.