Skip to content

Commit

Permalink
ENH: Allow env to be set for SystemFormulas
Browse files Browse the repository at this point in the history
Allow env to be changed for system formulas
Add doc string for predict
Test rare predict paths
Test rare formula parser paths
  • Loading branch information
bashtage committed Dec 11, 2017
1 parent ab001a0 commit 47a1214
Show file tree
Hide file tree
Showing 3 changed files with 146 additions and 2 deletions.
62 changes: 61 additions & 1 deletion linearmodels/system/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,22 @@ def formula(self):
"""Cleaned version of formula"""
return self._clean_formula

@property
def eval_env(self):
    """Depth passed to the equation parsers when evaluating formulas.

    Assigning a new value stores it and then recreates the parser of every
    equation with the new depth.
    """
    return self._eval_env

@eval_env.setter
def eval_env(self, value):
    self._eval_env = value
    # Recreate each parser from its stored formula and data so that all of
    # them are built with the depth that was just assigned.
    rebuilt = OrderedDict()
    for label, old in self._parsers.items():
        rebuilt[label] = IVFormulaParser(old._formula, old._data, self._eval_env)
    self._parsers = rebuilt

@property
def equation_labels(self):
    """List of the keys of the stored parsers, in their stored order."""
    labels = self._parsers.keys()
    return list(labels)
Expand Down Expand Up @@ -532,6 +548,47 @@ def __str__(self):
return out

def predict(self, params, *, equations=None, data=None, eval_env=8):
"""
Predict values for additional data
Parameters
----------
params : array-like
Model parameters (nvar by 1)
equations : dict
Dictionary-like structure containing exogenous and endogenous
variables. Each key is an equation label and must
match the labels used to fit the model. Each value must be either a tuple
of the form (exog, endog) or a dictionary with keys 'exog' and 'endog'.
If predictions are not required for one or more of the model equations,
these keys can be omitted.
data : DataFrame
Values to use when making predictions from a model constructed
from a formula
eval_env : int
Depth to use when evaluating formulas using Patsy.
Returns
-------
predictions : DataFrame
Fitted values from supplied data and parameters
Notes
-----
If `data` is not none, then `equations` must be none.
Predictions from models constructed using formulas can
be computed using either `equations`, which will treat these as
arrays of values corresponding to the formula-processed data, or using
`data` which will be processed using the formula used to construct the
values corresponding to the original model specification.
When using `exog` and `endog`, the regressor array for a particular
equation is assembled as
`[equations[eqn]['exog'], equations[eqn]['endog']]` where `eqn` is
an equation label. These must correspond to the columns in the
estimated model.
"""

if data is not None:
parser = SystemFormulaParser(self.formula, data=data, eval_env=eval_env)
equations = parser.data
Expand All @@ -543,19 +600,22 @@ def predict(self, params, *, equations=None, data=None, eval_env=8):
for i, label in enumerate(self._eq_labels):
kx = self._x[i].shape[1]
if label in equations:
b = params[loc:loc + kx]
eqn = equations[label] # type: dict
exog = eqn.get('exog', None)
endog = eqn.get('endog', None)
if exog is None and endog is None:
loc += kx
continue

if exog is not None:
exog_endog = IVData(exog).pandas
if endog is not None:
endog = IVData(endog)
exog_endog = concat([exog_endog, endog.pandas], 1)
else:
exog_endog = IVData(endog).pandas
b = params[loc:loc + kx]

fitted = exog_endog.values @ b
fitted = DataFrame(fitted, index=exog_endog.index, columns=[label])
out[label] = fitted
Expand Down
36 changes: 35 additions & 1 deletion linearmodels/tests/panel/test_formula.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
import pandas as pd
import pytest

from linearmodels.compat.pandas import assert_frame_equal
from linearmodels.formula import (between_ols, first_difference_ols, panel_ols,
pooled_ols, random_effects, fama_macbeth)
from linearmodels.panel.model import (BetweenOLS, FirstDifferenceOLS, PanelOLS,
PooledOLS, RandomEffects, FamaMacBeth)
PooledOLS, RandomEffects, FamaMacBeth,
PanelFormulaParser)
from linearmodels.tests.panel._utility import generate_data, datatypes

pytestmark = pytest.mark.filterwarnings('ignore::linearmodels.utility.MissingValueWarning')
Expand Down Expand Up @@ -40,6 +42,11 @@ def models(request):
return request.param


@pytest.fixture(params=[True, False])
def effects(request):
    """Parametrized fixture that yields True and then False."""
    flag = request.param
    return flag


def sigmoid(v):
    """
    Logistic function, exp(v) / (1 + exp(v)), applied elementwise.

    The value is computed through the identity
    sigmoid(v) = (1 + tanh(v / 2)) / 2, which stays bounded for every finite
    input.  Evaluating the ratio directly overflows to inf / inf = nan once
    v exceeds roughly 709.

    Parameters
    ----------
    v : scalar or array-like
        Input values

    Returns
    -------
    s : scalar or array-like
        Values in [0, 1]
    """
    # np.multiply (rather than 0.5 * v) keeps plain sequences usable, as
    # they were with np.exp
    return 0.5 * (1 + np.tanh(np.multiply(v, 0.5)))

Expand Down Expand Up @@ -216,3 +223,30 @@ def test_formulas_predict_error(data, models, formula):
res = model(data.y, x[vars]).fit()
with pytest.raises(ValueError):
res.predict(data=joined)


def test_parser(data, formula, effects):
    """
    Exercise PanelFormulaParser on the formula fixtures.

    Checks that the ``data`` tuple matches the ``dependent`` and ``exog``
    accessors, that ``eval_env`` can be reassigned, and that the effect
    flags mirror the formula text.  Finally, ``FixedEffects`` is appended:
    together with the other effect terms it must raise ValueError, and on
    its own it must switch on ``entity_effect``.
    """
    # Nothing is checked unless the dependent variable is a DataFrame
    if not isinstance(data.y, pd.DataFrame):
        return
    if effects:
        formula = formula + ' + EntityEffects + TimeEffects'
    frame = data.x
    frame['y'] = data.y
    panel_parser = PanelFormulaParser(formula, frame)
    dependent, regressors = panel_parser.data
    assert_frame_equal(panel_parser.dependent, dependent)
    assert_frame_equal(panel_parser.exog, regressors)
    for depth in (3, 2):
        panel_parser.eval_env = depth
        assert panel_parser.eval_env == depth
    assert panel_parser.entity_effect == ('EntityEffects' in formula)
    assert panel_parser.time_effect == ('TimeEffects' in formula)

    with_fixed = formula + ' + FixedEffects '
    if not effects:
        panel_parser = PanelFormulaParser(with_fixed, frame)
        assert panel_parser.entity_effect
        return
    with pytest.raises(ValueError):
        PanelFormulaParser(with_fixed, frame)
50 changes: 50 additions & 0 deletions linearmodels/tests/system/test_formulas.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from collections import OrderedDict
from itertools import product

import numpy as np
Expand All @@ -7,6 +8,7 @@
from linearmodels import SUR, IVSystemGMM, IV3SLS
from linearmodels.compat.pandas import assert_series_equal, assert_frame_equal
from linearmodels.formula import sur, iv_system_gmm, iv_3sls
from linearmodels.system.model import SystemFormulaParser
from linearmodels.tests.system._utility import generate_3sls_data_v2
from linearmodels.utility import AttrDict

Expand Down Expand Up @@ -100,6 +102,20 @@ def test_predict_partial(config):
pred2 = res.predict(data=joined, dataframe=True)
assert_frame_equal(pred2[pred.columns], pred)

eqns = AttrDict()
for key in list(mod._equations.keys())[1:]:
eqns[key] = mod._equations[key]
final = list(mod._equations.keys())[0]
eqns[final] = {'exog': None, 'endog': None}
pred3 = res.predict(equations=eqns, dataframe=True)
assert_frame_equal(pred2[pred3.columns], pred3)

eqns = AttrDict()
for key in mod._equations:
eqns[key] = {k: v for k, v in mod._equations[key].items() if v.shape[1] > 0}
pred4 = res.predict(equations=eqns, dataframe=True)
assert_frame_equal(pred2, pred4)


def test_invalid_predict(config):
fmla, model, interface = config
Expand All @@ -110,3 +126,37 @@ def test_invalid_predict(config):
res = mod.fit()
with pytest.raises(ValueError):
res.predict(data=joined, equations=mod._equations)


def test_parser(config):
    """
    Exercise SystemFormulaParser accessors and eval_env reassignment.

    The per-equation accessors are compared with the entries of ``data``,
    the equation labels are checked against its keys, and a second parser
    built from the cleaned formula must reproduce every non-None frame.
    """
    fmla, _model, _interface = config
    first = SystemFormulaParser(fmla, joined, eval_env=5)
    parsed = first.data
    assert isinstance(parsed, OrderedDict)
    assert first.eval_env == 5

    first.eval_env = 4
    assert first.eval_env == 4
    # The accessors are read after the depth change, in this fixed order
    exog = first.exog
    dep = first.dependent
    endog = first.endog
    instr = first.instruments
    accessors = (('exog', exog), ('dependent', dep),
                 ('endog', endog), ('instruments', instr))
    for label, eq in parsed.items():
        for name, by_label in accessors:
            assert_frame_equal(by_label[label], eq[name])

    for label in first.equation_labels:
        assert label in parsed

    second = SystemFormulaParser(first.formula, joined, eval_env=5)
    reparsed = second.data
    for label, expected in parsed.items():
        actual = reparsed[label]
        for name in expected:
            frame = expected[name]
            if frame is not None:
                assert_frame_equal(frame, actual[name])

0 comments on commit 47a1214

Please sign in to comment.